From 3b5d512831af3a43303d1c3615a0a7d6c693bce9 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 1 Aug 2018 13:45:11 +0200 Subject: [PATCH 0001/2608] xfrm: Validate address prefix lengths in the xfrm selector. [ Upstream commit 07bf7908950a8b14e81aa1807e3c667eab39287a ] We don't validate the address prefix lengths in the xfrm selector we got from userspace. This can lead to undefined behaviour in the address matching functions if the prefix is too big for the given address family. Fix this by checking the prefixes and refuse SA/policy insertation when a prefix is invalid. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Air Icy Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5554d28a32eb..4292347bf45e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + goto out; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + goto out; + break; #else err = -EAFNOSUPPORT; @@ -1353,10 +1359,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) switch (p->sel.family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + return -EINVAL; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + return -EINVAL; + break; #else return -EAFNOSUPPORT; From 2a55e64d5c5e98e425f7eafd2c480440b4aa78c2 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 31 Aug 2018 08:38:49 -0300 Subject: [PATCH 0002/2608] xfrm6: call kfree_skb when skb is toobig [ Upstream commit 215ab0f021c9fea3c18b75e7d522400ee6a49990 ] After commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ("vti6: fix PMTU caching and reporting on xmit"), some too big skbs might be potentially passed down to __xfrm6_output, causing it to fail to transmit but not free the skb, causing a leak of skb, and consequentially a leak of dst references. After running pmtu.sh, that shows as failure to unregister devices in a namespace: [ 311.397671] unregister_netdevice: waiting for veth_b to become free. Usage count = 1 The fix is to call kfree_skb in case of transmit failures. Fixes: dd767856a36e ("xfrm6: Don't call icmpv6_send on local error") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv6/xfrm6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8ae87d4ec5ff..29dae7f2ff14 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -170,9 +170,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } From a95d9004fbdedebb2e2550352808f0e8b1b0ae79 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 3 Sep 2018 04:36:52 -0700 Subject: [PATCH 0003/2608] xfrm: reset transport header back to network header after all input transforms ahave been applied [ Upstream commit bfc0698bebcb16d19ecfc89574ad4d696955e5d3 ] A policy may have been set up with multiple transforms (e.g., ESP and ipcomp). In this situation, the ingress IPsec processing iterates in xfrm_input() and applies each transform in turn, processing the nexthdr to find any additional xfrm that may apply. This patch resets the transport header back to network header only after the last transformation so that subsequent xfrms can find the correct transport header. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Suggested-by: Steffen Klassert Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv4/xfrm4_input.c | 1 + net/ipv4/xfrm4_mode_transport.c | 4 +--- net/ipv6/xfrm6_input.c | 1 + net/ipv6/xfrm6_mode_transport.c | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bcfc00e88756..f8de2482a529 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -67,6 +67,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 3d36644890bb..1ad2c2c4e250 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -46,7 +46,6 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -54,8 +53,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841f4a07438e..9ef490dddcea 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,6 +59,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return -1; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 9ad07a91708e..3c29da5defe6 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -51,7 +51,6 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -60,8 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } From 64f38286c7be284f4eba70bff165269a65461183 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 3 Sep 2018 04:36:53 -0700 Subject: [PATCH 0004/2608] xfrm: reset crypto_done when iterating over multiple input xfrms [ Upstream commit 782710e333a526780d65918d669cb96646983ba2 ] We only support one offloaded xfrm (we do not have devices that can handle more than one offload), so reset crypto_done in xfrm_input() when iterating over multiple transforms in xfrm_input, so that we can invoke the appropriate x->type->input for the non-offloaded transforms Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 9f492dc417d5..8e75319dd9c0 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -453,6 +453,7 @@ resume: XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } + crypto_done = false; } while (!err); err = xfrm_rcv_cb(skb, family, x->type->proto, 0); From b0be0d83fc6894337b807cbf66ee66e001caf89b Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:13 +0300 Subject: [PATCH 0005/2608] mac80211: Always report TX status [ Upstream commit 8682250b3c1b75a45feb7452bc413d004cfe3778 ] If a frame is dropped for any reason, mac80211 wouldn't report the TX status back to user space. As the user space may rely on the TX_STATUS to kick its state machines, resends etc, it's better to just report this frame as not acked instead. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/status.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index da7427a41529..ccac205e5853 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -470,11 +470,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, if (!skb) return; - if (dropped) { - dev_kfree_skb_any(skb); - return; - } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; @@ -495,6 +490,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } rcu_read_unlock(); + dev_kfree_skb_any(skb); + } else if (dropped) { dev_kfree_skb_any(skb); } else { /* consumes skb */ From d46c334f07f5f67cf0fc56a1e8dace1a97f8b908 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:12 +0300 Subject: [PATCH 0006/2608] cfg80211: reg: Init wiphy_idx in regulatory_hint_core() [ Upstream commit 24f33e64fcd0d50a4b1a8e5b41bd0257aa66b0e8 ] Core regulatory hints didn't set wiphy_idx to WIPHY_IDX_INVALID. Since the regulatory request is zeroed, wiphy_idx was always implicitly set to 0. This resulted in updating only phy #0. Fix that. Fixes: 806a9e39670b ("cfg80211: make regulatory_request use wiphy_idx instead of wiphy") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho [add fixes tag] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6e94f6934a0e..6f032c7b8732 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2384,6 +2384,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; + request->wiphy_idx = WIPHY_IDX_INVALID; queue_regulatory_request(request); From 574be53ab82c3525236c8a3dd70f3dcaebd9e9aa Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 5 Sep 2018 06:22:59 -0400 Subject: [PATCH 0007/2608] mac80211: fix pending queue hang due to TX_DROP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6eae4a6c2be387fec41b0d2782c4fffb57159498 ] In our environment running lots of mesh nodes, we are seeing the pending queue hang periodically, with the debugfs queues file showing lines such as: 00: 0x00000000/348 i.e. there are a large number of frames but no stop reason set. One way this could happen is if queue processing from the pending tasklet exited early without processing all frames, and without having some future event (incoming frame, stop reason flag, ...) to reschedule it. Exactly this can occur today if ieee80211_tx() returns false due to packet drops or power-save buffering in the tx handlers. In the past, this function would return true in such cases, and the change to false doesn't seem to be intentional. Fix this case by reverting to the previous behavior. Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Bob Copeland Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d8fddd88bf46..a17a56032a21 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1837,7 +1837,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; if (invoke_tx_handlers_early(&tx)) - return false; + return true; if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) return true; From 9da59d2e283d424cd9c947b849fc819536bd0882 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 5 Sep 2018 18:52:22 +0300 Subject: [PATCH 0008/2608] cfg80211: Address some corner cases in scan result channel updating [ Upstream commit 119f94a6fefcc76d47075b83d2b73d04c895df78 ] cfg80211_get_bss_channel() is used to update the RX channel based on the available frame payload information (channel number from DSSS Parameter Set element or HT Operation element). This is needed on 2.4 GHz channels where frames may be received on neighboring channels due to overlapping frequency range. This might of some use on the 5 GHz band in some corner cases, but things are more complex there since there is no n:1 or 1:n mapping between channel numbers and frequencies due to multiple different starting frequencies in different operating classes. This could result in ieee80211_channel_to_frequency() returning incorrect frequency and ieee80211_get_channel() returning incorrect channel information (or indication of no match). In the previous implementation, this could result in some scan results being dropped completely, e.g., for the 4.9 GHz channels. That prevented connection to such BSSs. Fix this by using the driver-provided channel pointer if ieee80211_get_channel() does not find matching channel data for the channel number in the frame payload and if the scan is done with 5 MHz or 10 MHz channel bandwidth. While doing this, also add comments describing what the function is trying to achieve to make it easier to understand what happens here and why. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 58 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index f6c5fe482506..5ed0ed0559dc 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1055,13 +1055,23 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, return NULL; } +/* + * Update RX channel information based on the available frame payload + * information. This is mainly for the 2.4 GHz band where frames can be received + * from neighboring channels and the Beacon frames use the DSSS Parameter Set + * element to indicate the current (transmitting) channel, but this might also + * be needed on other bands if RX frequency does not match with the actual + * operating channel of a BSS. + */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel) + struct ieee80211_channel *channel, + enum nl80211_bss_scan_width scan_width) { const u8 *tmp; u32 freq; int channel_number = -1; + struct ieee80211_channel *alt_channel; tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen); if (tmp && tmp[1] == 1) { @@ -1075,16 +1085,45 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } } - if (channel_number < 0) + if (channel_number < 0) { + /* No channel information in frame payload */ return channel; + } freq = ieee80211_channel_to_frequency(channel_number, channel->band); - channel = ieee80211_get_channel(wiphy, freq); - if (!channel) + alt_channel = ieee80211_get_channel(wiphy, freq); + if (!alt_channel) { + if (channel->band == NL80211_BAND_2GHZ) { + /* + * Better not allow unexpected channels when that could + * be going beyond the 1-11 range (e.g., discovering + * BSS on channel 12 when radio is configured for + * channel 11. + */ + return NULL; + } + + /* No match for the payload channel number - ignore it */ + return channel; + } + + if (scan_width == NL80211_BSS_CHAN_WIDTH_10 || + scan_width == NL80211_BSS_CHAN_WIDTH_5) { + /* + * Ignore channel number in 5 and 10 MHz channels where there + * may not be an n:1 or 1:n mapping between frequencies and + * channel numbers. + */ + return channel; + } + + /* + * Use the channel determined through the payload channel number + * instead of the RX channel reported by the driver. + */ + if (alt_channel->flags & IEEE80211_CHAN_DISABLED) return NULL; - if (channel->flags & IEEE80211_CHAN_DISABLED) - return NULL; - return channel; + return alt_channel; } /* Returned bss is reference counted and must be cleaned up appropriately. */ @@ -1109,7 +1148,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, (data->signal < 0 || data->signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan); + channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, + data->scan_width); if (!channel) return NULL; @@ -1207,7 +1247,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, - ielen, data->chan); + ielen, data->chan, data->scan_width); if (!channel) return NULL; From ebec37ed21a220b824f9ac8f467c81d0955b01ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Sep 2018 13:34:02 +0200 Subject: [PATCH 0009/2608] mac80211: TDLS: fix skb queue/priority assignment [ Upstream commit cb59bc14e830028d2244861216df038165d7625d ] If the TDLS setup happens over a connection to an AP that doesn't have QoS, we nevertheless assign a non-zero TID (skb->priority) and queue mapping, which may confuse us or drivers later. Fix it by just assigning the special skb->priority and then using ieee80211_select_queue() just like other data frames would go through. Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/tdls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 91093d4a2f84..6e7aa65cf345 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -16,6 +16,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" +#include "wme.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) @@ -1006,14 +1007,13 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: - skb_set_queue_mapping(skb, IEEE80211_AC_BK); - skb->priority = 2; + skb->priority = 256 + 2; break; default: - skb_set_queue_mapping(skb, IEEE80211_AC_VI); - skb->priority = 5; + skb->priority = 256 + 5; break; } + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb)); /* * Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress. From 35507aabf0e5e50525de25603328f09a7a629da5 Mon Sep 17 00:00:00 2001 From: Yuan-Chi Pang Date: Thu, 6 Sep 2018 16:57:48 +0800 Subject: [PATCH 0010/2608] mac80211: fix TX status reporting for ieee80211s [ Upstream commit c42055105785580563535e6d3143cad95c7ac7ee ] TX status reporting to ieee80211s is through ieee80211s_update_metric. There are two problems about ieee80211s_update_metric: 1. The purpose is to estimate the fail probability to a specific link. No need to restrict to data frame. 2. Current implementation does not work if wireless driver does not pass tx_status with skb. Fix this by removing ieee80211_is_data condition, passing ieee80211_tx_status directly to ieee80211s_update_metric, and putting it in both __ieee80211_tx_status and ieee80211_tx_status_ext. Signed-off-by: Yuan-Chi Pang Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mesh.h | 3 ++- net/mac80211/mesh_hwmp.c | 9 +++------ net/mac80211/status.c | 4 +++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7e5f271e3c30..4f1c61637ce3 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -217,7 +217,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); + struct sta_info *sta, + struct ieee80211_tx_status *st); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 055ea36ff27b..fab0764c315f 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -295,15 +295,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, } void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb) + struct sta_info *sta, + struct ieee80211_tx_status *st) { - struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *txinfo = st->info; int failed; - if (!ieee80211_is_data(hdr->frame_control)) - return; - failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK); /* moving average, scaled to 100. diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ccac205e5853..bdf131ed5ce8 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -797,7 +797,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, rate_control_tx_status(local, sband, status); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - ieee80211s_update_metric(local, sta, skb); + ieee80211s_update_metric(local, sta, status); if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked) ieee80211_frame_acked(sta, skb); @@ -958,6 +958,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, } rate_control_tx_status(local, sband, status); + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) + ieee80211s_update_metric(local, sta, status); } if (acked || noack_success) { From 73c6f86060703daf8b7aa72c51451a6339164915 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 11 Sep 2018 10:31:15 +0200 Subject: [PATCH 0011/2608] xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry. [ Upstream commit 9e1437937807b0122e8da1ca8765be2adca9aee6 ] Since commit 222d7dbd258d ("net: prevent dst uses after free") skb_dst_force() might clear the dst_entry attached to the skb. The xfrm code don't expect this to happen, so we crash with a NULL pointer dereference in this case. Fix it by checking skb_dst(skb) for NULL after skb_dst_force() and drop the packet in cast the dst_entry was cleared. Fixes: 222d7dbd258d ("net: prevent dst uses after free") Reported-by: Tobias Hommel Reported-by: Kristian Evensen Reported-by: Wolfgang Walter Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_output.c | 4 ++++ net/xfrm/xfrm_policy.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 35610cc881a9..c47660fba498 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -101,6 +101,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + goto error_nolock; + } if (xfrm_offload(skb)) { x->type_offload->encap(x, skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2fb7a78308e1..37c32e73aaef 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2550,6 +2550,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); + return 0; + } dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { From 5d7bf7b4d022ba0f5a01f2fc2be352fad925a627 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 13 Sep 2018 16:48:08 +0100 Subject: [PATCH 0012/2608] ARM: 8799/1: mm: fix pci_ioremap_io() offset check [ Upstream commit 3a58ac65e2d7969bcdf1b6acb70fa4d12a88e53e ] IO_SPACE_LIMIT is the ending address of the PCI IO space, i.e something like 0xfffff (and not 0x100000). Therefore, when offset = 0xf0000 is passed as argument, this function fails even though the offset + SZ_64K fits below the IO_SPACE_LIMIT. This makes the last chunk of 64 KB of the I/O space not usable as it cannot be mapped. This patch fixes that by substracing 1 to offset + SZ_64K, so that we compare the addrss of the last byte of the I/O space against IO_SPACE_LIMIT instead of the address of the first byte of what is after the I/O space. Fixes: c2794437091a4 ("ARM: Add fixed PCI i/o mapping") Signed-off-by: Thomas Petazzoni Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index fc91205ff46c..5bf9443cfbaa 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, From 26c6b9da7a001cf37e1b2029a33955cdc890d676 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Wed, 19 Sep 2018 13:54:56 -0600 Subject: [PATCH 0013/2608] xfrm: validate template mode [ Upstream commit 32bf94fb5c2ec4ec842152d0e5937cd4bb6738fa ] XFRM mode parameters passed as part of the user templates in the IP_XFRM_POLICY are never properly validated. Passing values other than valid XFRM modes can cause stack-out-of-bounds reads to occur later in the XFRM processing: [ 140.535608] ================================================================ [ 140.543058] BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x17e4/0x1cc4 [ 140.550306] Read of size 4 at addr ffffffc0238a7a58 by task repro/5148 [ 140.557369] [ 140.558927] Call trace: [ 140.558936] dump_backtrace+0x0/0x388 [ 140.558940] show_stack+0x24/0x30 [ 140.558946] __dump_stack+0x24/0x2c [ 140.558949] dump_stack+0x8c/0xd0 [ 140.558956] print_address_description+0x74/0x234 [ 140.558960] kasan_report+0x240/0x264 [ 140.558963] __asan_report_load4_noabort+0x2c/0x38 [ 140.558967] xfrm_state_find+0x17e4/0x1cc4 [ 140.558971] xfrm_resolve_and_create_bundle+0x40c/0x1fb8 [ 140.558975] xfrm_lookup+0x238/0x1444 [ 140.558977] xfrm_lookup_route+0x48/0x11c [ 140.558984] ip_route_output_flow+0x88/0xc4 [ 140.558991] raw_sendmsg+0xa74/0x266c [ 140.558996] inet_sendmsg+0x258/0x3b0 [ 140.559002] sock_sendmsg+0xbc/0xec [ 140.559005] SyS_sendto+0x3a8/0x5a8 [ 140.559008] el0_svc_naked+0x34/0x38 [ 140.559009] [ 140.592245] page dumped because: kasan: bad access detected [ 140.597981] page_owner info is not active (free page?) [ 140.603267] [ 140.653503] ================================================================ Signed-off-by: Sean Tranchetti Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4292347bf45e..4e8319766f2b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1449,6 +1449,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) (ut[i].family != prev_family)) return -EINVAL; + if (ut[i].mode >= XFRM_MODE_MAX) + return -EINVAL; + prev_family = ut[i].family; switch (ut[i].family) { From 1157713407a691eb48b2d6abfda1bc87b0c3848d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Sep 2018 08:20:36 -0700 Subject: [PATCH 0014/2608] netfilter: bridge: Don't sabotage nf_hook calls from an l3mdev [ Upstream commit a173f066c7cfc031acb8f541708041e009fc9812 ] For starters, the bridge netfilter code registers operations that are invoked any time nh_hook is called. Specifically, ip_sabotage_in watches for nested calls for NF_INET_PRE_ROUTING when a bridge is in the stack. Packet wise, the bridge netfilter hook runs first. br_nf_pre_routing allocates nf_bridge, sets in_prerouting to 1 and calls NF_HOOK for NF_INET_PRE_ROUTING. It's finish function, br_nf_pre_routing_finish, then resets in_prerouting flag to 0 and the packet continues up the stack. The packet eventually makes it to the VRF driver and it invokes nf_hook for NF_INET_PRE_ROUTING in case any rules have been added against the vrf device. Because of the registered operations the call to nf_hook causes ip_sabotage_in to be invoked. That function sees the nf_bridge on the skb and that in_prerouting is not set. Thinking it is an invalid nested call it steals (drops) the packet. Update ip_sabotage_in to recognize that the bridge or one of its upper devices (e.g., vlan) can be enslaved to a VRF (L3 master device) and allow the packet to go through the nf_hook a second time. Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device") Reported-by: D'Souza, Nelson Signed-off-by: David Ahern Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/br_netfilter_hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c2eea1b8737a..7582f28ab306 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -832,7 +832,8 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) { + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev)) { state->okfn(state->net, state->sk, skb); return NF_STOLEN; } From 1241679ce248b13db2193ef04b942098fa85e3ec Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 21 Sep 2018 16:34:04 +0100 Subject: [PATCH 0015/2608] arm64: hugetlb: Fix handling of young ptes [ Upstream commit 469ed9d823b7d240d6b9574f061ded7c3834c167 ] In the contiguous bit hugetlb break-before-make code we assume that all hugetlb pages are young. In fact, remove_migration_pte is able to place an old hugetlb pte so this assumption is not valid. This patch fixes the contiguous hugetlb scanning code to preserve young ptes. Fixes: d8bdcff28764 ("arm64: hugetlb: Add break-before-make logic for contiguous entries") Signed-off-by: Steve Capper Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/mm/hugetlbpage.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 6cb0fa92a651..9f6ae9686dac 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -118,11 +118,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) @@ -347,10 +350,13 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, if (!pte_same(orig_pte, pte)) changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); From a5bdfc6892b5f6c4d131c246771dc67a55460691 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 19 Sep 2018 17:14:01 -0700 Subject: [PATCH 0016/2608] ARM: dts: BCM63xx: Fix incorrect interrupt specifiers [ Upstream commit 3ab97942d0213b6583a5408630a8cbbfbf54730f ] A number of our interrupts were incorrectly specified, fix both the PPI and SPI interrupts to be correct. Fixes: b5762cacc411 ("ARM: bcm63138: add NAND DT support") Fixes: 46d4bca0445a ("ARM: BCM63XX: add BCM63138 minimal Device Tree") Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/bcm63138.dtsi | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 43ee992ccdcf..6df61518776f 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -106,21 +106,23 @@ global_timer: timer@1e200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x1e200 0x20>; - interrupts = ; + interrupts = ; clocks = <&axi_clk>; }; local_timer: local-timer@1e600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x1e600 0x20>; - interrupts = ; + interrupts = ; clocks = <&axi_clk>; }; twd_watchdog: watchdog@1e620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0x1e620 0x20>; - interrupts = ; + interrupts = ; }; armpll: armpll { @@ -158,7 +160,7 @@ serial0: serial@600 { compatible = "brcm,bcm6345-uart"; reg = <0x600 0x1b>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -167,7 +169,7 @@ serial1: serial@620 { compatible = "brcm,bcm6345-uart"; reg = <0x620 0x1b>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -180,7 +182,7 @@ reg = <0x2000 0x600>, <0xf0 0x10>; reg-names = "nand", "nand-int-base"; status = "disabled"; - interrupts = ; + interrupts = ; interrupt-names = "nand"; }; From d9e742766c763db1e8ead9a9fbd7640232735ede Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 25 Sep 2018 08:32:50 +0200 Subject: [PATCH 0017/2608] net: macb: Clean 64b dma addresses if they are not detected [ Upstream commit e1e5d8a9fe737d94ccc0ccbaf0c97f69a8f3e000 ] Clear ADDR64 dma bit in DMACFG register in case that HW_DMA_CAP_64B is not detected on 64bit system. The issue was observed when bootloader(u-boot) does not check macb feature at DCFG6 register (DAW64_OFFSET) and enabling 64bit dma support by default. Then macb driver is reading DMACFG register back and only adding 64bit dma configuration but not cleaning it out. Signed-off-by: Michal Simek Acked-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index b4f92de1efbd..d6f8d6c8b0f1 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2000,6 +2000,7 @@ static void macb_configure_dma(struct macb *bp) else dmacfg &= ~GEM_BIT(TXCOEN); + dmacfg &= ~GEM_BIT(ADDR64); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (bp->hw_dma_cap & HW_DMA_CAP_64B) dmacfg |= GEM_BIT(ADDR64); From 795b13034224f1b82fc22d749f9226a8c97977d4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 23 Aug 2018 23:36:00 +0200 Subject: [PATCH 0018/2608] soc: fsl: qbman: qman: avoid allocating from non existing gen_pool [ Upstream commit 64e9e22e68512da8df3c9a7430f07621e48db3c2 ] If the qman driver didn't probe, calling qman_alloc_fqid_range, qman_alloc_pool_range or qman_alloc_cgrid_range (as done in dpaa_eth) will pass a NULL pointer to gen_pool_alloc, leading to a NULL pointer dereference. Signed-off-by: Alexandre Belloni Reviewed-by: Roy Pledge Signed-off-by: Li Yang (cherry picked from commit f72487a2788aa70c3aee1d0ebd5470de9bac953a) Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- drivers/soc/fsl/qbman/qman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 0c6065dba48a..4f27e95efcdd 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2699,6 +2699,9 @@ static int qman_alloc_range(struct gen_pool *p, u32 *result, u32 cnt) { unsigned long addr; + if (!p) + return -ENODEV; + addr = gen_pool_alloc(p, cnt); if (!addr) return -ENOMEM; From fa1578ec411d8cf9724166738fd41fc5b686e28e Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Thu, 1 Feb 2018 14:54:32 +0800 Subject: [PATCH 0019/2608] soc: fsl: qe: Fix copy/paste bug in ucc_get_tdm_sync_shift() [ Upstream commit 96fc74333f84cfdf8d434c6c07254e215e2aad00 ] There is a copy and paste bug so we accidentally use the RX_ shift when we're in TX_ mode. Fixes: bb8b2062aff3 ("fsl/qe: setup clock source for TDM mode") Signed-off-by: Dan Carpenter Signed-off-by: Zhao Qiang Signed-off-by: Li Yang (cherry picked from commit 3cb31b634052ed458922e0c8e2b4b093d7fb60b9) Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- drivers/soc/fsl/qe/ucc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/fsl/qe/ucc.c b/drivers/soc/fsl/qe/ucc.c index c646d8713861..681f7d4b7724 100644 --- a/drivers/soc/fsl/qe/ucc.c +++ b/drivers/soc/fsl/qe/ucc.c @@ -626,7 +626,7 @@ static u32 ucc_get_tdm_sync_shift(enum comm_dir mode, u32 tdm_num) { u32 shift; - shift = (mode == COMM_DIR_RX) ? RX_SYNC_SHIFT_BASE : RX_SYNC_SHIFT_BASE; + shift = (mode == COMM_DIR_RX) ? RX_SYNC_SHIFT_BASE : TX_SYNC_SHIFT_BASE; shift -= tdm_num * 2; return shift; From c7b66583c9edb63ab7613a6ff545367175f95125 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 25 Sep 2018 11:15:00 +0900 Subject: [PATCH 0020/2608] nl80211: Fix possible Spectre-v1 for NL80211_TXRATE_HT [ Upstream commit 30fe6d50eb088783c8729c7d930f65296b2b3fa7 ] Use array_index_nospec() to sanitize ridx with respect to speculation. Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3de415bca391..5e7c9b361e8a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3480,6 +3480,7 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return false; /* check availability */ + ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN); if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) mcs[ridx] |= rbit; else From db2a11e706bf59d60dbd250f19a383d102a92e69 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:51:02 +0200 Subject: [PATCH 0021/2608] mac80211_hwsim: do not omit multicast announce of first added radio [ Upstream commit 28ef8b49a338dc1844e86b7954cfffc7dfa2660a ] The allocation of hwsim radio identifiers uses a post-increment from 0, so the first radio has idx 0. This idx is explicitly excluded from multicast announcements ever since, but it is unclear why. Drop that idx check and announce the first radio as well. This makes userspace happy if it relies on these events. Signed-off-by: Martin Willi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index aafa7aa18fbd..477f9f2f6626 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2730,8 +2730,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, list_add_tail(&data->list, &hwsim_radios); spin_unlock_bh(&hwsim_radio_lock); - if (idx > 0) - hwsim_mcast_new_radio(idx, info, param); + hwsim_mcast_new_radio(idx, info, param); return idx; From 0f8a689c6a2f93cbbc16b0ff2003b3240241bdc5 Mon Sep 17 00:00:00 2001 From: Matias Karhumaa Date: Wed, 26 Sep 2018 09:13:46 +0300 Subject: [PATCH 0022/2608] Bluetooth: SMP: fix crash in unpairing [ Upstream commit cb28c306b93b71f2741ce1a5a66289db26715f4d ] In case unpair_device() was called through mgmt interface at the same time when pairing was in progress, Bluetooth kernel module crash was seen. [ 600.351225] general protection fault: 0000 [#1] SMP PTI [ 600.351235] CPU: 1 PID: 11096 Comm: btmgmt Tainted: G OE 4.19.0-rc1+ #1 [ 600.351238] Hardware name: Dell Inc. Latitude E5440/08RCYC, BIOS A18 05/14/2017 [ 600.351272] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351276] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351279] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351282] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351285] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351287] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351290] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351292] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351295] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351298] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351300] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 [ 600.351302] Call Trace: [ 600.351325] smp_failure+0x4f/0x70 [bluetooth] [ 600.351345] smp_cancel_pairing+0x74/0x80 [bluetooth] [ 600.351370] unpair_device+0x1c1/0x330 [bluetooth] [ 600.351399] hci_sock_sendmsg+0x960/0x9f0 [bluetooth] [ 600.351409] ? apparmor_socket_sendmsg+0x1e/0x20 [ 600.351417] sock_sendmsg+0x3e/0x50 [ 600.351422] sock_write_iter+0x85/0xf0 [ 600.351429] do_iter_readv_writev+0x12b/0x1b0 [ 600.351434] do_iter_write+0x87/0x1a0 [ 600.351439] vfs_writev+0x98/0x110 [ 600.351443] ? ep_poll+0x16d/0x3d0 [ 600.351447] ? ep_modify+0x73/0x170 [ 600.351451] do_writev+0x61/0xf0 [ 600.351455] ? do_writev+0x61/0xf0 [ 600.351460] __x64_sys_writev+0x1c/0x20 [ 600.351465] do_syscall_64+0x5a/0x110 [ 600.351471] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 600.351474] RIP: 0033:0x7fb2bdb62fe0 [ 600.351477] Code: 73 01 c3 48 8b 0d b8 6e 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 69 c7 2c 00 00 75 10 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 de 80 01 00 48 89 04 24 [ 600.351479] RSP: 002b:00007ffe062cb8f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 [ 600.351484] RAX: ffffffffffffffda RBX: 000000000255b3d0 RCX: 00007fb2bdb62fe0 [ 600.351487] RDX: 0000000000000001 RSI: 00007ffe062cb920 RDI: 0000000000000004 [ 600.351490] RBP: 00007ffe062cb920 R08: 000000000255bd80 R09: 0000000000000000 [ 600.351494] R10: 0000000000000353 R11: 0000000000000246 R12: 0000000000000001 [ 600.351497] R13: 00007ffe062cbbe0 R14: 0000000000000000 R15: 0000000000000000 [ 600.351501] Modules linked in: algif_hash algif_skcipher af_alg cmac ipt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo iptable_nat nf_nat_ipv4 xt_addrtype iptable_filter ip_tables xt_conntrack x_tables nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c br_netfilter bridge stp llc overlay arc4 nls_iso8859_1 dm_crypt intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp dell_laptop kvm_intel crct10dif_pclmul dell_smm_hwmon crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper intel_cstate intel_rapl_perf uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common videodev media hid_multitouch input_leds joydev serio_raw dell_wmi snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic dell_smbios dcdbas sparse_keymap [ 600.351569] snd_hda_intel btusb snd_hda_codec btrtl btbcm btintel snd_hda_core bluetooth(OE) snd_hwdep snd_pcm iwlmvm ecdh_generic wmi_bmof dell_wmi_descriptor snd_seq_midi mac80211 snd_seq_midi_event lpc_ich iwlwifi snd_rawmidi snd_seq snd_seq_device snd_timer cfg80211 snd soundcore mei_me mei dell_rbtn dell_smo8800 mac_hid parport_pc ppdev lp parport autofs4 hid_generic usbhid hid i915 nouveau kvmgt vfio_mdev mdev vfio_iommu_type1 vfio kvm irqbypass i2c_algo_bit ttm drm_kms_helper syscopyarea sysfillrect sysimgblt mxm_wmi psmouse ahci sdhci_pci cqhci libahci fb_sys_fops sdhci drm e1000e video wmi [ 600.351637] ---[ end trace e49e9f1df09c94fb ]--- [ 600.351664] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351666] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351669] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351672] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351674] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351676] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351679] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351681] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351684] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351689] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 Crash happened because list_del_rcu() was called twice for smp->ltk. This was possible if unpair_device was called right after ltk was generated but before keys were distributed. In this commit smp_cancel_pairing was refactored to cancel pairing if it is in progress and otherwise just removes keys. Once keys are removed from rcu list, pointers to smp context's keys are set to NULL to make sure removed list items are not accessed later. This commit also adjusts the functionality of mgmt unpair_device() little bit. Previously pairing was canceled only if pairing was in state that keys were already generated. With this commit unpair_device() cancels pairing already in earlier states. Bug was found by fuzzing kernel SMP implementation using Synopsys Defensics. Reported-by: Pekka Oikarainen Signed-off-by: Matias Karhumaa Signed-off-by: Johan Hedberg Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 7 ++----- net/bluetooth/smp.c | 29 +++++++++++++++++++++++++---- net/bluetooth/smp.h | 3 ++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1fba2a03f8ae..ba24f613c0fc 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2298,9 +2298,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, /* LE address type */ addr_type = le_addr_type(cp->addr.type); - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); - - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); + /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ + err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -2314,8 +2313,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } - /* Abort any ongoing SMP pairing */ - smp_cancel_pairing(conn); /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index a27704ff13a9..dbcc439fc78b 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2410,30 +2410,51 @@ unlock: return ret; } -void smp_cancel_pairing(struct hci_conn *hcon) +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_conn *hcon; + struct l2cap_conn *conn; struct l2cap_chan *chan; struct smp_chan *smp; + int err; + err = hci_remove_ltk(hdev, bdaddr, addr_type); + hci_remove_irk(hdev, bdaddr, addr_type); + + hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type); + if (!hcon) + goto done; + + conn = hcon->l2cap_data; if (!conn) - return; + goto done; chan = conn->smp; if (!chan) - return; + goto done; l2cap_chan_lock(chan); smp = chan->data; if (smp) { + /* Set keys to NULL to make sure smp_failure() does not try to + * remove and free already invalidated rcu list entries. */ + smp->ltk = NULL; + smp->slave_ltk = NULL; + smp->remote_irk = NULL; + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) smp_failure(conn, 0); else smp_failure(conn, SMP_UNSPECIFIED); + err = 0; } l2cap_chan_unlock(chan); + +done: + return err; } static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 0ff6247eaa6c..121edadd5f8d 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -181,7 +181,8 @@ enum smp_key_pref { }; /* SMP Commands */ -void smp_cancel_pairing(struct hci_conn *hcon); +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); From 865741554925b7f53c1ccc01d38d3854dffbbcb3 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 26 Sep 2018 18:11:22 +0200 Subject: [PATCH 0023/2608] pxa168fb: prepare the clock [ Upstream commit d85536cde91fcfed6fb8d983783bd2b92c843939 ] Add missing prepare/unprepare operations for fbi->clk, this fixes following kernel warning: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:874 clk_core_enable+0x2c/0x1b0 Enabling unprepared disp0_clk Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.18.0-rc8-00032-g02b43ddd4f21-dirty #25 Hardware name: Marvell MMP2 (Device Tree Support) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__warn+0xd8/0xf0) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (clk_core_enable+0x2c/0x1b0) [] (clk_core_enable) from [] (clk_core_enable_lock+0x18/0x2c) [] (clk_core_enable_lock) from [] (pxa168fb_probe+0x464/0x6ac) [] (pxa168fb_probe) from [] (platform_drv_probe+0x48/0x94) [] (platform_drv_probe) from [] (driver_probe_device+0x328/0x470) [] (driver_probe_device) from [] (__driver_attach+0xb0/0x124) [] (__driver_attach) from [] (bus_for_each_dev+0x64/0xa0) [] (bus_for_each_dev) from [] (bus_add_driver+0x1b8/0x230) [] (bus_add_driver) from [] (driver_register+0xac/0xf0) [] (driver_register) from [] (do_one_initcall+0xb8/0x1f0) [] (do_one_initcall) from [] (kernel_init_freeable+0x294/0x2e0) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x10c) [] (kernel_init) from [] (ret_from_fork+0x14/0x2c) Exception stack(0xd008bfb0 to 0xd008bff8) bfa0: 00000000 00000000 00000000 00000000 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 ---[ end trace c0af40f9e2ed7cb4 ]--- Signed-off-by: Lubomir Rintel [b.zolnierkie: enhance patch description a bit] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/pxa168fb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index def3a501acd6..d059d04c63ac 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -712,7 +712,7 @@ static int pxa168fb_probe(struct platform_device *pdev) /* * enable controller clock */ - clk_enable(fbi->clk); + clk_prepare_enable(fbi->clk); pxa168fb_set_par(info); @@ -767,7 +767,7 @@ static int pxa168fb_probe(struct platform_device *pdev) failed_free_cmap: fb_dealloc_cmap(&info->cmap); failed_free_clk: - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); failed_free_fbmem: dma_free_coherent(fbi->dev, info->fix.smem_len, info->screen_base, fbi->fb_start_dma); @@ -807,7 +807,7 @@ static int pxa168fb_remove(struct platform_device *pdev) dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len), info->screen_base, info->fix.smem_start); - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); framebuffer_release(info); From 76f86190774e9b1655cc53781311e065462f1fc4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 13:53:34 -0700 Subject: [PATCH 0024/2608] qed: Avoid implicit enum conversion in qed_set_tunn_cls_info [ Upstream commit a898fba32229efd5e6b6154f83fa86a7145156b9 ] Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:163:25: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->vxlan.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:165:26: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->l2_gre.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:167:26: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->ip_gre.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:169:29: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->l2_geneve.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:171:29: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->ip_geneve.tun_cls = type; ~ ^~~~ 5 warnings generated. Avoid this by changing type to an int. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 46d0c3cb83a5..d7c5965328be 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -154,7 +154,7 @@ qed_set_pf_update_tunn_mode(struct qed_tunnel_info *p_tun, static void qed_set_tunn_cls_info(struct qed_tunnel_info *p_tun, struct qed_tunnel_info *p_src) { - enum tunnel_clss type; + int type; p_tun->b_update_rx_cls = p_src->b_update_rx_cls; p_tun->b_update_tx_cls = p_src->b_update_tx_cls; From ab7998abfea24e59acd3d387e46760628f95ff28 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:05:27 -0700 Subject: [PATCH 0025/2608] qed: Fix mask parameter in qed_vf_prep_tunn_req_tlv [ Upstream commit db803f36e56f23b5a2266807e190d1dc11554d54 ] Clang complains when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_vf.c:686:6: warning: implicit conversion from enumeration type 'enum qed_tunn_mode' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] QED_MODE_L2GENEVE_TUNN, ^~~~~~~~~~~~~~~~~~~~~~ Update mask's parameter to expect qed_tunn_mode, which is what was intended. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 6eb85db69f9a..b8b1a791a4fa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -572,7 +572,7 @@ free_p_iov: static void __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, u8 *p_cls) + enum qed_tunn_mode mask, u8 *p_cls) { if (p_src->b_update_mode) { p_req->tun_mode_update_mask |= BIT(mask); @@ -587,7 +587,7 @@ __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, static void qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, + enum qed_tunn_mode mask, u8 *p_cls, struct qed_tunn_update_udp_port *p_port, u8 *p_update_port, u16 *p_udp_port) { From 935d441968b1877707367cc7ac78b1822bf483fc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:34:53 -0700 Subject: [PATCH 0026/2608] qed: Avoid implicit enum conversion in qed_roce_mode_to_flavor [ Upstream commit d3a315795b4ce8b105a64a90699103121bde04a8 ] Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_roce.c:153:12: warning: implicit conversion from enumeration type 'enum roce_mode' to different enumeration type 'enum roce_flavor' [-Wenum-conversion] flavor = ROCE_V2_IPV6; ~ ^~~~~~~~~~~~ drivers/net/ethernet/qlogic/qed/qed_roce.c:156:12: warning: implicit conversion from enumeration type 'enum roce_mode' to different enumeration type 'enum roce_flavor' [-Wenum-conversion] flavor = MAX_ROCE_MODE; ~ ^~~~~~~~~~~~~ 2 warnings generated. Use the appropriate values from the expected type, roce_flavor: ROCE_V2_IPV6 = RROCE_IPV6 = 2 MAX_ROCE_MODE = MAX_ROCE_FLAVOR = 3 While we're add it, ditch the local variable flavor, we can just return the value directly from the switch statement. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index fb7c2d1562ae..bedbf840fd7d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -129,23 +129,16 @@ static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) { - enum roce_flavor flavor; - switch (roce_mode) { case ROCE_V1: - flavor = PLAIN_ROCE; - break; + return PLAIN_ROCE; case ROCE_V2_IPV4: - flavor = RROCE_IPV4; - break; + return RROCE_IPV4; case ROCE_V2_IPV6: - flavor = ROCE_V2_IPV6; - break; + return RROCE_IPV6; default: - flavor = MAX_ROCE_MODE; - break; + return MAX_ROCE_FLAVOR; } - return flavor; } void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid) From ee86b4d659c23f735c3162b0c9c461a2e6f07a8b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 15:17:03 -0700 Subject: [PATCH 0027/2608] qed: Avoid constant logical operation warning in qed_vf_pf_acquire [ Upstream commit 1c492a9d55ba99079210ed901dd8a5423f980487 ] Clang warns when a constant is used in a boolean context as it thinks a bitwise operation may have been intended. drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand] if (!p_iov->b_pre_fp_hsi && ^ drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: note: use '&' for a bitwise operation if (!p_iov->b_pre_fp_hsi && ^~ & drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: note: remove constant to silence this warning if (!p_iov->b_pre_fp_hsi && ~^~ 1 warning generated. This has been here since commit 1fe614d10f45 ("qed: Relax VF firmware requirements") and I am not entirely sure why since 0 isn't a special case. Just remove the statement causing Clang to warn since it isn't required. Link: https://github.com/ClangBuiltLinux/linux/issues/126 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index b8b1a791a4fa..dd8ebf6d380f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -413,7 +413,6 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) } if (!p_iov->b_pre_fp_hsi && - ETH_HSI_VER_MINOR && (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) { DP_INFO(p_hwfn, "PF is using older fastpath HSI; %02x.%02x is configured\n", From b93393d63d8f66135acc32a3ff8f3680f923aa08 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:42:12 -0700 Subject: [PATCH 0028/2608] qed: Avoid implicit enum conversion in qed_iwarp_parse_rx_pkt [ Upstream commit 77f2d753819b7d50c16abfb778caf1fe075faed0 ] Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1713:25: warning: implicit conversion from enumeration type 'enum tcp_ip_version' to different enumeration type 'enum qed_tcp_ip_version' [-Wenum-conversion] cm_info->ip_version = TCP_IPV4; ~ ^~~~~~~~ drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1733:25: warning: implicit conversion from enumeration type 'enum tcp_ip_version' to different enumeration type 'enum qed_tcp_ip_version' [-Wenum-conversion] cm_info->ip_version = TCP_IPV6; ~ ^~~~~~~~ 2 warnings generated. Use the appropriate values from the expected type, qed_tcp_ip_version: TCP_IPV4 = QED_TCP_IPV4 = 0 TCP_IPV6 = QED_TCP_IPV6 = 1 Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index e41f28602535..eb666877d1aa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1672,7 +1672,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->local_ip[0] = ntohl(iph->daddr); cm_info->remote_ip[0] = ntohl(iph->saddr); - cm_info->ip_version = TCP_IPV4; + cm_info->ip_version = QED_TCP_IPV4; ip_hlen = (iph->ihl) * sizeof(u32); *payload_len = ntohs(iph->tot_len) - ip_hlen; @@ -1692,7 +1692,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->remote_ip[i] = ntohl(ip6h->saddr.in6_u.u6_addr32[i]); } - cm_info->ip_version = TCP_IPV6; + cm_info->ip_version = QED_TCP_IPV6; ip_hlen = sizeof(*ip6h); *payload_len = ntohs(ip6h->payload_len); From 915670c48eb3d073ef8dc29a12db92cbde233195 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 25 Sep 2018 11:15:01 +0900 Subject: [PATCH 0029/2608] nl80211: Fix possible Spectre-v1 for CQM RSSI thresholds [ Upstream commit 1222a16014888ed9733c11e221730d4a8196222b ] Use array_index_nospec() to sanitize i with respect to speculation. Note that the user doesn't control i directly, but can make it out of bounds by not finding a threshold in the array. Signed-off-by: Masashi Honma [add note about user control, as explained by Masashi] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5e7c9b361e8a..46e9812d13c0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9720,7 +9720,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; u32 hyst; - int i, n; + int i, n, low_index; int err; /* RSSI reporting disabled? */ @@ -9757,10 +9757,19 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (last < wdev->cqm_config->rssi_thresholds[i]) break; - low = i > 0 ? - (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN; - high = i < n ? - (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX; + low_index = i - 1; + if (low_index >= 0) { + low_index = array_index_nospec(low_index, n); + low = wdev->cqm_config->rssi_thresholds[low_index] - hyst; + } else { + low = S32_MIN; + } + if (i < n) { + i = array_index_nospec(i, n); + high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1; + } else { + high = S32_MAX; + } return rdev_set_cqm_rssi_range_config(rdev, dev, low, high); } From eef5935c2d7a349479c6992ddb60b2c4d9e17263 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:50 -0700 Subject: [PATCH 0030/2608] asix: Check for supported Wake-on-LAN modes [ Upstream commit c4ce446e33d7a0e978256ac6fea4c80e59d9de5f ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/asix_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 522d2900cd1d..e9fcf6ef716a 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -607,6 +607,9 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) From a0fc063355300102c96ee8b9b9c460102a93a7c0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:51 -0700 Subject: [PATCH 0031/2608] ax88179_178a: Check for supported Wake-on-LAN modes [ Upstream commit 5ba6b4aa9a410c5e2c6417df52b5e2118ea9b467 ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/ax88179_178a.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index f32261ecd215..0f69b77e8502 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -566,6 +566,9 @@ ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_MODE_RWLC; if (wolinfo->wolopts & WAKE_MAGIC) From 9f0962c0f6f7126c0d407989e86daeb1317e0dda Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:52 -0700 Subject: [PATCH 0032/2608] lan78xx: Check for supported Wake-on-LAN modes [ Upstream commit eb9ad088f96653a26b340f7c447c44cf023d5cdc ] The driver supports a fair amount of Wake-on-LAN modes, but is not checking that the user specified one that is supported. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Florian Fainelli Reviewed-by: Woojung Huh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9e3f632e22f1..00ddcaf09014 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1375,19 +1375,10 @@ static int lan78xx_set_wol(struct net_device *netdev, if (ret < 0) return ret; - pdata->wol = 0; - if (wol->wolopts & WAKE_UCAST) - pdata->wol |= WAKE_UCAST; - if (wol->wolopts & WAKE_MCAST) - pdata->wol |= WAKE_MCAST; - if (wol->wolopts & WAKE_BCAST) - pdata->wol |= WAKE_BCAST; - if (wol->wolopts & WAKE_MAGIC) - pdata->wol |= WAKE_MAGIC; - if (wol->wolopts & WAKE_PHY) - pdata->wol |= WAKE_PHY; - if (wol->wolopts & WAKE_ARP) - pdata->wol |= WAKE_ARP; + if (wol->wolopts & ~WAKE_ALL) + return -EINVAL; + + pdata->wol = wol->wolopts; device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); From 4301fb3391eddf9c697bac653320749735d5a2a9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:53 -0700 Subject: [PATCH 0033/2608] sr9800: Check for supported Wake-on-LAN modes [ Upstream commit c5cb93e994ffb43b7b3b1ff10b9f928f54574a36 ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 19a38d8e0aa3 ("USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/sr9800.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 9277a0f228df..35f39f23d881 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -421,6 +421,9 @@ sr_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= SR_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) From 9eaabc0a77799b3563a5be757e551ae7463d2706 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:54 -0700 Subject: [PATCH 0034/2608] r8152: Check for supported Wake-on-LAN Modes [ Upstream commit f2750df1548bd8a2b060eb609fc43ca82811af4c ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 21ff2e8976b1 ("r8152: support WOL") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0fa64cc1a011..66beff4d7646 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4497,6 +4497,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (!rtl_can_wakeup(tp)) return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_ANY) + return -EINVAL; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; From 9e3a0366365bf82da3668d6e20cc753476fb9519 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:55 -0700 Subject: [PATCH 0035/2608] smsc75xx: Check for Wake-on-LAN modes [ Upstream commit 9c734b2769a73eea2e9e9767c0e0bf839ff23679 ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 6c636503260d ("smsc75xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc75xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index b64b1ee56d2d..ec287c9741e8 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -731,6 +731,9 @@ static int smsc75xx_ethtool_set_wol(struct net_device *net, struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); From 35ffbd523797f58a3017906904a718db6035fc56 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:56 -0700 Subject: [PATCH 0036/2608] smsc95xx: Check for Wake-on-LAN modes [ Upstream commit c530c471ba37bdd9fe1c7185b01455c00ae606fb ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: e0e474a83c18 ("smsc95xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc95xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 309b88acd3d0..99e684e39d35 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -774,6 +774,9 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); From 492a81318e8cf41f18ad6845152b54f4a3387556 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 27 Sep 2018 17:05:04 -0600 Subject: [PATCH 0037/2608] cfg80211: fix use-after-free in reg_process_hint() [ Upstream commit 1db58529454742f67ebd96e3588315e880b72837 ] reg_process_hint_country_ie() can free regulatory_request and return REG_REQ_ALREADY_SET. We shouldn't use regulatory_request after it's called. KASAN error was observed when this happens. BUG: KASAN: use-after-free in reg_process_hint+0x839/0x8aa [cfg80211] Read of size 4 at addr ffff8800c430d434 by task kworker/1:3/89 Workqueue: events reg_todo [cfg80211] Call Trace: dump_stack+0xc1/0x10c ? _atomic_dec_and_lock+0x1ad/0x1ad ? _raw_spin_lock_irqsave+0xa0/0xd2 print_address_description+0x86/0x26f ? reg_process_hint+0x839/0x8aa [cfg80211] kasan_report+0x241/0x29b reg_process_hint+0x839/0x8aa [cfg80211] reg_todo+0x204/0x5b9 [cfg80211] process_one_work+0x55f/0x8d0 ? worker_detach_from_pool+0x1b5/0x1b5 ? _raw_spin_unlock_irq+0x65/0xdd ? _raw_spin_unlock_irqrestore+0xf3/0xf3 worker_thread+0x5dd/0x841 ? kthread_parkme+0x1d/0x1d kthread+0x270/0x285 ? pr_cont_work+0xe3/0xe3 ? rcu_read_unlock_sched_notrace+0xca/0xca ret_from_fork+0x22/0x40 Allocated by task 2718: set_track+0x63/0xfa __kmalloc+0x119/0x1ac regulatory_hint_country_ie+0x38/0x329 [cfg80211] __cfg80211_connect_result+0x854/0xadd [cfg80211] cfg80211_rx_assoc_resp+0x3bc/0x4f0 [cfg80211] smsc95xx v1.0.6 ieee80211_sta_rx_queued_mgmt+0x1803/0x7ed5 [mac80211] ieee80211_iface_work+0x411/0x696 [mac80211] process_one_work+0x55f/0x8d0 worker_thread+0x5dd/0x841 kthread+0x270/0x285 ret_from_fork+0x22/0x40 Freed by task 89: set_track+0x63/0xfa kasan_slab_free+0x6a/0x87 kfree+0xdc/0x470 reg_process_hint+0x31e/0x8aa [cfg80211] reg_todo+0x204/0x5b9 [cfg80211] process_one_work+0x55f/0x8d0 worker_thread+0x5dd/0x841 kthread+0x270/0x285 ret_from_fork+0x22/0x40 Signed-off-by: Yu Zhao Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6f032c7b8732..bd91de416035 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2170,11 +2170,12 @@ static void reg_process_hint(struct regulatory_request *reg_request) { struct wiphy *wiphy = NULL; enum reg_request_treatment treatment; + enum nl80211_reg_initiator initiator = reg_request->initiator; if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - switch (reg_request->initiator) { + switch (initiator) { case NL80211_REGDOM_SET_BY_CORE: treatment = reg_process_hint_core(reg_request); break; @@ -2192,7 +2193,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) treatment = reg_process_hint_country_ie(wiphy, reg_request); break; default: - WARN(1, "invalid initiator %d\n", reg_request->initiator); + WARN(1, "invalid initiator %d\n", initiator); goto out_free; } @@ -2207,7 +2208,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) */ if (treatment == REG_REQ_ALREADY_SET && wiphy && wiphy->regulatory_flags & REGULATORY_STRICT_REG) { - wiphy_update_regulatory(wiphy, reg_request->initiator); + wiphy_update_regulatory(wiphy, initiator); wiphy_all_share_dfs_chan_state(wiphy); reg_check_channels(); } From ffc3cb561ecea45c95ba1690876c5e905eef3791 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Sep 2018 17:58:35 +0200 Subject: [PATCH 0038/2608] perf/core: Fix perf_pmu_unregister() locking [ Upstream commit a9f9772114c8b07ae75bcb3654bd017461248095 ] When we unregister a PMU, we fail to serialize the @pmu_idr properly. Fix that by doing the entire thing under pmu_lock. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 2e80a82a49c4 ("perf: Dynamic pmu types") Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4dbce29a9313..ee1c07c0b833 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9020,9 +9020,7 @@ static void free_pmu_context(struct pmu *pmu) if (pmu->task_ctx_nr > perf_invalid_context) return; - mutex_lock(&pmus_lock); free_percpu(pmu->pmu_cpu_context); - mutex_unlock(&pmus_lock); } /* @@ -9278,12 +9276,8 @@ EXPORT_SYMBOL_GPL(perf_pmu_register); void perf_pmu_unregister(struct pmu *pmu) { - int remove_device; - mutex_lock(&pmus_lock); - remove_device = pmu_bus_running; list_del_rcu(&pmu->entry); - mutex_unlock(&pmus_lock); /* * We dereference the pmu list under both SRCU and regular RCU, so @@ -9295,13 +9289,14 @@ void perf_pmu_unregister(struct pmu *pmu) free_percpu(pmu->pmu_disable_count); if (pmu->type >= PERF_TYPE_MAX) idr_remove(&pmu_idr, pmu->type); - if (remove_device) { + if (pmu_bus_running) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); device_del(pmu->dev); put_device(pmu->dev); } free_pmu_context(pmu); + mutex_unlock(&pmus_lock); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); From a18e2159c3ffcc8fe18155302d208adec443cd66 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 23 Sep 2018 18:13:43 +0200 Subject: [PATCH 0039/2608] perf/ring_buffer: Prevent concurent ring buffer access [ Upstream commit cd6fb677ce7e460c25bdd66f689734102ec7d642 ] Some of the scheduling tracepoints allow the perf_tp_event code to write to ring buffer under different cpu than the code is running on. This results in corrupted ring buffer data demonstrated in following perf commands: # perf record -e 'sched:sched_switch,sched:sched_wakeup' perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.383 [sec] [ perf record: Woken up 8 times to write data ] 0x42b890 [0]: failed to process type: -1765585640 [ perf record: Captured and wrote 4.825 MB perf.data (29669 samples) ] # perf report --stdio 0x42b890 [0]: failed to process type: -1765585640 The reason for the corruption are some of the scheduling tracepoints, that have __perf_task dfined and thus allow to store data to another cpu ring buffer: sched_waking sched_wakeup sched_wakeup_new sched_stat_wait sched_stat_sleep sched_stat_iowait sched_stat_blocked The perf_tp_event function first store samples for current cpu related events defined for tracepoint: hlist_for_each_entry_rcu(event, head, hlist_entry) perf_swevent_event(event, count, &data, regs); And then iterates events of the 'task' and store the sample for any task's event that passes tracepoint checks: ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) continue; perf_swevent_event(event, count, &data, regs); } Above code can race with same code running on another cpu, ending up with 2 cpus trying to store under the same ring buffer, which is specifically not allowed. This patch prevents the problem, by allowing only events with the same current cpu to receive the event. NOTE: this requires the use of (per-task-)per-cpu buffers for this feature to work; perf-record does this. Signed-off-by: Jiri Olsa [peterz: small edits to Changelog] Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andrew Vagin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: e6dab5ffab59 ("perf/trace: Add ability to set a target task for events") Link: http://lkml.kernel.org/r/20180923161343.GB15054@krava Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index ee1c07c0b833..991af683ef9e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8058,6 +8058,8 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, goto unlock; list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (event->cpu != smp_processor_id()) + continue; if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) From 40568f21f235aefb8755650f2a09797b3d297ec1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 21 Sep 2018 07:07:06 -0700 Subject: [PATCH 0040/2608] perf/x86/intel/uncore: Fix PCI BDF address of M3UPI on SKX [ Upstream commit 9d92cfeaf5215158d26d2991be7f7ff865cb98f3 ] The counters on M3UPI Link 0 and Link 3 don't count properly, and writing 0 to these counters may causes system crash on some machines. The PCI BDF addresses of the M3UPI in the current code are incorrect. The correct addresses should be: D18:F1 0x204D D18:F2 0x204E D18:F5 0x204D Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: cd34cd97b7b4 ("perf/x86/intel/uncore: Add Skylake server uncore support") Link: http://lkml.kernel.org/r/1537538826-55489-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/intel/uncore_snbep.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 2dae3f585c01..a68aba8a482f 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3807,16 +3807,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), }, { /* M3UPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), }, { /* M3UPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), }, { /* M3UPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), }, { /* end: all zeroes */ } }; From 784f8395895f7b7c56b3c1195c7ce0f0376085dc Mon Sep 17 00:00:00 2001 From: "Natarajan, Janakarajan" Date: Thu, 27 Sep 2018 15:51:55 +0000 Subject: [PATCH 0041/2608] perf/x86/amd/uncore: Set ThreadMask and SliceMask for L3 Cache perf events [ Upstream commit d7cbbe49a9304520181fb8c9272d1327deec8453 ] In Family 17h, some L3 Cache Performance events require the ThreadMask and SliceMask to be set. For other events, these fields do not affect the count either way. Set ThreadMask and SliceMask to 0xFF and 0xF respectively. Signed-off-by: Janakarajan Natarajan Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H . Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/amd/uncore.c | 10 ++++++++++ arch/x86/include/asm/perf_event.h | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index f5cbbba99283..4e1d7483b78c 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -35,6 +35,7 @@ static int num_counters_llc; static int num_counters_nb; +static bool l3_mask; static HLIST_HEAD(uncore_unused_list); @@ -208,6 +209,13 @@ static int amd_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; + /* + * SliceMask and ThreadMask need to be set for certain L3 events in + * Family 17h. For other events, the two fields do not affect the count. + */ + if (l3_mask) + hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); + if (event->cpu < 0) return -EINVAL; @@ -542,6 +550,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l3"; format_attr_event_df.show = &event_show_df; format_attr_event_l3.show = &event_show_l3; + l3_mask = true; } else { num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; @@ -549,6 +558,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l2"; format_attr_event_df = format_attr_event; format_attr_event_l3 = format_attr_event; + l3_mask = false; } amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f54082f4c8..78241b736f2a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,6 +46,14 @@ #define INTEL_ARCH_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) +#define AMD64_L3_SLICE_SHIFT 48 +#define AMD64_L3_SLICE_MASK \ + ((0xFULL) << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK | \ From 1837dbb25cc517d96d51e4c9e1e8bffd80e9b48c Mon Sep 17 00:00:00 2001 From: Rickard x Andersson Date: Tue, 2 Oct 2018 14:49:32 +0200 Subject: [PATCH 0042/2608] net: fec: fix rare tx timeout [ Upstream commit 657ade07df72847f591ccdb36bd9b91ed0edbac3 ] During certain heavy network loads TX could time out with TX ring dump. TX is sometimes never restarted after reaching "tx_stop_threshold" because function "fec_enet_tx_queue" only tests the first queue. In addition the TX timeout callback function failed to recover because it also operated only on the first queue. Signed-off-by: Rickard x Andersson Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index eb2ea231c7ca..8bfa6ef826a9 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1155,7 +1155,7 @@ static void fec_enet_timeout_work(struct work_struct *work) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -1270,7 +1270,7 @@ skb_done: /* Since we have freed up a buffer, the ring is no longer full */ - if (netif_queue_stopped(ndev)) { + if (netif_tx_queue_stopped(nq)) { entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free >= txq->tx_wake_threshold) netif_tx_wake_queue(nq); @@ -1747,7 +1747,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -2249,7 +2249,7 @@ static int fec_enet_set_pauseparam(struct net_device *ndev, napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } From 3095f0c98c6a2a3e279c801955efdd73ff54c76e Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 2 Oct 2018 14:23:45 +0100 Subject: [PATCH 0043/2608] declance: Fix continuation with the adapter identification message [ Upstream commit fe3a83af6a50199bf250fa331e94216912f79395 ] Fix a commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") regression with the `declance' driver, which caused the adapter identification message to be split between two lines, e.g.: declance.c: v0.011 by Linux MIPS DECstation task force tc6: PMAD-AA , addr = 08:00:2b:1b:2a:6a, irq = 14 tc6: registered as eth0. Address that properly, by printing identification with a single call, making the messages now look like: declance.c: v0.011 by Linux MIPS DECstation task force tc6: PMAD-AA, addr = 08:00:2b:1b:2a:6a, irq = 14 tc6: registered as eth0. Signed-off-by: Maciej W. Rozycki Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/declance.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 82cc81385033..c7cde58feaf7 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1029,6 +1029,7 @@ static int dec_lance_probe(struct device *bdev, const int type) int i, ret; unsigned long esar_base; unsigned char *esar; + const char *desc; if (dec_lance_debug && version_printed++ == 0) printk(version); @@ -1214,19 +1215,20 @@ static int dec_lance_probe(struct device *bdev, const int type) */ switch (type) { case ASIC_LANCE: - printk("%s: IOASIC onboard LANCE", name); + desc = "IOASIC onboard LANCE"; break; case PMAD_LANCE: - printk("%s: PMAD-AA", name); + desc = "PMAD-AA"; break; case PMAX_LANCE: - printk("%s: PMAX onboard LANCE", name); + desc = "PMAX onboard LANCE"; break; } for (i = 0; i < 6; i++) dev->dev_addr[i] = esar[i * 4]; - printk(", addr = %pM, irq = %d\n", dev->dev_addr, dev->irq); + printk("%s: %s, addr = %pM, irq = %d\n", + name, desc, dev->dev_addr, dev->irq); dev->netdev_ops = &lance_netdev_ops; dev->watchdog_timeo = 5*HZ; From a0ab962b674dd64a7bce9de8b5a108f4832d6139 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 2 Oct 2018 18:52:01 -0600 Subject: [PATCH 0044/2608] net: qualcomm: rmnet: Skip processing loopback packets [ Upstream commit a07f388e2cde2be74b263f85df6f672fea0305a1 ] RMNET RX handler was processing invalid packets that were originally sent on the real device and were looped back via dev_loopback_xmit(). This was detected using syzkaller. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 929fb8d96ec0..8d979fef5fc7 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -205,6 +205,9 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) if (!skb) return RX_HANDLER_CONSUMED; + if (skb->pkt_type == PACKET_LOOPBACK) + return RX_HANDLER_PASS; + dev = skb->dev; port = rmnet_get_port(dev); From 45894023bee9e2733083891650b03eaa2ce794a1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 2 Oct 2018 14:48:49 -0700 Subject: [PATCH 0045/2608] locking/ww_mutex: Fix runtime warning in the WW mutex selftest [ Upstream commit e4a02ed2aaf447fa849e3254bfdb3b9b01e1e520 ] If CONFIG_WW_MUTEX_SELFTEST=y is enabled, booting an image in an arm64 virtual machine results in the following traceback if 8 CPUs are enabled: DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current) WARNING: CPU: 2 PID: 537 at kernel/locking/mutex.c:1033 __mutex_unlock_slowpath+0x1a8/0x2e0 ... Call trace: __mutex_unlock_slowpath() ww_mutex_unlock() test_cycle_work() process_one_work() worker_thread() kthread() ret_from_fork() If requesting b_mutex fails with -EDEADLK, the error variable is reassigned to the return value from calling ww_mutex_lock on a_mutex again. If this call fails, a_mutex is not locked. It is, however, unconditionally unlocked subsequently, causing the reported warning. Fix the problem by using two error variables. With this change, the selftest still fails as follows: cyclic deadlock not resolved, ret[7/8] = -35 However, the traceback is gone. Signed-off-by: Guenter Roeck Cc: Chris Wilson Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Fixes: d1b42b800e5d0 ("locking/ww_mutex: Add kselftests for resolving ww_mutex cyclic deadlocks") Link: http://lkml.kernel.org/r/1538516929-9734-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/locking/test-ww_mutex.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 0e4cd64ad2c0..654977862b06 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -260,7 +260,7 @@ static void test_cycle_work(struct work_struct *work) { struct test_cycle *cycle = container_of(work, typeof(*cycle), work); struct ww_acquire_ctx ctx; - int err; + int err, erra = 0; ww_acquire_init(&ctx, &ww_class); ww_mutex_lock(&cycle->a_mutex, &ctx); @@ -270,17 +270,19 @@ static void test_cycle_work(struct work_struct *work) err = ww_mutex_lock(cycle->b_mutex, &ctx); if (err == -EDEADLK) { + err = 0; ww_mutex_unlock(&cycle->a_mutex); ww_mutex_lock_slow(cycle->b_mutex, &ctx); - err = ww_mutex_lock(&cycle->a_mutex, &ctx); + erra = ww_mutex_lock(&cycle->a_mutex, &ctx); } if (!err) ww_mutex_unlock(cycle->b_mutex); - ww_mutex_unlock(&cycle->a_mutex); + if (!erra) + ww_mutex_unlock(&cycle->a_mutex); ww_acquire_fini(&ctx); - cycle->result = err; + cycle->result = err ?: erra; } static int __test_cycle(unsigned int nthreads) From 049b662eeaddf34fab850cc4023d8cc1a7709ef6 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 3 Oct 2018 15:20:58 +0200 Subject: [PATCH 0046/2608] be2net: don't flip hw_features when VXLANs are added/deleted [ Upstream commit 2d52527e80c2dc0c5f43f50adf183781262ec565 ] the be2net implementation of .ndo_tunnel_{add,del}() changes the value of NETIF_F_GSO_UDP_TUNNEL bit in 'features' and 'hw_features', but it forgets to call netdev_features_change(). Moreover, ethtool setting for that bit can potentially be reverted after a tunnel is added or removed. GSO already does software segmentation when 'hw_enc_features' is 0, even if VXLAN offload is turned on. In addition, commit 096de2f83ebc ("benet: stricter vxlan offloading check in be_features_check") avoids hardware segmentation of non-VXLAN tunneled packets, or VXLAN packets having wrong destination port. So, it's safe to avoid flipping the above feature on addition/deletion of VXLAN tunnels. Fixes: 630f4b70567f ("be2net: Export tunnel offloads only when a VxLAN tunnel is created") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/emulex/benet/be_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e2b70c2bba3..39f399741647 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3900,8 +3900,6 @@ static int be_enable_vxlan_offloads(struct be_adapter *adapter) netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->features |= NETIF_F_GSO_UDP_TUNNEL; dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n", be16_to_cpu(port)); @@ -3923,8 +3921,6 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter) adapter->vxlan_port = 0; netdev->hw_enc_features = 0; - netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL); - netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL); } static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs, @@ -5215,6 +5211,7 @@ static void be_netdev_init(struct net_device *netdev) struct be_adapter *adapter = netdev_priv(netdev); netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX; if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) From ec4e9618d1fdc980b7018c698cccbc1bbcd6e3fc Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 5 Oct 2018 08:48:27 -0500 Subject: [PATCH 0047/2608] net: cxgb3_main: fix a missing-check bug [ Upstream commit 2c05d88818ab6571816b93edce4d53703870d7ae ] In cxgb_extension_ioctl(), the command of the ioctl is firstly copied from the user-space buffer 'useraddr' to 'cmd' and checked through the switch statement. If the command is not as expected, an error code EOPNOTSUPP is returned. In the following execution, i.e., the cases of the switch statement, the whole buffer of 'useraddr' is copied again to a specific data structure, according to what kind of command is requested. However, after the second copy, there is no re-check on the newly-copied command. Given that the buffer 'useraddr' is in the user space, a malicious user can race to change the command between the two copies. By doing so, the attacker can supply malicious data to the kernel and cause undefined behavior. This patch adds a re-check in each case of the switch statement if there is a second copy in that case, to re-check whether the command obtained in the second copy is the same as the one in the first copy. If not, an error code EINVAL is returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index bf291e90cdb0..79053d2ce7a3 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2159,6 +2159,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_QSET_PARAMS) + return -EINVAL; if (t.qset_idx >= SGE_QSETS) return -EINVAL; if (!in_range(t.intr_lat, 0, M_NEWTIMER) || @@ -2258,6 +2260,9 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_QSET_PARAMS) + return -EINVAL; + /* Display qsets for all ports when offload enabled */ if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) { q1 = 0; @@ -2303,6 +2308,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; + if (edata.cmd != CHELSIO_SET_QSET_NUM) + return -EINVAL; if (edata.val < 1 || (edata.val > 1 && !(adapter->flags & USING_MSIX))) return -EINVAL; @@ -2343,6 +2350,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_LOAD_FW) + return -EINVAL; /* Check t.len sanity ? */ fw_data = memdup_user(useraddr + sizeof(t), t.len); if (IS_ERR(fw_data)) @@ -2366,6 +2375,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SETMTUTAB) + return -EINVAL; if (m.nmtus != NMTUS) return -EINVAL; if (m.mtus[0] < 81) /* accommodate SACK */ @@ -2407,6 +2418,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SET_PM) + return -EINVAL; if (!is_power_of_2(m.rx_pg_sz) || !is_power_of_2(m.tx_pg_sz)) return -EINVAL; /* not power of 2 */ @@ -2440,6 +2453,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_MEM) + return -EINVAL; if ((t.addr & 7) || (t.len & 7)) return -EINVAL; if (t.mem_id == MEM_CM) @@ -2492,6 +2507,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EAGAIN; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_TRACE_FILTER) + return -EINVAL; tp = (const struct trace_params *)&t.sip; if (t.config_tx) From d0539c56391d449603f7ff2bcddf38995df49749 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 5 Oct 2018 10:59:36 -0500 Subject: [PATCH 0048/2608] yam: fix a missing-check bug [ Upstream commit 0781168e23a2fc8dceb989f11fc5b39b3ccacc35 ] In yam_ioctl(), the concrete ioctl command is firstly copied from the user-space buffer 'ifr->ifr_data' to 'ioctl_cmd' and checked through the following switch statement. If the command is not as expected, an error code EINVAL is returned. In the following execution the buffer 'ifr->ifr_data' is copied again in the cases of the switch statement to specific data structures according to what kind of ioctl command is requested. However, after the second copy, no re-check is enforced on the newly-copied command. Given that the buffer 'ifr->ifr_data' is in the user space, a malicious user can race to change the command between the two copies. This way, the attacker can inject inconsistent data and cause undefined behavior. This patch adds a re-check in each case of the switch statement if there is a second copy in that case, to re-check whether the command obtained in the second copy is the same as the one in the first copy. If not, an error code EINVAL will be returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hamradio/yam.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 7a7c5224a336..16a6e1193912 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -980,6 +980,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); + if (ym->cmd != SIOCYAMSMCS) + return -EINVAL; if (ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; @@ -995,6 +997,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg))) return -EFAULT; + if (yi.cmd != SIOCYAMSCFG) + return -EINVAL; if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev)) return -EINVAL; /* Cannot change this parameter when up */ if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev)) From f83f38fcf17fffd875f1f7f20213aaf4dfdbe5d0 Mon Sep 17 00:00:00 2001 From: Larry Chen Date: Fri, 5 Oct 2018 15:51:37 -0700 Subject: [PATCH 0049/2608] ocfs2: fix crash in ocfs2_duplicate_clusters_by_page() [ Upstream commit 69eb7765b9c6902444c89c54e7043242faf981e5 ] ocfs2_duplicate_clusters_by_page() may crash if one of the extent's pages is dirty. When a page has not been written back, it is still in dirty state. If ocfs2_duplicate_clusters_by_page() is called against the dirty page, the crash happens. To fix this bug, we can just unlock the page and wait until the page until its not dirty. The following is the backtrace: kernel BUG at /root/code/ocfs2/refcounttree.c:2961! [exception RIP: ocfs2_duplicate_clusters_by_page+822] __ocfs2_move_extent+0x80/0x450 [ocfs2] ? __ocfs2_claim_clusters+0x130/0x250 [ocfs2] ocfs2_defrag_extent+0x5b8/0x5e0 [ocfs2] __ocfs2_move_extents_range+0x2a4/0x470 [ocfs2] ocfs2_move_extents+0x180/0x3b0 [ocfs2] ? ocfs2_wait_for_recovery+0x13/0x70 [ocfs2] ocfs2_ioctl_move_extents+0x133/0x2d0 [ocfs2] ocfs2_ioctl+0x253/0x640 [ocfs2] do_vfs_ioctl+0x90/0x5f0 SyS_ioctl+0x74/0x80 do_syscall_64+0x74/0x140 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Once we find the page is dirty, we do not wait until it's clean, rather we use write_one_page() to write it back Link: http://lkml.kernel.org/r/20180829074740.9438-1-lchen@suse.com [lchen@suse.com: update comments] Link: http://lkml.kernel.org/r/20180830075041.14879-1-lchen@suse.com [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Larry Chen Acked-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/ocfs2/refcounttree.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 1b1283f07941..824f407df1db 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (map_end & (PAGE_SIZE - 1)) to = map_end & (PAGE_SIZE - 1); +retry: page = find_or_create_page(mapping, page_index, GFP_NOFS); if (!page) { ret = -ENOMEM; @@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, } /* - * In case PAGE_SIZE <= CLUSTER_SIZE, This page - * can't be dirtied before we CoW it out. + * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty + * page, so write it back. */ - if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) - BUG_ON(PageDirty(page)); + if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) { + if (PageDirty(page)) { + /* + * write_on_page will unlock the page on return + */ + ret = write_one_page(page); + goto retry; + } + } if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); From d1efab095c20ee5051d3273123d950da4e88361e Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 29 Oct 2017 10:46:39 +0200 Subject: [PATCH 0050/2608] iwlwifi: mvm: check for short GI only for OFDM [ Upstream commit 4c59ff5a9a9c54cc26c807dc2fa6933f7e9fa4ef ] This bit will be used in CCK to indicate short preamble. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 2d14a58cbdd7..c73e4be9bde3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -439,7 +439,8 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->bw = RATE_INFO_BW_160; break; } - if (rate_n_flags & RATE_MCS_SGI_MSK) + if (!(rate_n_flags & RATE_MCS_CCK_MSK) && + rate_n_flags & RATE_MCS_SGI_MSK) rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; if (rate_n_flags & RATE_HT_MCS_GF_MSK) rx_status->enc_flags |= RX_ENC_FLAG_HT_GF; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index e2196dc35dc6..8ba8c70571fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -981,7 +981,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->bw = RATE_INFO_BW_160; break; } - if (rate_n_flags & RATE_MCS_SGI_MSK) + + if (!(rate_n_flags & RATE_MCS_CCK_MSK) && + rate_n_flags & RATE_MCS_SGI_MSK) rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; if (rate_n_flags & RATE_HT_MCS_GF_MSK) rx_status->enc_flags |= RX_ENC_FLAG_HT_GF; From 4187fbd60c3993ca4a26b20db218dd4023df841c Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 31 Oct 2017 15:54:50 +0200 Subject: [PATCH 0051/2608] iwlwifi: dbg: allow wrt collection before ALIVE [ Upstream commit dfd4b08cf44f27587e2053e006e43a1603328006 ] Even if no ALIVE was received, the WRT data can still be collected. Add this. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 2fa7ec466275..839010417241 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -950,7 +950,20 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt, if (trigger) delay = msecs_to_jiffies(le32_to_cpu(trigger->stop_delay)); - if (WARN(fwrt->trans->state == IWL_TRANS_NO_FW, + /* + * If the loading of the FW completed successfully, the next step is to + * get the SMEM config data. Thus, if fwrt->smem_cfg.num_lmacs is non + * zero, the FW was already loaded successully. If the state is "NO_FW" + * in such a case - WARN and exit, since FW may be dead. Otherwise, we + * can try to collect the data, since FW might just not be fully + * loaded (no "ALIVE" yet), and the debug data is accessible. + * + * Corner case: got the FW alive but crashed before getting the SMEM + * config. In such a case, due to HW access problems, we might + * collect garbage. + */ + if (WARN((fwrt->trans->state == IWL_TRANS_NO_FW) && + fwrt->smem_cfg.num_lmacs, "Can't collect dbg data when FW isn't alive\n")) return -EIO; From 18a83c0133928c5162c607a45fe541687adc03b1 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 7 Nov 2017 23:54:17 +0200 Subject: [PATCH 0052/2608] iwlwifi: fix the ALIVE notification layout [ Upstream commit 5cd2d8fc6c6bca979ac5dd8ad0e41153f1f982f9 ] The ucode_major and ucode_minor were swapped. This has no practical consequences since those fields are not used. Same goes for umac_major and umac_minor which were only printed under certain debug flags. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/api/alive.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h b/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h index 3684a3e180e5..007bfe7656a4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h @@ -95,8 +95,8 @@ enum { #define IWL_ALIVE_FLG_RFKILL BIT(0) struct iwl_lmac_alive { - __le32 ucode_minor; __le32 ucode_major; + __le32 ucode_minor; u8 ver_subtype; u8 ver_type; u8 mac; @@ -113,8 +113,8 @@ struct iwl_lmac_alive { } __packed; /* UCODE_ALIVE_NTFY_API_S_VER_3 */ struct iwl_umac_alive { - __le32 umac_minor; /* UMAC version: minor */ __le32 umac_major; /* UMAC version: major */ + __le32 umac_minor; /* UMAC version: minor */ __le32 error_info_addr; /* SRAM address for UMAC error log */ __le32 dbg_print_buff_addr; } __packed; /* UMAC_ALIVE_DATA_API_S_VER_2 */ From e37e383a4e6610b5a78111f88d2a74f2a533f78c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 12 Nov 2017 14:54:23 -0800 Subject: [PATCH 0053/2608] tools/testing/nvdimm: unit test clear-error commands [ Upstream commit fb2a1748355161e050e9f49f1ea9a0ae707a148b ] Validate command parsing in acpi_nfit_ctl for the clear error command. This tests for a crash condition introduced by commit 4b27db7e26cd "acpi, nfit: add support for the _LSI, _LSR, and _LSW label methods". Cc: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- tools/testing/nvdimm/test/nfit.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index bef419d4266d..3ad0b3a3317b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1589,6 +1589,7 @@ static int nfit_ctl_test(struct device *dev) unsigned long mask, cmd_size, offset; union { struct nd_cmd_get_config_size cfg_size; + struct nd_cmd_clear_error clear_err; struct nd_cmd_ars_status ars_stat; struct nd_cmd_ars_cap ars_cap; char buf[sizeof(struct nd_cmd_ars_status) @@ -1767,6 +1768,23 @@ static int nfit_ctl_test(struct device *dev) return -EIO; } + /* test clear error */ + cmd_size = sizeof(cmds.clear_err); + cmds.clear_err = (struct nd_cmd_clear_error) { + .length = 512, + .cleared = 512, + }; + rc = setup_result(cmds.buf, cmd_size); + if (rc) + return rc; + rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR, + cmds.buf, cmd_size, &cmd_rc); + if (rc < 0 || cmd_rc) { + dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", + __func__, __LINE__, rc, cmd_rc); + return -EIO; + } + return 0; } From 3b6c6d9cbdea50aed8f6bf22575461d8009e3951 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 18 Jan 2018 17:25:30 -0700 Subject: [PATCH 0054/2608] usbip: vhci_hcd: update 'status' file header and format [ Upstream commit 5468099c747240ed97dbb34340fece8ca87be34f ] Commit 2f2d0088eb93 ("usbip: prevent vhci_hcd driver from leaking a socket pointer address") in the /sys/devices/platform/vhci_hcd/status. Fix the header and field alignment to reflect the changes and make it easier to read. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/usbip/vhci_sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index 4a22a9f06d96..eb7898353457 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -34,10 +34,10 @@ /* * output example: - * hub port sta spd dev sockfd local_busid - * hs 0000 004 000 00000000 3 1-2.3 + * hub port sta spd dev sockfd local_busid + * hs 0000 004 000 00000000 000003 1-2.3 * ................................................ - * ss 0008 004 000 00000000 4 2-3.4 + * ss 0008 004 000 00000000 000004 2-3.4 * ................................................ * * Output includes socket fd instead of socket pointer address to avoid @@ -61,13 +61,13 @@ static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vd if (vdev->ud.status == VDEV_ST_USED) { *out += sprintf(*out, "%03u %08x ", vdev->speed, vdev->devid); - *out += sprintf(*out, "%u %s", + *out += sprintf(*out, "%06u %s", vdev->ud.sockfd, dev_name(&vdev->udev->dev)); } else { *out += sprintf(*out, "000 00000000 "); - *out += sprintf(*out, "0000000000000000 0-0"); + *out += sprintf(*out, "000000 0-0"); } *out += sprintf(*out, "\n"); @@ -165,7 +165,7 @@ static ssize_t status_show(struct device *dev, int pdev_nr; out += sprintf(out, - "hub port sta spd dev socket local_busid\n"); + "hub port sta spd dev sockfd local_busid\n"); pdev_nr = status_name_to_id(attr->attr.name); if (pdev_nr < 0) From c750773f3942863a37e7415934a646a0da7b1040 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Nov 2017 14:25:25 +0100 Subject: [PATCH 0055/2608] scsi: aacraid: address UBSAN warning regression [ Upstream commit d18539754d97876503275efc7d00a1901bb0cfad ] As reported by Meelis Roos, my previous patch causes an incorrect calculation of the timeout, through an undefined signed integer overflow: [ 12.228155] UBSAN: Undefined behaviour in drivers/scsi/aacraid/commsup.c:2514:49 [ 12.228229] signed integer overflow: [ 12.228283] 964297611 * 250 cannot be represented in type 'long int' The problem is that doing a multiplication with HZ first and then dividing by USEC_PER_SEC worked correctly for 32-bit microseconds, but not for 32-bit nanoseconds, which would require up to 41 bits. This reworks the calculation to first convert the nanoseconds into jiffies, which should give us the same result as before and not overflow. Unfortunately I did not understand the exact intention of the algorithm, in particular the part where we add half a second, so it's possible that there is still a preexisting problem in this function. I added a comment that this would be handled more nicely using usleep_range(), which generally works better for waking up at a particular time than the current schedule_timeout() based implementation. I did not feel comfortable trying to implement that without being sure what the intent is here though. Fixes: 820f18865912 ("scsi: aacraid: use timespec64 instead of timeval") Tested-by: Meelis Roos Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aacraid/commsup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 998788a967be..3e38bae6ecde 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2506,8 +2506,8 @@ int aac_command_thread(void *data) /* Synchronize our watches */ if (((NSEC_PER_SEC - (NSEC_PER_SEC / HZ)) > now.tv_nsec) && (now.tv_nsec > (NSEC_PER_SEC / HZ))) - difference = (((NSEC_PER_SEC - now.tv_nsec) * HZ) - + NSEC_PER_SEC / 2) / NSEC_PER_SEC; + difference = HZ + HZ / 2 - + now.tv_nsec / (NSEC_PER_SEC / HZ); else { if (now.tv_nsec > NSEC_PER_SEC / 2) ++now.tv_sec; @@ -2531,6 +2531,10 @@ int aac_command_thread(void *data) if (kthread_should_stop()) break; + /* + * we probably want usleep_range() here instead of the + * jiffies computation + */ schedule_timeout(difference); if (kthread_should_stop()) From 8c954368de6915c4dcbbceca279758ab654bad37 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 21 Dec 2017 17:38:27 +0200 Subject: [PATCH 0056/2608] IB/ipoib: Fix lockdep issue found on ipoib_ib_dev_heavy_flush [ Upstream commit 1f80bd6a6cc8358b81194e1f5fc16449947396ec ] The locking order of vlan_rwsem (LOCK A) and then rtnl (LOCK B), contradicts other flows such as ipoib_open possibly causing a deadlock. To prevent this deadlock heavy flush is called with RTNL locked and only then tries to acquire vlan_rwsem. This deadlock is possible only when there are child interfaces. [ 140.941758] ====================================================== [ 140.946276] WARNING: possible circular locking dependency detected [ 140.950950] 4.15.0-rc1+ #9 Tainted: G O [ 140.954797] ------------------------------------------------------ [ 140.959424] kworker/u32:1/146 is trying to acquire lock: [ 140.963450] (rtnl_mutex){+.+.}, at: [] __ipoib_ib_dev_flush+0x2da/0x4e0 [ib_ipoib] [ 140.970006] but task is already holding lock: [ 140.975141] (&priv->vlan_rwsem){++++}, at: [] __ipoib_ib_dev_flush+0x51/0x4e0 [ib_ipoib] [ 140.982105] which lock already depends on the new lock. [ 140.990023] the existing dependency chain (in reverse order) is: [ 140.998650] -> #1 (&priv->vlan_rwsem){++++}: [ 141.005276] down_read+0x4d/0xb0 [ 141.009560] ipoib_open+0xad/0x120 [ib_ipoib] [ 141.014400] __dev_open+0xcb/0x140 [ 141.017919] __dev_change_flags+0x1a4/0x1e0 [ 141.022133] dev_change_flags+0x23/0x60 [ 141.025695] devinet_ioctl+0x704/0x7d0 [ 141.029156] sock_do_ioctl+0x20/0x50 [ 141.032526] sock_ioctl+0x221/0x300 [ 141.036079] do_vfs_ioctl+0xa6/0x6d0 [ 141.039656] SyS_ioctl+0x74/0x80 [ 141.042811] entry_SYSCALL_64_fastpath+0x1f/0x96 [ 141.046891] -> #0 (rtnl_mutex){+.+.}: [ 141.051701] lock_acquire+0xd4/0x220 [ 141.055212] __mutex_lock+0x88/0x970 [ 141.058631] __ipoib_ib_dev_flush+0x2da/0x4e0 [ib_ipoib] [ 141.063160] __ipoib_ib_dev_flush+0x71/0x4e0 [ib_ipoib] [ 141.067648] process_one_work+0x1f5/0x610 [ 141.071429] worker_thread+0x4a/0x3f0 [ 141.074890] kthread+0x141/0x180 [ 141.078085] ret_from_fork+0x24/0x30 [ 141.081559] other info that might help us debug this: [ 141.088967] Possible unsafe locking scenario: [ 141.094280] CPU0 CPU1 [ 141.097953] ---- ---- [ 141.101640] lock(&priv->vlan_rwsem); [ 141.104771] lock(rtnl_mutex); [ 141.109207] lock(&priv->vlan_rwsem); [ 141.114032] lock(rtnl_mutex); [ 141.116800] *** DEADLOCK *** Fixes: b4b678b06f6e ("IB/ipoib: Grab rtnl lock on heavy flush when calling ndo_open/stop") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index c97384c914a4..d77e8e2ae05f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1203,13 +1203,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { - rtnl_lock(); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - result = ipoib_ib_dev_open(dev); - rtnl_unlock(); - if (result) + if (ipoib_ib_dev_open(dev)) return; if (netif_queue_stopped(dev)) @@ -1249,7 +1246,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); + rtnl_lock(); __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); + rtnl_unlock(); } void ipoib_ib_dev_cleanup(struct net_device *dev) From 18addd960fe854df99727139040535e72addc6ea Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 9 Oct 2017 09:11:32 -0400 Subject: [PATCH 0057/2608] IB/rxe: put the pool on allocation failure [ Upstream commit 6b9f8970cd30929cb6b372fa44fa66da9e59c650 ] If the allocation of elem fails, it is not sufficient to simply check for NULL and return. We need to also put our reference on the pool or else we will leave the pool with a permanent ref count and we will never be able to free it. Fixes: 4831ca9e4a8e ("IB/rxe: check for allocation failure on elem") Suggested-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_pool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 3b4916680018..b4a8acc7bb7d 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -394,23 +394,25 @@ void *rxe_alloc(struct rxe_pool *pool) kref_get(&pool->rxe->ref_cnt); - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) { - atomic_dec(&pool->num_elem); - rxe_dev_put(pool->rxe); - rxe_pool_put(pool); - return NULL; - } + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_put_pool; elem = kmem_cache_zalloc(pool_cache(pool), (pool->flags & RXE_POOL_ATOMIC) ? GFP_ATOMIC : GFP_KERNEL); if (!elem) - return NULL; + goto out_put_pool; elem->pool = pool; kref_init(&elem->ref_cnt); return elem; + +out_put_pool: + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); + return NULL; } void rxe_elem_release(struct kref *kref) From 049fee28bba39dcf1bcec5fa56fa2a8eefd2e853 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Apr 2018 12:52:06 +0200 Subject: [PATCH 0058/2608] s390/qeth: fix error handling in adapter command callbacks [ Upstream commit 686c97ee29c886ee07d17987d0059874c5c3b5af ] Make sure to check both return code fields before(!) processing the command response. Otherwise we risk operating on invalid data. This matches an earlier fix for SETASSPARMS commands, see commit ad3cbf613329 ("s390/qeth: fix error handling in checksum cmd callback"). Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 85 ++++++++++++++----------------- 1 file changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4f2747cd15a6..169dd7127f9e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3001,28 +3001,23 @@ static int qeth_send_startlan(struct qeth_card *card) return rc; } -static int qeth_default_setadapterparms_cb(struct qeth_card *card, - struct qeth_reply *reply, unsigned long data) +static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd) { - struct qeth_ipa_cmd *cmd; - - QETH_CARD_TEXT(card, 4, "defadpcb"); - - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) + if (!cmd->hdr.return_code) cmd->hdr.return_code = cmd->data.setadapterparms.hdr.return_code; - return 0; + return cmd->hdr.return_code; } static int qeth_query_setadapterparms_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 3, "quyadpcb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *) data; if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) { card->info.link_type = cmd->data.setadapterparms.data.query_cmds_supp.lan_type; @@ -3030,7 +3025,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card, } card->options.adp.supported_funcs = cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds; - return qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd); + return 0; } static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card, @@ -3122,22 +3117,20 @@ EXPORT_SYMBOL_GPL(qeth_query_ipassists); static int qeth_query_switch_attributes_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; - struct qeth_switch_info *sw_info; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_query_switch_attributes *attrs; + struct qeth_switch_info *sw_info; QETH_CARD_TEXT(card, 2, "qswiatcb"); - cmd = (struct qeth_ipa_cmd *) data; - sw_info = (struct qeth_switch_info *)reply->param; - if (cmd->data.setadapterparms.hdr.return_code == 0) { - attrs = &cmd->data.setadapterparms.data.query_switch_attributes; - sw_info->capabilities = attrs->capabilities; - sw_info->settings = attrs->settings; - QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities, - sw_info->settings); - } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; + sw_info = (struct qeth_switch_info *)reply->param; + attrs = &cmd->data.setadapterparms.data.query_switch_attributes; + sw_info->capabilities = attrs->capabilities; + sw_info->settings = attrs->settings; + QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities, + sw_info->settings); return 0; } @@ -4188,16 +4181,13 @@ EXPORT_SYMBOL_GPL(qeth_do_send_packet); static int qeth_setadp_promisc_mode_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_ipacmd_setadpparms *setparms; QETH_CARD_TEXT(card, 4, "prmadpcb"); - cmd = (struct qeth_ipa_cmd *) data; setparms = &(cmd->data.setadapterparms); - - qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd); - if (cmd->hdr.return_code) { + if (qeth_setadpparms_inspect_rc(cmd)) { QETH_CARD_TEXT_(card, 4, "prmrc%x", cmd->hdr.return_code); setparms->data.mode = SET_PROMISC_MODE_OFF; } @@ -4267,11 +4257,12 @@ EXPORT_SYMBOL_GPL(qeth_get_stats); static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 4, "chgmaccb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *) data; if (!card->options.layer2 || !(card->info.mac_bits & QETH_LAYER2_MAC_READ)) { memcpy(card->dev->dev_addr, @@ -4279,7 +4270,6 @@ static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, OSA_ADDR_LEN); card->info.mac_bits |= QETH_LAYER2_MAC_READ; } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); return 0; } @@ -4310,13 +4300,15 @@ EXPORT_SYMBOL_GPL(qeth_setadpparms_change_macaddr); static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_set_access_ctrl *access_ctrl_req; int fallback = *(int *)reply->param; QETH_CARD_TEXT(card, 4, "setaccb"); + if (cmd->hdr.return_code) + return 0; + qeth_setadpparms_inspect_rc(cmd); - cmd = (struct qeth_ipa_cmd *) data; access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl; QETH_DBF_TEXT_(SETUP, 2, "setaccb"); QETH_DBF_TEXT_(SETUP, 2, "%s", card->gdev->dev.kobj.name); @@ -4389,7 +4381,6 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, card->options.isolation = card->options.prev_isolation; break; } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); return 0; } @@ -4677,14 +4668,15 @@ out: static int qeth_setadpparms_query_oat_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; struct qeth_qoat_priv *priv; char *resdata; int resdatalen; QETH_CARD_TEXT(card, 3, "qoatcb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *)data; priv = (struct qeth_qoat_priv *)reply->param; resdatalen = cmd->data.setadapterparms.hdr.cmdlength; resdata = (char *)data + 28; @@ -4778,21 +4770,18 @@ out: static int qeth_query_card_info_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct carrier_info *carrier_info = (struct carrier_info *)reply->param; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; struct qeth_query_card_info *card_info; - struct carrier_info *carrier_info; QETH_CARD_TEXT(card, 2, "qcrdincb"); - carrier_info = (struct carrier_info *)reply->param; - cmd = (struct qeth_ipa_cmd *)data; - card_info = &cmd->data.setadapterparms.data.card_info; - if (cmd->data.setadapterparms.hdr.return_code == 0) { - carrier_info->card_type = card_info->card_type; - carrier_info->port_mode = card_info->port_mode; - carrier_info->port_speed = card_info->port_speed; - } + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); + card_info = &cmd->data.setadapterparms.data.card_info; + carrier_info->card_type = card_info->card_type; + carrier_info->port_mode = card_info->port_mode; + carrier_info->port_speed = card_info->port_speed; return 0; } From 71a9d1240a30504af52fdd48995a3c9ef2cc467c Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 12 Apr 2018 09:49:11 +0000 Subject: [PATCH 0059/2608] net/mlx5: Fix mlx5_get_vector_affinity function [ Upstream commit 6082d9c9c94a408d7409b5f2e4e42ac9e8b16d0d ] Adding the vector offset when calling to mlx5_vector2eqn() is wrong. This is because mlx5_vector2eqn() checks if EQ index is equal to vector number and the fact that the internal completion vectors that mlx5 allocates don't get an EQ index. The second problem here is that using effective_affinity_mask gives the same CPU for different vectors. This leads to unmapped queues when calling it from blk_mq_rdma_map_queues(). This doesn't happen when using affinity_hint mask. Fixes: 2572cf57d75a ("mlx5: fix mlx5_get_vector_affinity to start from completion vector 0") Fixes: 05e0cc84e00c ("net/mlx5: Fix get vector affinity helper function") Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/main.c | 2 +- include/linux/mlx5/driver.h | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ab70194a73db..c3a4f5d92391 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3911,7 +3911,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - return mlx5_get_vector_affinity(dev->mdev, comp_vector); + return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); } static void *mlx5_ib_add(struct mlx5_core_dev *mdev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c4d19e77fea8..5eff332092bc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1193,25 +1193,19 @@ enum { }; static inline const struct cpumask * -mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) +mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - const struct cpumask *mask; struct irq_desc *desc; unsigned int irq; int eqn; int err; - err = mlx5_vector2eqn(dev, MLX5_EQ_VEC_COMP_BASE + vector, &eqn, &irq); + err = mlx5_vector2eqn(dev, vector, &eqn, &irq); if (err) return NULL; desc = irq_to_desc(irq); -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK - mask = irq_data_get_effective_affinity_mask(&desc->irq_data); -#else - mask = desc->irq_common_data.affinity; -#endif - return mask; + return desc->affinity_hint; } #endif /* MLX5_DRIVER_H */ From aa2a0c23c9b7e6fb17db4c77cbe0967512f5d274 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 14 Feb 2018 12:17:47 +0000 Subject: [PATCH 0060/2608] powerpc/pseries: Add empty update_numa_cpu_lookup_table() for NUMA=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c1e150ceb61e4a585bad156da15c33bfe89f5858 ] When CONFIG_NUMA is not set, the build fails with: arch/powerpc/platforms/pseries/hotplug-cpu.c:335:4: error: déclaration implicite de la fonction « update_numa_cpu_lookup_table » So we have to add update_numa_cpu_lookup_table() as an empty function when CONFIG_NUMA is not set. Fixes: 1d9a090783be ("powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove") Signed-off-by: Corentin Labbe Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/topology.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index d5f2ee882f74..66c72b356ac0 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -81,6 +81,9 @@ static inline int numa_update_cpu_topology(bool cpus_locked) { return 0; } + +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} + #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) From 4fbd26a1484c5819208b87e54d35515fd678a140 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 13 Feb 2018 14:50:50 +0100 Subject: [PATCH 0061/2608] dm integrity: fail early if required HMAC key is not available [ Upstream commit e16b4f99f0f79682a7efe191a8ce694d87ca9fc8 ] Since crypto API commit 9fa68f62004 ("crypto: hash - prevent using keyed hashes without setting key") dm-integrity cannot use keyed algorithms without the key being set. The dm-integrity recognizes this too late (during use of HMAC), so it allows creation and formatting of superblock, but the device is in fact unusable. Fix it by detecting the key requirement in integrity table constructor. Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 898286ed47a1..b10e4c5641ea 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2547,6 +2547,9 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, *error = error_key; return r; } + } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) { + *error = error_key; + return -ENOKEY; } } From 8d9d7f018464129de99ba5c63e00866a3a88750c Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:53 +0800 Subject: [PATCH 0062/2608] net: phy: realtek: Use the dummy stubs for MMD register access for rtl8211b [ Upstream commit 0231b1a074c672f8c00da00a57144072890d816b ] The Ethernet on mpc8315erdb is broken since commit b6b5e8a69118 ("gianfar: Disable EEE autoneg by default"). The reason is that even though the rtl8211b doesn't support the MMD extended registers access, it does return some random values if we trying to access the MMD register via indirect method. This makes it seem that the EEE is supported by this phy device. And the subsequent writing to the MMD registers does cause the phy malfunction. So use the dummy stubs for the MMD register access to fix this issue. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 9cbe645e3d89..7d38af5ed4b5 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -138,6 +138,8 @@ static struct phy_driver realtek_drvs[] = { .read_status = &genphy_read_status, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211b_config_intr, + .read_mmd = &genphy_read_mmd_unsupported, + .write_mmd = &genphy_write_mmd_unsupported, }, { .phy_id = 0x001cc914, .name = "RTL8211DN Gigabit Ethernet", From 8b882dbba71768ff1d9a70e85488a9b5210f7b8f Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:52 +0800 Subject: [PATCH 0063/2608] net: phy: Add general dummy stubs for MMD register access [ Upstream commit 5df7af85ecd88e8b5f1f31d6456c3cf38a8bbdda ] For some phy devices, even though they don't support the MMD extended register access, it does have some side effect if we are trying to read/write the MMD registers via indirect method. So introduce general dummy stubs for MMD register access which these devices can use to avoid such side effect. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 17 +++++++++++++++++ include/linux/phy.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a174d05a9752..fe76e2c4022a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1641,6 +1641,23 @@ int genphy_config_init(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_config_init); +/* This is used for the phy device which doesn't support the MMD extended + * register access, but it does have side effect when we are trying to access + * the MMD register via indirect method. + */ +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(genphy_read_mmd_unsupported); + +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(genphy_write_mmd_unsupported); + int genphy_suspend(struct phy_device *phydev) { int value; diff --git a/include/linux/phy.h b/include/linux/phy.h index dca9e926b88f..efc04c2d92c9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -879,6 +879,10 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev) { return 0; } +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, + u16 regnum); +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); From 651f11a83af35ea3c57d53dcb8a4a6640bb3fb5d Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Wed, 27 Jun 2018 15:43:07 +0300 Subject: [PATCH 0064/2608] net/mlx5e: Refine ets validation function [ Upstream commit e279d634f3d57452eb106a0c0e99a6add3fba1a6 ] Removed an error message received when configuring ETS total bandwidth to be zero. Our hardware doesn't support such configuration, so we shall reject it in the driver. Nevertheless, we removed the error message in order to eliminate error messages caused by old userspace tools who try to pass such configuration. Fixes: ff0891915cd7 ("net/mlx5e: Fix ETS BW check") Signed-off-by: Shay Agroskin Reviewed-by: Huy Nguyen Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 9d64d0759ee9..a5dd99aaf321 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -257,7 +257,8 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) } static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, - struct ieee_ets *ets) + struct ieee_ets *ets, + bool zero_sum_allowed) { bool have_ets_tc = false; int bw_sum = 0; @@ -282,8 +283,9 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, } if (have_ets_tc && bw_sum != 100) { - netdev_err(netdev, - "Failed to validate ETS: BW sum is illegal\n"); + if (bw_sum || (!bw_sum && !zero_sum_allowed)) + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); return -EINVAL; } return 0; @@ -298,7 +300,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, ets)) return -EOPNOTSUPP; - err = mlx5e_dbcnl_validate_ets(netdev, ets); + err = mlx5e_dbcnl_validate_ets(netdev, ets, false); if (err) return err; @@ -477,12 +479,9 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; } - err = mlx5e_dbcnl_validate_ets(netdev, &ets); - if (err) { - netdev_err(netdev, - "%s, Failed to validate ETS: %d\n", __func__, err); + err = mlx5e_dbcnl_validate_ets(netdev, &ets, true); + if (err) goto out; - } err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); if (err) { From a972222a6a3004df0ba6295d9ea14617dff0964a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 20 Mar 2018 21:05:48 +0000 Subject: [PATCH 0065/2608] scsi: qla2xxx: Avoid double completion of abort command [ Upstream commit 3a9910d7b686546dcc9986e790af17e148f1c888 ] qla2x00_tmf_sp_done() now deletes the timer that will run qla2x00_tmf_iocb_timeout(), but doesn't check whether the timer already expired. Check the return value from del_timer() to avoid calling complete() a second time. Fixes: 4440e46d5db7 ("[SCSI] qla2xxx: Add IOCB Abort command asynchronous ...") Fixes: 1514839b3664 ("scsi: qla2xxx: Fix NULL pointer crash due to active ...") Signed-off-by: Ben Hutchings Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 1d42d38f5a45..0e19f6bc24ff 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1365,8 +1365,8 @@ qla24xx_abort_sp_done(void *ptr, int res) srb_t *sp = ptr; struct srb_iocb *abt = &sp->u.iocb_cmd; - del_timer(&sp->u.iocb_cmd.timer); - complete(&abt->u.abt.comp); + if (del_timer(&sp->u.iocb_cmd.timer)) + complete(&abt->u.abt.comp); } int From 6cfb67394a42072981c2884114b3850a3cdd5b2b Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 19 Mar 2018 22:12:53 +0100 Subject: [PATCH 0066/2608] kbuild: set no-integrated-as before incl. arch Makefile [ Upstream commit 0f0e8de334c54c38818a4a5390a39aa09deff5bf ] In order to make sure compiler flag detection for ARM works correctly the no-integrated-as flags need to be set before including the arch specific Makefile. Fixes: cfe17c9bbe6a ("kbuild: move cc-option and cc-disable-warning after incl. arch Makefile") Signed-off-by: Stefan Agner Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 89574ee68d6b..7f6579d53911 100644 --- a/Makefile +++ b/Makefile @@ -487,6 +487,8 @@ CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register @@ -721,8 +723,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # See modpost pattern 2 KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) else # These warnings generated too much noise in a regular build. From 579493b9f689910b49c071d0acdec43c4f447d65 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 25 Feb 2018 13:39:51 +0200 Subject: [PATCH 0067/2608] IB/mlx5: Avoid passing an invalid QP type to firmware [ Upstream commit e7b169f34403becd3c9fd3b6e46614ab788f2187 ] During QP creation, the mlx5 driver translates the QP type to an internal value which is passed on to FW. There was no check to make sure that the translated value is valid, and -EINVAL was coerced into the mailbox command. Current firmware refuses this as an invalid QP type, but future/past firmware may do something else. Fixes: 09a7d9eca1a6c ('{net,IB}/mlx5: QP/XRCD commands via mlx5 ifc') Reviewed-by: Ilya Lesokhin Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/qp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ef9ee6c328a1..dfc190055167 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1527,6 +1527,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 uidx = MLX5_IB_DEFAULT_UIDX; struct mlx5_ib_create_qp ucmd; struct mlx5_ib_qp_base *base; + int mlx5_st; void *qpc; u32 *in; int err; @@ -1535,6 +1536,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + mlx5_st = to_mlx5_st(init_attr->qp_type); + if (mlx5_st < 0) + return -EINVAL; + if (init_attr->rwq_ind_tbl) { if (!udata) return -ENOSYS; @@ -1688,7 +1693,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); - MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type)); + MLX5_SET(qpc, qpc, st, mlx5_st); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) From b39ac54215190bc178ae7de799e74d327a3c1a33 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Thu, 22 Feb 2018 15:38:25 +0100 Subject: [PATCH 0068/2608] ARM: tegra: Fix ULPI regression on Tegra20 [ Upstream commit 4c9a27a6c66d4427f3cba4019d4ba738fe99fa87 ] Since commit f8f8f1d04494 ("clk: Don't touch hardware when reparenting during registration") ULPI has been broken on Tegra20 leading to the following error message during boot: [ 1.974698] ulpi_phy_power_on: ulpi write failed [ 1.979384] tegra-ehci c5004000.usb: Failed to power on the phy [ 1.985434] tegra-ehci: probe of c5004000.usb failed with error -110 Debugging through the changes and finally also consulting the TRM revealed that rather than the CDEV2 clock off OSC requiring such pin muxing actually the PLL_P_OUT4 clock is in use. It looks like so far it just worked by chance of that one having been enabled which Stephen's commit now changed when reparenting sclk away from pll_p_out4 leaving that one disabled. Fix this by properly assigning the PLL_P_OUT4 clock as the ULPI PHY clock. Signed-off-by: Marcel Ziswiler Reviewed-by: Dmitry Osipenko Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm/boot/dts/tegra20.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 914f59166a99..2780e68a853b 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -706,7 +706,7 @@ phy_type = "ulpi"; clocks = <&tegra_car TEGRA20_CLK_USB2>, <&tegra_car TEGRA20_CLK_PLL_U>, - <&tegra_car TEGRA20_CLK_CDEV2>; + <&tegra_car TEGRA20_CLK_PLL_P_OUT4>; clock-names = "reg", "pll_u", "ulpi-link"; resets = <&tegra_car 58>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; From 5c097f55e980d5578bec4e5d99759ead834a6358 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 3 Jan 2018 22:48:06 +0000 Subject: [PATCH 0069/2608] l2tp: remove configurable payload offset [ Upstream commit 900631ee6a2651dc4fbaecb8ef9fa5f1e3378853 ] If L2TP_ATTR_OFFSET is set to a non-zero value in L2TPv3 tunnels, it results in L2TPv3 packets being transmitted which might not be compliant with the L2TPv3 RFC. This patch has l2tp ignore the offset setting and send all packets with no offset. In more detail: L2TPv2 supports a variable offset from the L2TPv2 header to the payload. The offset value is indicated by an optional field in the L2TP header. Our L2TP implementation already detects the presence of the optional offset and skips that many bytes when handling data received packets. All transmitted packets are always transmitted with no offset. L2TPv3 has no optional offset field in the L2TPv3 packet header. Instead, L2TPv3 defines optional fields in a "Layer-2 Specific Sublayer". At the time when the original L2TP code was written, there was talk at IETF of offset being implemented in a new Layer-2 Specific Sublayer. A L2TP_ATTR_OFFSET netlink attribute was added so that this offset could be configured and the intention was to allow it to be also used to set the tx offset for L2TPv2. However, no L2TPv3 offset was ever specified and the L2TP_ATTR_OFFSET parameter was forgotten about. Setting L2TP_ATTR_OFFSET results in L2TPv3 packets being transmitted with the specified number of bytes padding between L2TPv3 header and payload. This is not compliant with L2TPv3 RFC3931. This change removes the configurable offset altogether while retaining L2TP_ATTR_OFFSET for backwards compatibility. Any L2TP_ATTR_OFFSET value is ignored. Signed-off-by: James Chapman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_core.c | 14 ++++---------- net/l2tp/l2tp_core.h | 3 --- net/l2tp/l2tp_debugfs.c | 4 ++-- net/l2tp/l2tp_netlink.c | 3 --- 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 5c87f1d3e525..33ea389ee015 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -808,10 +808,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, } } - /* Session data offset is handled differently for L2TPv2 and - * L2TPv3. For L2TPv2, there is an optional 16-bit value in - * the header. For L2TPv3, the offset is negotiated using AVPs - * in the session setup control protocol. + /* Session data offset is defined only for L2TPv2 and is + * indicated by an optional 16-bit value in the header. */ if (tunnel->version == L2TP_HDR_VER_2) { /* If offset bit set, skip it. */ @@ -819,8 +817,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, offset = ntohs(*(__be16 *)ptr); ptr += 2 + offset; } - } else - ptr += session->offset; + } offset = ptr - optr; if (!pskb_may_pull(skb, offset)) @@ -1104,8 +1101,6 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) } bufp += session->l2specific_len; } - if (session->offset) - bufp += session->offset; return bufp - optr; } @@ -1779,7 +1774,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) if (session->send_seq) session->hdr_len += 4; } else { - session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset; + session->hdr_len = 4 + session->cookie_len + session->l2specific_len; if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } @@ -1830,7 +1825,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->recv_seq = cfg->recv_seq; session->lns_mode = cfg->lns_mode; session->reorder_timeout = cfg->reorder_timeout; - session->offset = cfg->offset; session->l2specific_type = cfg->l2specific_type; session->l2specific_len = cfg->l2specific_len; session->cookie_len = cfg->cookie_len; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9e2f1fda1b03..0a58c0754526 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -59,7 +59,6 @@ struct l2tp_session_cfg { int debug; /* bitmask of debug message * categories */ u16 vlan_id; /* VLAN pseudowire only */ - u16 offset; /* offset to payload */ u16 l2specific_len; /* Layer 2 specific length */ u16 l2specific_type; /* Layer 2 specific type */ u8 cookie[8]; /* optional cookie */ @@ -86,8 +85,6 @@ struct l2tp_session { int cookie_len; u8 peer_cookie[8]; int peer_cookie_len; - u16 offset; /* offset from end of L2TP header - to beginning of data */ u16 l2specific_len; u16 l2specific_type; u16 hdr_len; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 53bae54c4d6e..534cad03b9e9 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -180,8 +180,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " offset %hu l2specific %hu/%hu\n", - session->offset, session->l2specific_type, session->l2specific_len); + seq_printf(m, " offset 0 l2specific %hu/%hu\n", + session->l2specific_type, session->l2specific_len); if (session->cookie_len) { seq_printf(m, " cookie %02x%02x%02x%02x", session->cookie[0], session->cookie[1], diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index c28223d8092b..001797ce4084 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -549,9 +549,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf } if (tunnel->version > 2) { - if (info->attrs[L2TP_ATTR_OFFSET]) - cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]); - if (info->attrs[L2TP_ATTR_DATA_SEQ]) cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]); From 8f041940aaf2674dcccd15753d68ff885f90d5e7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 5 Apr 2018 14:57:11 +0200 Subject: [PATCH 0070/2608] cifs: Use ULL suffix for 64-bit constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3995bbf53bd2047f2720c6fdd4bf38f6d942a0c0 ] On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): fs/cifs/inode.c: In function ‘simple_hashstr’: fs/cifs/inode.c:713: warning: integer constant is too large for ‘long’ type Fixes: 7ea884c77e5c97f1 ("smb3: Fix root directory when server returns inode number of zero") Signed-off-by: Geert Uytterhoeven Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Sasha Levin --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2cd0b3053439..d01cbca84701 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -712,7 +712,7 @@ cgfi_exit: /* Simple function to return a 64 bit hash of string. Rarely called */ static __u64 simple_hashstr(const char *str) { - const __u64 hash_mult = 1125899906842597L; /* a big enough prime */ + const __u64 hash_mult = 1125899906842597ULL; /* a big enough prime */ __u64 hash = 0; while (*str) From 7d768c84ec311468d7fd61250700ef8b21f8cf6d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 20 Mar 2018 09:58:51 -0300 Subject: [PATCH 0071/2608] test_bpf: Fix testing with CONFIG_BPF_JIT_ALWAYS_ON=y on other arches [ Upstream commit 52fda36d63bfc8c8e8ae5eda8eb5ac6f52cd67ed ] Function bpf_fill_maxinsns11 is designed to not be able to be JITed on x86_64. So, it fails when CONFIG_BPF_JIT_ALWAYS_ON=y, and commit 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") makes sure that failure is detected on that case. However, it does not fail on other architectures, which have a different JIT compiler design. So, test_bpf has started to fail to load on those. After this fix, test_bpf loads fine on both x86_64 and ppc64el. Fixes: 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- lib/test_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 64701b4c9900..75ebf2bbc2ee 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5427,7 +5427,7 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, -#ifdef CONFIG_BPF_JIT_ALWAYS_ON +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86) CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, #else CLASSIC | FLAG_NO_DATA, From 090246ff5f6c38b0219564d0d46ae52cac22e243 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Wed, 28 Feb 2018 19:06:48 +0100 Subject: [PATCH 0072/2608] KVM: x86: Update the exit_qualification access bits while walking an address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ddd6f0e94d3153951580d5b88b9d97c7e26a0e00 ] ... to avoid having a stale value when handling an EPT misconfig for MMIO regions. MMIO regions that are not passed-through to the guest are handled through EPT misconfigs. The first time a certain MMIO page is touched it causes an EPT violation, then KVM marks the EPT entry to cause an EPT misconfig instead. Any subsequent accesses to the entry will generate an EPT misconfig. Things gets slightly complicated with nested guest handling for MMIO regions that are not passed through from L0 (i.e. emulated by L0 user-space). An EPT violation for one of these MMIO regions from L2, exits to L0 hypervisor. L0 would then look at the EPT12 mapping for L1 hypervisor and realize it is not present (or not sufficient to serve the request). Then L0 injects an EPT violation to L1. L1 would then update its EPT mappings. The EXIT_QUALIFICATION value for L1 would come from exit_qualification variable in "struct vcpu". The problem is that this variable is only updated on EPT violation and not on EPT misconfig. So if an EPT violation because of a read happened first, then an EPT misconfig because of a write happened afterwards. The L0 hypervisor will still contain exit_qualification value from the previous read instead of the write and end up injecting an EPT violation to the L1 hypervisor with an out of date EXIT_QUALIFICATION. The EPT violation that is injected from L0 to L1 needs to have the correct EXIT_QUALIFICATION specially for the access bits because the individual access bits for MMIO EPTs are updated only on actual access of this specific type. So for the example above, the L1 hypervisor will keep updating only the read bit in the EPT then resume the L2 guest. The L2 guest would end up causing another exit where the L0 *again* will inject another EPT violation to L1 hypervisor with *again* an out of date exit_qualification which indicates a read and not a write. Then this ping-pong just keeps happening without making any forward progress. The behavior of mapping MMIO regions changed in: commit a340b3e229b24 ("kvm: Map PFN-type memory regions as writable (if possible)") ... where an EPT violation for a read would also fixup the write bits to avoid another EPT violation which by acciddent would fix the bug mentioned above. This commit fixes this situation and ensures that the access bits for the exit_qualifcation is up to date. That ensures that even L1 hypervisor running with a KVM version before the commit mentioned above would still work. ( The description above assumes EPT to be available and used by L1 hypervisor + the L1 hypervisor is passing through the MMIO region to the L2 guest while this MMIO region is emulated by the L0 user-space ). Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: KarimAllah Ahmed Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin --- arch/x86/kvm/paging_tmpl.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 5abae72266b7..6288e9d7068e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -452,14 +452,21 @@ error: * done by is_rsvd_bits_set() above. * * We set up the value of exit_qualification to inject: - * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation + * [2:0] - Derive from the access bits. The exit_qualification might be + * out of date if it is serving an EPT misconfiguration. * [5:3] - Calculated by the page walk of the guest EPT page tables * [7:8] - Derived from [7:8] of real exit_qualification * * The other bits are set to 0. */ if (!(errcode & PFERR_RSVD_MASK)) { - vcpu->arch.exit_qualification &= 0x187; + vcpu->arch.exit_qualification &= 0x180; + if (write_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; + if (user_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; + if (fetch_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; } #endif From c1fdafea627c843099518ebb5ec3fcc9396ae4a8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 15 Mar 2018 14:18:00 -0700 Subject: [PATCH 0073/2608] sparc64: Fix regression in pmdp_invalidate(). [ Upstream commit cfb61b5e3e09f8b49bc4d685429df75f45127adc ] pmdp_invalidate() was changed to update the pmd atomically (to not lose dirty/access bits) and return the original pmd value. However, in doing so, we lost a lot of the essential work that set_pmd_at() does, namely to update hugepage mapping counts and queuing up the batched TLB flush entry. Thus we were not flushing entries out of the TLB when making such PMD changes. Fix this by abstracting the accounting work of set_pmd_at() out into a separate function, and call it from pmdp_establish(). Fixes: a8e654f01cb7 ("sparc64: update pmdp_invalidate() to return old pmd value") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/mm/tlb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 847ddffbf38a..b5cfab711651 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -163,13 +163,10 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, pte_unmap(pte); } -void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) + +static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr, + pmd_t orig, pmd_t pmd) { - pmd_t orig = *pmdp; - - *pmdp = pmd; - if (mm == &init_mm) return; @@ -219,6 +216,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + pmd_t orig = *pmdp; + + *pmdp = pmd; + __set_pmd_acct(mm, addr, orig, pmd); +} + static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { @@ -227,6 +233,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, do { old = *pmdp; } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); + __set_pmd_acct(vma->vm_mm, address, old, pmd); return old; } From db9d15988afb923b6e559c8aa3ef56d596f74524 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Mon, 2 Apr 2018 21:50:06 +0530 Subject: [PATCH 0074/2608] tpm: move the delay_msec increment after sleep in tpm_transmit() [ Upstream commit 92980756979a9c51be0275f395f4e89c42cf199a ] Commit e2fb992d82c6 ("tpm: add retry logic") introduced a new loop to handle the TPM2_RC_RETRY error. The loop retries the command after sleeping for the specified time, which is incremented exponentially in every iteration. Unfortunately, the loop doubles the time before sleeping, causing the initial sleep to be doubled. This patch fixes the initial sleep time. Fixes: commit e2fb992d82c6 ("tpm: add retry logic") Signed-off-by: Nayna Jain Reviewed-by: Mimi Zohar Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm-interface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index a2070ab86c82..89d5915b1a3f 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -611,12 +611,13 @@ ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space, rc = be32_to_cpu(header->return_code); if (rc != TPM2_RC_RETRY) break; - delay_msec *= 2; + if (delay_msec > TPM2_DURATION_LONG) { dev_err(&chip->dev, "TPM is in retry loop\n"); break; } tpm_msleep(delay_msec); + delay_msec *= 2; memcpy(buf, save, save_size); } return ret; From 3c0cff34e91e4ea1fdf74df65db98ee455190dfa Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 23 Apr 2018 15:39:23 -0700 Subject: [PATCH 0075/2608] bpf: sockmap, map_release does not hold refcnt for pinned maps [ Upstream commit ba6b8de423f8d0dee48d6030288ed81c03ddf9f0 ] Relying on map_release hook to decrement the reference counts when a map is removed only works if the map is not being pinned. In the pinned case the ref is decremented immediately and the BPF programs released. After this BPF programs may not be in-use which is not what the user would expect. This patch moves the release logic into bpf_map_put_uref() and brings sockmap in-line with how a similar case is handled in prog array maps. Fixes: 3d9e952697de ("bpf: sockmap, fix leaking maps with attached but not detached progs") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- include/linux/bpf.h | 2 +- kernel/bpf/arraymap.c | 3 ++- kernel/bpf/sockmap.c | 4 ++-- kernel/bpf/syscall.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5c5be80ce802..c9d2a1a3ef11 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,6 +27,7 @@ struct bpf_map_ops { void (*map_release)(struct bpf_map *map, struct file *map_file); void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); + void (*map_release_uref)(struct bpf_map *map); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); @@ -300,7 +301,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); -void bpf_fd_array_map_clear(struct bpf_map *map); int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index f57d0bdf3c9e..a8f55ea4146b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -467,7 +467,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr) } /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_fd_array_map_clear(struct bpf_map *map) +static void bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -485,6 +485,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, + .map_release_uref = bpf_fd_array_map_clear, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 20eaddfa691c..22991e19c01c 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -875,7 +875,7 @@ static int sock_map_update_elem(struct bpf_map *map, return err; } -static void sock_map_release(struct bpf_map *map, struct file *map_file) +static void sock_map_release(struct bpf_map *map) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); struct bpf_prog *orig; @@ -895,7 +895,7 @@ const struct bpf_map_ops sock_map_ops = { .map_get_next_key = sock_map_get_next_key, .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, - .map_release = sock_map_release, + .map_release_uref = sock_map_release, }; BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4e933219fec6..ea22d0b6a9f0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -214,8 +214,8 @@ static void bpf_map_free_deferred(struct work_struct *work) static void bpf_map_put_uref(struct bpf_map *map) { if (atomic_dec_and_test(&map->usercnt)) { - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) - bpf_fd_array_map_clear(map); + if (map->ops->map_release_uref) + map->ops->map_release_uref(map); } } From 9de255666ce3729cdd50e7defb649b82dbb538f6 Mon Sep 17 00:00:00 2001 From: "Winkler, Tomas" Date: Sat, 7 Apr 2018 19:12:36 +0300 Subject: [PATCH 0076/2608] tpm: tpm_crb: relinquish locality on error path. [ Upstream commit 1fbad3028664e114d210dc65d768947a3a553eaa ] In crb_map_io() function, __crb_request_locality() is called prior to crb_cmd_ready(), but if one of the consecutive function fails the flow bails out instead of trying to relinquish locality. This patch adds goto jump to __crb_relinquish_locality() on the error path. Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality) Signed-off-by: Tomas Winkler Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm_crb.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 5c7ce5aaaf6f..b4ad169836e9 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -520,8 +520,10 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, priv->regs_t = crb_map_res(dev, priv, &io_res, buf->control_address, sizeof(struct crb_regs_tail)); - if (IS_ERR(priv->regs_t)) - return PTR_ERR(priv->regs_t); + if (IS_ERR(priv->regs_t)) { + ret = PTR_ERR(priv->regs_t); + goto out_relinquish_locality; + } /* * PTT HW bug w/a: wake up the device to access @@ -529,7 +531,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, */ ret = __crb_cmd_ready(dev, priv); if (ret) - return ret; + goto out_relinquish_locality; pa_high = ioread32(&priv->regs_t->ctrl_cmd_pa_high); pa_low = ioread32(&priv->regs_t->ctrl_cmd_pa_low); @@ -574,6 +576,8 @@ out: __crb_go_idle(dev, priv); +out_relinquish_locality: + __crb_relinquish_locality(dev, priv, 0); return ret; From a0fcefb70022c7b39a873b16e36fca40eb354ec8 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 21 Jun 2018 14:00:21 +0100 Subject: [PATCH 0077/2608] xen-netfront: Update features after registering netdev [ Upstream commit 45c8184c1bed1ca8a7f02918552063a00b909bf5 ] Update the features after calling register_netdev() otherwise the device features are not set up correctly and it not possible to change the MTU of the device. After this change, the features reported by ethtool match the device's features before the commit which introduced the issue and it is possible to change the device's MTU. Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") Reported-by: Liam Shepherd Signed-off-by: Ross Lagerwall Reviewed-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/xen-netfront.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 1a40fc3517a8..ca239912c0e6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1964,10 +1964,6 @@ static int xennet_connect(struct net_device *dev) /* talk_to_netback() sets the correct number of queues */ num_queues = dev->real_num_tx_queues; - rtnl_lock(); - netdev_update_features(dev); - rtnl_unlock(); - if (dev->reg_state == NETREG_UNINITIALIZED) { err = register_netdev(dev); if (err) { @@ -1977,6 +1973,10 @@ static int xennet_connect(struct net_device *dev) } } + rtnl_lock(); + netdev_update_features(dev); + rtnl_unlock(); + /* * All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver From d29f27192a2d88d9255265b258abaeef1ee1726f Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 21 Jun 2018 14:00:20 +0100 Subject: [PATCH 0078/2608] xen-netfront: Fix mismatched rtnl_unlock [ Upstream commit cb257783c2927b73614b20f915a91ff78aa6f3e8 ] Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") Reported-by: Ben Hutchings Signed-off-by: Ross Lagerwall Reviewed-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/xen-netfront.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ca239912c0e6..6ea95b316256 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1824,7 +1824,7 @@ static int talk_to_netback(struct xenbus_device *dev, err = xen_net_read_mac(dev, info->netdev->dev_addr); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); - goto out; + goto out_unlocked; } rtnl_lock(); @@ -1939,6 +1939,7 @@ abort_transaction_no_dev_fatal: xennet_destroy_queues(info); out: rtnl_unlock(); +out_unlocked: device_unregister(&dev->dev); return err; } From e6df57b60b86698cd2e3cc00cb773e06e96df604 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Jun 2018 11:19:42 -0600 Subject: [PATCH 0079/2608] IB/usnic: Update with bug fixes from core code [ Upstream commit 43cbd64b1fdc1da89abdad88a022d9e87a98e9c6 ] usnic has a modified version of the core codes' ib_umem_get() and related, and the copy misses many of the bug fixes done over the years: Commit bc3e53f682d9 ("mm: distinguish between mlocked and pinned pages") Commit 87773dd56d54 ("IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get") Commit 8494057ab5e4 ("IB/uverbs: Prevent integer overflow in ib_umem_get address arithmetic") Commit 8abaae62f3fd ("IB/core: disallow registering 0-sized memory region") Commit 66578b0b2f69 ("IB/core: don't disallow registering region starting at 0x0") Commit 53376fedb9da ("RDMA/core: not to set page dirty bit if it's already set.") Commit 8e907ed48827 ("IB/umem: Use the correct mm during ib_umem_release") Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 +- drivers/infiniband/hw/usnic/usnic_uiom.c | 40 ++++++++++++++------ drivers/infiniband/hw/usnic/usnic_uiom.h | 5 ++- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index e4113ef09315..3c3453d213dc 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -642,7 +642,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr) usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); - usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); + usnic_uiom_reg_release(mr->umem, ibmr->uobject->context); kfree(mr); return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 4381c0a9a873..9dd39daa602b 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "usnic_log.h" #include "usnic_uiom.h" @@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) for_each_sg(chunk->page_list, sg, chunk->nents, i) { page = sg_page(sg); pa = sg_phys(sg); - if (dirty) + if (!PageDirty(page) && dirty) set_page_dirty_lock(page); put_page(page); usnic_dbg("pa: %pa\n", &pa); @@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, dma_addr_t pa; unsigned int gup_flags; + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. + */ + if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size)) + return -EINVAL; + + if (!size) + return -EINVAL; + if (!can_do_mlock()) return -EPERM; @@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, down_write(¤t->mm->mmap_sem); - locked = npages + current->mm->locked_vm; + locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = 0; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), gup_flags, page_list, NULL); @@ -186,7 +197,7 @@ out: if (ret < 0) usnic_uiom_put_pages(chunk_list, 0); else - current->mm->locked_vm = locked; + current->mm->pinned_vm = locked; up_write(¤t->mm->mmap_sem); free_page((unsigned long) page_list); @@ -420,18 +431,22 @@ out_free_uiomr: return ERR_PTR(err); } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext) { + struct task_struct *task; struct mm_struct *mm; unsigned long diff; __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); - mm = get_task_mm(current); - if (!mm) { - kfree(uiomr); - return; - } + task = get_pid_task(ucontext->tgid, PIDTYPE_PID); + if (!task) + goto out; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; @@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue. */ - if (closing) { + if (ucontext->closing) { if (!down_write_trylock(&mm->mmap_sem)) { INIT_WORK(&uiomr->work, usnic_uiom_reg_account); uiomr->mm = mm; @@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) } else down_write(&mm->mmap_sem); - current->mm->locked_vm -= diff; + mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); +out: kfree(uiomr); } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 431efe4143f4..8c096acff123 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -39,6 +39,8 @@ #include "usnic_uiom_interval_tree.h" +struct ib_ucontext; + #define USNIC_UIOM_READ (1) #define USNIC_UIOM_WRITE (2) @@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs); struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext); int usnic_uiom_init(char *drv_name); void usnic_uiom_fini(void); #endif /* USNIC_UIOM_H_ */ From f274c57f0479e860f40bad85b046967e3af04f40 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Thu, 1 Mar 2018 10:36:25 +0000 Subject: [PATCH 0080/2608] mmc: dw_mmc-rockchip: correct property names in debug [ Upstream commit e988867fd774d00aeaf5d3c332032bf5b97a4147 ] Following up the device tree fixed in commits e78c637127ee ("ARM: dts: rockchip: Fix DWMMC clocks") and ca9eee95a2de ("arm64: dts: rockchip: Fix DWMMC clocks", 2018-02-15), avoid confusion by using the correct property name in the debug output if clocks are not found. Signed-off-by: John Keeping Reviewed-by: Robin Murphy Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/dw_mmc-rockchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index 339295212935..40d7de2eea12 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -282,11 +282,11 @@ static int dw_mci_rk3288_parse_dt(struct dw_mci *host) priv->drv_clk = devm_clk_get(host->dev, "ciu-drive"); if (IS_ERR(priv->drv_clk)) - dev_dbg(host->dev, "ciu_drv not available\n"); + dev_dbg(host->dev, "ciu-drive not available\n"); priv->sample_clk = devm_clk_get(host->dev, "ciu-sample"); if (IS_ERR(priv->sample_clk)) - dev_dbg(host->dev, "ciu_sample not available\n"); + dev_dbg(host->dev, "ciu-sample not available\n"); host->priv = priv; From f18ed65d70f019aae09437e5b6a317d365571208 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 20 Aug 2018 15:36:18 -0700 Subject: [PATCH 0081/2608] MIPS: Workaround GCC __builtin_unreachable reordering bug [ Upstream commit 906d441febc0de974b2a6ef848a8f058f3bfada3 ] Some versions of GCC for the MIPS architecture suffer from a bug which can lead to instructions from beyond an unreachable statement being incorrectly reordered into earlier branch delay slots if the unreachable statement is the only content of a case in a switch statement. This can lead to seemingly random behaviour, such as invalid memory accesses from incorrectly reordered loads or stores, and link failures on microMIPS builds. See this potential GCC fix for details: https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00360.html Runtime problems resulting from this bug were initially observed using a maltasmvp_defconfig v4.4 kernel built using GCC 4.9.2 (from a Codescape SDK 2015.06-05 toolchain), with the result being an address exception taken after log messages about the L1 caches (during probe of the L2 cache): Initmem setup node 0 [mem 0x0000000080000000-0x000000009fffffff] VPE topology {2,2} total 4 Primary instruction cache 64kB, VIPT, 4-way, linesize 32 bytes. Primary data cache 64kB, 4-way, PIPT, no aliases, linesize 32 bytes This is early enough that the kernel exception vectors are not in use, so any further output depends upon the bootloader. This is reproducible in QEMU where no further output occurs - ie. the system hangs here. Given the nature of the bug it may potentially be hit with differing symptoms. The bug is known to affect GCC versions as recent as 7.3, and it is unclear whether GCC 8 fixed it or just happens not to encounter the bug in the testcase found at the link above due to differing optimizations. This bug can be worked around by placing a volatile asm statement, which GCC is prevented from reordering past, prior to the __builtin_unreachable call. That was actually done already for other reasons by commit 173a3efd3edb ("bug.h: work around GCC PR82365 in BUG()"), but creates problems for microMIPS builds due to the lack of a .insn directive. The microMIPS ISA allows for interlinking with regular MIPS32 code by repurposing bit 0 of the program counter as an ISA mode bit. To switch modes one changes the value of this bit in the PC. However typical branch instructions encode their offsets as multiples of 2-byte instruction halfwords, which means they cannot change ISA mode - this must be done using either an indirect branch (a jump-register in MIPS terminology) or a dedicated jalx instruction. In order to ensure that regular branches don't attempt to target code in a different ISA which they can't actually switch to, the linker will check that branch targets are code in the same ISA as the branch. Unfortunately our empty asm volatile statements don't qualify as code, and the link for microMIPS builds fails with errors such as: arch/mips/mm/dma-default.s:3265: Error: branch to a symbol in another ISA mode arch/mips/mm/dma-default.s:5027: Error: branch to a symbol in another ISA mode Resolve this by adding a .insn directive within the asm statement which declares that what comes next is code. This may or may not be true, since we don't really know what comes next, but as this code is in an unreachable path anyway that doesn't matter since we won't execute it. We do this in asm/compiler.h & select CONFIG_HAVE_ARCH_COMPILER_H in order to have this included by linux/compiler_types.h after linux/compiler-gcc.h. This will result in asm/compiler.h being included in all C compilations via the -include linux/compiler_types.h argument in c_flags, which should be harmless. Signed-off-by: Paul Burton Fixes: 173a3efd3edb ("bug.h: work around GCC PR82365 in BUG()") Patchwork: https://patchwork.linux-mips.org/patch/20270/ Cc: James Hogan Cc: Ralf Baechle Cc: Arnd Bergmann Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin --- arch/mips/Kconfig | 1 + arch/mips/include/asm/compiler.h | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c82457b0e733..23e3d3e0ee5b 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -29,6 +29,7 @@ config MIPS select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HANDLE_DOMAIN_IRQ + select HAVE_ARCH_COMPILER_H select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU diff --git a/arch/mips/include/asm/compiler.h b/arch/mips/include/asm/compiler.h index e081a265f422..cc2eb1b06050 100644 --- a/arch/mips/include/asm/compiler.h +++ b/arch/mips/include/asm/compiler.h @@ -8,6 +8,41 @@ #ifndef _ASM_COMPILER_H #define _ASM_COMPILER_H +/* + * With GCC 4.5 onwards we can use __builtin_unreachable to indicate to the + * compiler that a particular code path will never be hit. This allows it to be + * optimised out of the generated binary. + * + * Unfortunately at least GCC 4.6.3 through 7.3.0 inclusive suffer from a bug + * that can lead to instructions from beyond an unreachable statement being + * incorrectly reordered into earlier delay slots if the unreachable statement + * is the only content of a case in a switch statement. This can lead to + * seemingly random behaviour, such as invalid memory accesses from incorrectly + * reordered loads or stores. See this potential GCC fix for details: + * + * https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00360.html + * + * It is unclear whether GCC 8 onwards suffer from the same issue - nothing + * relevant is mentioned in GCC 8 release notes and nothing obviously relevant + * stands out in GCC commit logs, but these newer GCC versions generate very + * different code for the testcase which doesn't exhibit the bug. + * + * GCC also handles stack allocation suboptimally when calling noreturn + * functions or calling __builtin_unreachable(): + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 + * + * We work around both of these issues by placing a volatile asm statement, + * which GCC is prevented from reordering past, prior to __builtin_unreachable + * calls. + * + * The .insn statement is required to ensure that any branches to the + * statement, which sadly must be kept due to the asm statement, are known to + * be branches to code and satisfy linker requirements for microMIPS kernels. + */ +#undef barrier_before_unreachable +#define barrier_before_unreachable() asm volatile(".insn") + #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) #define GCC_IMM_ASM() "n" #define GCC_REG_ACCUM "$0" From b9627a218e2b2a42b812499ccb5d92707482d5d0 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Tue, 10 Apr 2018 13:18:25 +0100 Subject: [PATCH 0082/2608] lan78xx: Don't reset the interface on open [ Upstream commit 47b998653fea4ef69e3e89574956386f262bccca ] Commit 92571a1aae40 ("lan78xx: Connect phy early") moves the PHY initialisation into lan78xx_probe, but lan78xx_open subsequently calls lan78xx_reset. As well as forcing a second round of link negotiation, this reset frequently prevents the phy interrupt from being generated (even though the link is up), rendering the interface unusable. Fix this issue by removing the lan78xx_reset call from lan78xx_open. Fixes: 92571a1aae40 ("lan78xx: Connect phy early") Signed-off-by: Phil Elwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 00ddcaf09014..50e2e10a9050 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2508,10 +2508,6 @@ static int lan78xx_open(struct net_device *net) if (ret < 0) goto out; - ret = lan78xx_reset(dev); - if (ret < 0) - goto done; - phy_start(net->phydev); netif_dbg(dev, ifup, dev->net, "phy initialised successfully"); From 1bee5f3d9530c5e4153c017190994b9a6b3144b7 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Mon, 18 Jun 2018 10:01:05 -0700 Subject: [PATCH 0083/2608] enic: do not overwrite error code [ Upstream commit 56f772279a762984f6e9ebbf24a7c829faba5712 ] In failure path, we overwrite err to what vnic_rq_disable() returns. In case it returns 0, enic_open() returns success in case of error. Reported-by: Ben Hutchings Fixes: e8588e268509 ("enic: enable rq before updating rq descriptors") Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cisco/enic/enic_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 2bfaf3e118b1..03f4fee1bbc9 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1879,7 +1879,7 @@ static int enic_open(struct net_device *netdev) { struct enic *enic = netdev_priv(netdev); unsigned int i; - int err; + int err, ret; err = enic_request_intr(enic); if (err) { @@ -1936,10 +1936,9 @@ static int enic_open(struct net_device *netdev) err_out_free_rq: for (i = 0; i < enic->rq_count; i++) { - err = vnic_rq_disable(&enic->rq[i]); - if (err) - return err; - vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); + ret = vnic_rq_disable(&enic->rq[i]); + if (!ret) + vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); } enic_dev_notify_unset(enic); err_out_free_intr: From 672fdbd5966e8182a0afbc0b178f03bb133cad5a Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 5 Jun 2018 14:15:10 +0800 Subject: [PATCH 0084/2608] iio: buffer: fix the function signature to match implementation [ Upstream commit 92397a6c38d139d50fabbe9e2dc09b61d53b2377 ] linux/iio/buffer-dma.h was not updated to when length was changed to unsigned int. Fixes: c043ec1ca5ba ("iio:buffer: make length types match kfifo types") Signed-off-by: Phil Reid Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- include/linux/iio/buffer-dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 767467d886de..67c75372b691 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -141,7 +141,7 @@ int iio_dma_buffer_read(struct iio_buffer *buffer, size_t n, char __user *user_buffer); size_t iio_dma_buffer_data_available(struct iio_buffer *buffer); int iio_dma_buffer_set_bytes_per_datum(struct iio_buffer *buffer, size_t bpd); -int iio_dma_buffer_set_length(struct iio_buffer *buffer, int length); +int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length); int iio_dma_buffer_request_update(struct iio_buffer *buffer); int iio_dma_buffer_init(struct iio_dma_buffer_queue *queue, From 07e2121f4f2b8f6534a3dc42d67fd32403c6b65e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 22 May 2018 16:14:27 +1000 Subject: [PATCH 0085/2608] selftests/powerpc: Add ptrace hw breakpoint test [ Upstream commit 9c2ddfe55c42bf4b9bc336a0650ab78f9222a159 ] This test the ptrace hw breakpoints via PTRACE_SET_DEBUGREG and PPC_PTRACE_SETHWDEBUG. This test was use to find the bugs fixed by these recent commits: 4f7c06e26e powerpc/ptrace: Fix setting 512B aligned breakpoints with PTRACE_SET_DEBUGREG cd6ef7eebf powerpc/ptrace: Fix enforcement of DAWR constraints Signed-off-by: Michael Neuling [mpe: Add SPDX tag, clang format it] Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- .../selftests/powerpc/ptrace/.gitignore | 1 + .../testing/selftests/powerpc/ptrace/Makefile | 2 +- .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 342 ++++++++++++++++++ 3 files changed, 344 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 349acfafc95b..9dcc16ea8179 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -8,3 +8,4 @@ ptrace-vsx ptrace-tm-vsx ptrace-tm-spd-vsx ptrace-tm-spr +ptrace-hwbreak diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 480305266504..0e2f4601d1a8 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ - ptrace-tm-spd-vsx ptrace-tm-spr + ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c new file mode 100644 index 000000000000..3066d310f32b --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Ptrace test for hw breakpoints + * + * Based on tools/testing/selftests/breakpoints/breakpoint_test.c + * + * This test forks and the parent then traces the child doing various + * types of ptrace enabled breakpoints + * + * Copyright (C) 2018 Michael Neuling, IBM Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ptrace.h" + +/* Breakpoint access modes */ +enum { + BP_X = 1, + BP_RW = 2, + BP_W = 4, +}; + +static pid_t child_pid; +static struct ppc_debug_info dbginfo; + +static void get_dbginfo(void) +{ + int ret; + + ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); + if (ret) { + perror("Can't get breakpoint info\n"); + exit(-1); + } +} + +static bool hwbreak_present(void) +{ + return (dbginfo.num_data_bps != 0); +} + +static bool dawr_present(void) +{ + return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR); +} + +static void set_breakpoint_addr(void *addr) +{ + int ret; + + ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr); + if (ret) { + perror("Can't set breakpoint addr\n"); + exit(-1); + } +} + +static int set_hwbreakpoint_addr(void *addr, int range) +{ + int ret; + + struct ppc_hw_breakpoint info; + + info.version = 1; + info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW; + info.addr_mode = PPC_BREAKPOINT_MODE_EXACT; + if (range > 0) + info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info.addr = (__u64)addr; + info.addr2 = (__u64)addr + range; + info.condition_value = 0; + + ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); + if (ret < 0) { + perror("Can't set breakpoint\n"); + exit(-1); + } + return ret; +} + +static int del_hwbreakpoint_addr(int watchpoint_handle) +{ + int ret; + + ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle); + if (ret < 0) { + perror("Can't delete hw breakpoint\n"); + exit(-1); + } + return ret; +} + +#define DAWR_LENGTH_MAX 512 + +/* Dummy variables to test read/write accesses */ +static unsigned long long + dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)] + __attribute__((aligned(512))); +static unsigned long long *dummy_var = dummy_array; + +static void write_var(int len) +{ + long long *plval; + char *pcval; + short *psval; + int *pival; + + switch (len) { + case 1: + pcval = (char *)dummy_var; + *pcval = 0xff; + break; + case 2: + psval = (short *)dummy_var; + *psval = 0xffff; + break; + case 4: + pival = (int *)dummy_var; + *pival = 0xffffffff; + break; + case 8: + plval = (long long *)dummy_var; + *plval = 0xffffffffffffffffLL; + break; + } +} + +static void read_var(int len) +{ + char cval __attribute__((unused)); + short sval __attribute__((unused)); + int ival __attribute__((unused)); + long long lval __attribute__((unused)); + + switch (len) { + case 1: + cval = *(char *)dummy_var; + break; + case 2: + sval = *(short *)dummy_var; + break; + case 4: + ival = *(int *)dummy_var; + break; + case 8: + lval = *(long long *)dummy_var; + break; + } +} + +/* + * Do the r/w accesses to trigger the breakpoints. And run + * the usual traps. + */ +static void trigger_tests(void) +{ + int len, ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("Can't be traced?\n"); + return; + } + + /* Wake up father so that it sets up the first test */ + kill(getpid(), SIGUSR1); + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) + write_var(len); + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) + read_var(len); + + /* Test when breakpoint is unset */ + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) + write_var(len); + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) + read_var(len); +} + +static void check_success(const char *msg) +{ + const char *msg2; + int status; + + /* Wait for the child to SIGTRAP */ + wait(&status); + + msg2 = "Failed"; + + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { + msg2 = "Child process hit the breakpoint"; + } + + printf("%s Result: [%s]\n", msg, msg2); +} + +static void launch_watchpoints(char *buf, int mode, int len, + struct ppc_debug_info *dbginfo, bool dawr) +{ + const char *mode_str; + unsigned long data = (unsigned long)(dummy_var); + int wh, range; + + data &= ~0x7UL; + + if (mode == BP_W) { + data |= (1UL << 1); + mode_str = "write"; + } else { + data |= (1UL << 0); + data |= (1UL << 1); + mode_str = "read"; + } + + /* Set DABR_TRANSLATION bit */ + data |= (1UL << 2); + + /* use PTRACE_SET_DEBUGREG breakpoints */ + set_breakpoint_addr((void *)data); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + set_breakpoint_addr(NULL); + + data = (data & ~7); /* remove dabr control bits */ + + /* use PPC_PTRACE_SETHWDEBUG breakpoint */ + if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) + return; /* not supported */ + wh = set_hwbreakpoint_addr((void *)data, 0); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + del_hwbreakpoint_addr(wh); + + /* try a wider range */ + range = 8; + if (dawr) + range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1)); + wh = set_hwbreakpoint_addr((void *)data, range); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + del_hwbreakpoint_addr(wh); +} + +/* Set the breakpoints and check the child successfully trigger them */ +static int launch_tests(bool dawr) +{ + char buf[1024]; + int len, i, status; + + struct ppc_debug_info dbginfo; + + i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); + if (i) { + perror("Can't set breakpoint info\n"); + exit(-1); + } + if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) + printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n"); + + /* Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) + launch_watchpoints(buf, BP_W, len, &dbginfo, dawr); + + /* Read-Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) + launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + + /* + * Now we have unregistered the breakpoint, access by child + * should not cause SIGTRAP. + */ + + wait(&status); + + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { + printf("FAIL: Child process hit the breakpoint, which is not expected\n"); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + return TEST_FAIL; + } + + if (WIFEXITED(status)) + printf("Child exited normally\n"); + + return TEST_PASS; +} + +static int ptrace_hwbreak(void) +{ + pid_t pid; + int ret; + bool dawr; + + pid = fork(); + if (!pid) { + trigger_tests(); + return 0; + } + + wait(NULL); + + child_pid = pid; + + get_dbginfo(); + SKIP_IF(!hwbreak_present()); + dawr = dawr_present(); + + ret = launch_tests(dawr); + + wait(NULL); + + return ret; +} + +int main(int argc, char **argv, char **envp) +{ + return test_harness(ptrace_hwbreak, "ptrace-hwbreak"); +} From 5aa0d86363b2f89789619907dd6f886989eb1c4a Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 14 Mar 2018 17:13:39 -0500 Subject: [PATCH 0086/2608] scsi: ibmvfc: Avoid unnecessary port relogin [ Upstream commit 09dd15e0d9547ca424de4043bcd429bab6f285c8 ] Following an RSCN, ibmvfc will issue an ADISC to determine if the underlying target has changed, comparing the SCSI ID, WWPN, and WWNN to determine how to handle the rport in discovery. However, the comparison of the WWPN and WWNN was performing a memcmp between a big endian field against a CPU endian field, which resulted in the wrong answer on LE systems. This was observed as unexpected errors getting logged at boot time as targets were getting relogins when not needed. Signed-off-by: Brian King Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ibmvscsi/ibmvfc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index b491af31a5f8..a06b24a61622 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3580,11 +3580,9 @@ static void ibmvfc_tgt_implicit_logout(struct ibmvfc_target *tgt) static int ibmvfc_adisc_needs_plogi(struct ibmvfc_passthru_mad *mad, struct ibmvfc_target *tgt) { - if (memcmp(&mad->fc_iu.response[2], &tgt->ids.port_name, - sizeof(tgt->ids.port_name))) + if (wwn_to_u64((u8 *)&mad->fc_iu.response[2]) != tgt->ids.port_name) return 1; - if (memcmp(&mad->fc_iu.response[4], &tgt->ids.node_name, - sizeof(tgt->ids.node_name))) + if (wwn_to_u64((u8 *)&mad->fc_iu.response[4]) != tgt->ids.node_name) return 1; if (be32_to_cpu(mad->fc_iu.response[6]) != tgt->scsi_id) return 1; From 3c92c7e96fc447f62d3a7b8f6d1e932388b0b8df Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 14 Mar 2018 12:15:56 -0400 Subject: [PATCH 0087/2608] scsi: sd: Remember that READ CAPACITY(16) succeeded [ Upstream commit 597d74005ba85e87c256cd732128ebf7faf54247 ] The USB storage glue sets the try_rc_10_first flag in an attempt to avoid wedging poorly implemented legacy USB devices. If the device capacity is too large to be expressed in the provided response buffer field of READ CAPACITY(10), a well-behaved device will set the reported capacity to 0xFFFFFFFF. We will then attempt to issue a READ CAPACITY(16) to obtain the real capacity. Since this part of the discovery logic is not covered by the first_scan flag, a warning will be printed a couple of times times per revalidate attempt if we upgrade from READ CAPACITY(10) to READ CAPACITY(16). Remember that we have successfully issued READ CAPACITY(16) so we can take the fast path on subsequent revalidate attempts. Reported-by: Menion Reviewed-by: Laurence Oberman Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6d3091ff9b92..c7b284587365 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2498,6 +2498,8 @@ sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) sector_size = old_sector_size; goto got_data; } + /* Remember that READ CAPACITY(16) succeeded */ + sdp->try_rc_10_first = 0; } } From f72388e3670e870a32854058eff63d0021036457 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 27 Jun 2018 18:19:55 +0800 Subject: [PATCH 0088/2608] btrfs: quota: Set rescan progress to (u64)-1 if we hit last leaf [ Upstream commit 6f7de19ed3d4d3526ca5eca428009f97cf969c2f ] Commit ff3d27a048d9 ("btrfs: qgroup: Finish rescan when hit the last leaf of extent tree") added a new exit for rescan finish. However after finishing quota rescan, we set fs_info->qgroup_rescan_progress to (u64)-1 before we exit through the original exit path. While we missed that assignment of (u64)-1 in the new exit path. The end result is, the quota status item doesn't have the same value. (-1 vs the last bytenr + 1) Although it doesn't affect quota accounting, it's still better to keep the original behavior. Reported-by: Misono Tomohiro Fixes: ff3d27a048d9 ("btrfs: qgroup: Finish rescan when hit the last leaf of extent tree") Signed-off-by: Qu Wenruo Reviewed-by: Misono Tomohiro Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 473ad5985aa3..47dec283628d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2603,8 +2603,10 @@ out: } btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); - if (done && !ret) + if (done && !ret) { ret = 1; + fs_info->qgroup_rescan_progress.objectid = (u64)-1; + } return ret; } From aa3aff5b469b7520932fa8816efb78a0eaf012d4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 20 May 2018 20:49:47 -0700 Subject: [PATCH 0089/2608] net: phy: phylink: Don't release NULL GPIO [ Upstream commit 3bcd47726c3b744fd08781795cca905cc59a1382 ] If CONFIG_GPIOLIB is disabled, gpiod_put() becomes a stub that produces a warning, this helped identify that we could be attempting to release a NULL pl->link_gpio GPIO descriptor, so guard against that. Fixes: daab3349ad1a ("net: phy: phylink: Release link GPIO") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phylink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index e4a6ed88b9cf..79f28b9186c6 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -561,7 +561,7 @@ void phylink_destroy(struct phylink *pl) { if (pl->sfp_bus) sfp_unregister_upstream(pl->sfp_bus); - if (!IS_ERR(pl->link_gpio)) + if (!IS_ERR_OR_NULL(pl->link_gpio)) gpiod_put(pl->link_gpio); cancel_work_sync(&pl->resolve); From 726f05951941cc88e91b25b98921182cfc8d45f7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 Sep 2018 13:35:53 +0300 Subject: [PATCH 0090/2608] x86/paravirt: Fix some warning messages [ Upstream commit 571d0563c8881595f4ab027aef9ed1c55e3e7b7c ] The first argument to WARN_ONCE() is a condition. Fixes: 5800dc5c19f3 ("x86/paravirt: Fix spectre-v2 mitigations for paravirt guests") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Peter Zijlstra Cc: Alok Kataria Cc: "H. Peter Anvin" Cc: virtualization@lists.linux-foundation.org Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20180919103553.GD9238@mwanda Signed-off-by: Sasha Levin --- arch/x86/kernel/paravirt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f3559b84cd75..04da826381c9 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -90,7 +90,7 @@ unsigned paravirt_patch_call(void *insnbuf, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } @@ -110,7 +110,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } From cbc38304947ea2e57440bb38b733613e3f003605 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Aug 2018 23:50:41 +0200 Subject: [PATCH 0091/2608] net: stmmac: mark PM functions as __maybe_unused [ Upstream commit 81a8b0799632627b587af31ecd06112397e4ec36 ] The newly added suspend/resume functions cause a build warning when CONFIG_PM is disabled: drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c:324:12: error: 'stmmac_pci_resume' defined but not used [-Werror=unused-function] drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c:306:12: error: 'stmmac_pci_suspend' defined but not used [-Werror=unused-function] Mark them as __maybe_unused so gcc can drop them silently. Fixes: b7d0f08e9129 ("net: stmmac: Fix WoL for PCI-based setups") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 6a393b16a1fc..c54a50dbd5ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -303,7 +303,7 @@ static void stmmac_pci_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static int stmmac_pci_suspend(struct device *dev) +static int __maybe_unused stmmac_pci_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); int ret; @@ -321,7 +321,7 @@ static int stmmac_pci_suspend(struct device *dev) return 0; } -static int stmmac_pci_resume(struct device *dev) +static int __maybe_unused stmmac_pci_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); int ret; From 48f1b3b56ee4f936c105b550ca4357722d6460da Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Aug 2018 15:47:06 +0900 Subject: [PATCH 0092/2608] kconfig: fix the rule of mainmenu_stmt symbol [ Upstream commit 56869d45e364244a721de34ce9c5dc9ed022779e ] The rule of mainmenu_stmt does not have debug print of zconf_lineno(), but if it had, it would print a wrong line number for the same reason as commit b2d00d7c61c8 ("kconfig: fix line numbers for if-entries in menu tree"). The mainmenu_stmt does not need to eat following empty lines because they are reduced to common_stmt. Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/zconf.y | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 126e3f2e1ed7..2b0adeb5fc42 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -31,7 +31,7 @@ struct symbol *symbol_hash[SYMBOL_HASHSIZE]; static struct menu *current_menu, *current_entry; %} -%expect 31 +%expect 30 %union { @@ -112,7 +112,7 @@ start: mainmenu_stmt stmt_list | no_mainmenu_stmt stmt_list; /* mainmenu entry */ -mainmenu_stmt: T_MAINMENU prompt nl +mainmenu_stmt: T_MAINMENU prompt T_EOL { menu_add_prompt(P_MENU, $2, NULL); }; From 543f1084b67fb8ad62fc7398620e0ecd839594ea Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Mon, 8 Oct 2018 22:03:57 +0200 Subject: [PATCH 0093/2608] libertas: call into generic suspend code before turning off power [ Upstream commit 4f666675cdff0b986195413215eb062b7da6586f ] When powering down a SDIO connected card during suspend, make sure to call into the generic lbs_suspend() function before pulling the plug. This will make sure the card is successfully deregistered from the system to avoid communication to the card starving out. Fixes: 7444a8092906 ("libertas: fix suspend and resume for SDIO connected cards") Signed-off-by: Daniel Mack Reviewed-by: Ulf Hansson Acked-by: Kalle Valo Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/libertas/if_sdio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 43743c26c071..39bf85d0ade0 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -1317,6 +1317,10 @@ static int if_sdio_suspend(struct device *dev) if (priv->wol_criteria == EHS_REMOVE_WAKEUP) { dev_info(dev, "Suspend without wake params -- powering down card\n"); if (priv->fw_ready) { + ret = lbs_suspend(priv); + if (ret) + return ret; + priv->power_up_on_resume = true; if_sdio_power_off(card); } From b8f4d375cd1ebc1bb5fbb174a0237eff24dcbd68 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 26 Jul 2018 22:47:33 +0530 Subject: [PATCH 0094/2608] perf tests: Fix indexing when invoking subtests [ Upstream commit aa90f9f9554616d5738f7bedb4a8f0e5e14d1bc6 ] Recently, the subtest numbering was changed to start from 1. While it is fine for displaying results, this should not be the case when the subtests are actually invoked. Typically, the subtests are stored in zero-indexed arrays and invoked based on the index passed to the main test function. Since the index now starts from 1, the second subtest in the array (index 1) gets invoked instead of the first (index 0). This applies to all of the following subtests but for the last one, the subtest always fails because it does not meet the boundary condition of the subtest index being lesser than the number of subtests. This can be observed on powerpc64 and x86_64 systems running Fedora 28 as shown below. Before: # perf test "builtin clang support" 55: builtin clang support : 55.1: builtin clang compile C source to IR : Ok 55.2: builtin clang compile C source to ELF object : FAILED! # perf test "LLVM search and compile" 38: LLVM search and compile : 38.1: Basic BPF llvm compile : Ok 38.2: kbuild searching : Ok 38.3: Compile source for BPF prologue generation : Ok 38.4: Compile source for BPF relocation : FAILED! # perf test "BPF filter" 40: BPF filter : 40.1: Basic BPF filtering : Ok 40.2: BPF pinning : Ok 40.3: BPF prologue generation : Ok 40.4: BPF relocation checker : FAILED! After: # perf test "builtin clang support" 55: builtin clang support : 55.1: builtin clang compile C source to IR : Ok 55.2: builtin clang compile C source to ELF object : Ok # perf test "LLVM search and compile" 38: LLVM search and compile : 38.1: Basic BPF llvm compile : Ok 38.2: kbuild searching : Ok 38.3: Compile source for BPF prologue generation : Ok 38.4: Compile source for BPF relocation : Ok # perf test "BPF filter" 40: BPF filter : 40.1: Basic BPF filtering : Ok 40.2: BPF pinning : Ok 40.3: BPF prologue generation : Ok 40.4: BPF relocation checker : Ok Signed-off-by: Sandipan Das Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Martin Schwidefsky Cc: Naveen N. Rao Cc: Ravi Bangoria Cc: Thomas Richter Fixes: 9ef0112442bd ("perf test: Fix subtest number when showing results") Link: http://lkml.kernel.org/r/20180726171733.33208-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/builtin-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 5966f1f9b160..1c9bc3516f8b 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -375,7 +375,7 @@ static int test_and_print(struct test *t, bool force_skip, int subtest) if (!t->subtest.get_nr) pr_debug("%s:", t->desc); else - pr_debug("%s subtest %d:", t->desc, subtest); + pr_debug("%s subtest %d:", t->desc, subtest + 1); switch (err) { case TEST_OK: @@ -589,7 +589,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) for (subi = 0; subi < subn; subi++) { pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, t->subtest.get_desc(subi)); - err = test_and_print(t, skip, subi + 1); + err = test_and_print(t, skip, subi); if (err != TEST_OK && t->subtest.skip_if_fail) skip = true; } From 3c3bec81e267a6be198c540ea8d6f80e8f51b433 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 20 Aug 2018 15:36:17 -0700 Subject: [PATCH 0095/2608] compiler.h: Allow arch-specific asm/compiler.h [ Upstream commit 04f264d3a8b0eb25d378127bd78c3c9a0261c828 ] We have a need to override the definition of barrier_before_unreachable() for MIPS, which means we either need to add architecture-specific code into linux/compiler-gcc.h or we need to allow the architecture to provide a header that can define the macro before the generic definition. The latter seems like the better approach. A straightforward approach to the per-arch header is to make use of asm-generic to provide a default empty header & adjust architectures which don't need anything specific to make use of that by adding the header to generic-y. Unfortunately this doesn't work so well due to commit 28128c61e08e ("kconfig.h: Include compiler types to avoid missed struct attributes") which caused linux/compiler_types.h to be included in the compilation of every C file via the -include linux/kconfig.h flag in c_flags. Because the -include flag is present for all C files we compile, we need the architecture-provided header to be present before any C files are compiled. If any C files can be compiled prior to the asm-generic header wrappers being generated then we hit a build failure due to missing header. Such cases do exist - one pointed out by the kbuild test robot is the compilation of arch/ia64/kernel/nr-irqs.c, which occurs as part of the archprepare target [1]. This leaves us with a few options: 1) Use generic-y & fix any build failures we find by enforcing ordering such that the asm-generic target occurs before any C compilation, such that linux/compiler_types.h can always include the generated asm-generic wrapper which in turn includes the empty asm-generic header. This would rely on us finding all the problematic cases - I don't know for sure that the ia64 issue is the only one. 2) Add an actual empty header to each architecture, so that we don't need the generated asm-generic wrapper. This seems messy. 3) Give up & add #ifdef CONFIG_MIPS or similar to linux/compiler_types.h. This seems messy too. 4) Include the arch header only when it's actually needed, removing the need for the asm-generic wrapper for all other architectures. This patch allows us to use approach 4, by including an asm/compiler.h header from linux/compiler_types.h after the inclusion of the compiler-specific linux/compiler-*.h header(s). We do this conditionally, only when CONFIG_HAVE_ARCH_COMPILER_H is selected, in order to avoid the need for asm-generic wrappers & the associated build ordering issue described above. The asm/compiler.h header is included after the generic linux/compiler-*.h header(s) for consistency with the way linux/compiler-intel.h & linux/compiler-clang.h are included after the linux/compiler-gcc.h header that they override. [1] https://lists.01.org/pipermail/kbuild-all/2018-August/051175.html Signed-off-by: Paul Burton Reviewed-by: Masahiro Yamada Patchwork: https://patchwork.linux-mips.org/patch/20269/ Cc: Arnd Bergmann Cc: James Hogan Cc: Masahiro Yamada Cc: Ralf Baechle Cc: linux-arch@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin --- arch/Kconfig | 8 ++++++++ include/linux/compiler_types.h | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 40dc31fea90c..77b3e21c4844 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -965,4 +965,12 @@ config REFCOUNT_FULL against various use-after-free conditions that can be used in security flaw exploits. +config HAVE_ARCH_COMPILER_H + bool + help + An architecture can select this if it provides an + asm/compiler.h header that should be included after + linux/compiler-*.h in order to override macro definitions that those + headers generally provide. + source "kernel/gcov/Kconfig" diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6b79a9bba9a7..4be464a07612 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -78,6 +78,18 @@ extern void __chk_io_ptr(const volatile void __iomem *); #include #endif +/* + * Some architectures need to provide custom definitions of macros provided + * by linux/compiler-*.h, and can do so using asm/compiler.h. We include that + * conditionally rather than using an asm-generic wrapper in order to avoid + * build failures if any C compilation, which will include this file via an + * -include argument in c_flags, occurs prior to the asm-generic wrappers being + * generated. + */ +#ifdef CONFIG_HAVE_ARCH_COMPILER_H +#include +#endif + /* * Generic compiler-dependent macros required for kernel * build go below this comment. Actual compiler/compiler version From c005e014fbc6391ed6f22b141865bf826bbc278d Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 12 Sep 2018 08:23:01 +0200 Subject: [PATCH 0096/2608] ARM: dts: imx53-qsb: disable 1.2GHz OPP [ Upstream commit eea96566c189c77e5272585984eb2729881a2f1d ] The maximum CPU frequency for the i.MX53 QSB is 1GHz, so disable the 1.2GHz OPP. This makes the board work again with configs that have cpufreq enabled like imx_v6_v7_defconfig on which the board stopped working with the addition of cpufreq-dt support. Fixes: 791f416608 ("ARM: dts: imx53: add cpufreq-dt support") Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx53-qsb-common.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 683dcbe27cbd..8c11190c5218 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -130,6 +130,17 @@ }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; From 4fdaadbce403a7cb20ac0c9c3be260d0cf17177d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Sep 2018 16:08:02 -0300 Subject: [PATCH 0097/2608] perf python: Use -Wno-redundant-decls to build with PYTHON=python3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 05a2f54679861deb188750ba2a70187000b2c71f ] When building in ClearLinux using 'make PYTHON=python3' with gcc 8.2.1 it fails with: GEN /tmp/build/perf/python/perf.so In file included from /usr/include/python3.7m/Python.h:126, from /git/linux/tools/perf/util/python.c:2: /usr/include/python3.7m/import.h:58:24: error: redundant redeclaration of ‘_PyImport_AddModuleObject’ [-Werror=redundant-decls] PyAPI_FUNC(PyObject *) _PyImport_AddModuleObject(PyObject *, PyObject *); ^~~~~~~~~~~~~~~~~~~~~~~~~ /usr/include/python3.7m/import.h:47:24: note: previous declaration of ‘_PyImport_AddModuleObject’ was here PyAPI_FUNC(PyObject *) _PyImport_AddModuleObject(PyObject *name, ^~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors error: command 'gcc' failed with exit status 1 And indeed there is a redundant declaration in that Python.h file, one with parameter names and the other without, so just add -Wno-error=redundant-decls to the python setup instructions. Now perf builds with gcc in ClearLinux with the following Dockerfile: # docker.io/acmel/linux-perf-tools-build-clearlinux:latest FROM docker.io/clearlinux:latest MAINTAINER Arnaldo Carvalho de Melo RUN swupd update && \ swupd bundle-add sysadmin-basic-dev RUN mkdir -m 777 -p /git /tmp/build/perf /tmp/build/objtool /tmp/build/linux && \ groupadd -r perfbuilder && \ useradd -m -r -g perfbuilder perfbuilder && \ chown -R perfbuilder.perfbuilder /tmp/build/ /git/ USER perfbuilder COPY rx_and_build.sh / ENV EXTRA_MAKE_ARGS=PYTHON=python3 ENTRYPOINT ["/rx_and_build.sh"] Now to figure out why the build fails with clang, that is present in the above container as detected by the rx_and_build.sh script: clang version 6.0.1 (tags/RELEASE_601/final) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /usr/sbin make: Entering directory '/git/linux/tools/perf' BUILD: Doing 'make -j4' parallel build HOSTCC /tmp/build/perf/fixdep.o HOSTLD /tmp/build/perf/fixdep-in.o LINK /tmp/build/perf/fixdep Auto-detecting system features: ... dwarf: [ OFF ] ... dwarf_getlocations: [ OFF ] ... glibc: [ OFF ] ... gtk2: [ OFF ] ... libaudit: [ OFF ] ... libbfd: [ OFF ] ... libelf: [ OFF ] ... libnuma: [ OFF ] ... numa_num_possible_cpus: [ OFF ] ... libperl: [ OFF ] ... libpython: [ OFF ] ... libslang: [ OFF ] ... libcrypto: [ OFF ] ... libunwind: [ OFF ] ... libdw-dwarf-unwind: [ OFF ] ... zlib: [ OFF ] ... lzma: [ OFF ] ... get_cpuid: [ OFF ] ... bpf: [ OFF ] Makefile.config:331: *** No gnu/libc-version.h found, please install glibc-dev[el]. Stop. make[1]: *** [Makefile.perf:206: sub-make] Error 2 make: *** [Makefile:70: all] Error 2 make: Leaving directory '/git/linux/tools/perf' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thiago Macieira Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-c3khb9ac86s00qxzjrueomme@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index da4df7fd43a2..23f1bf175179 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -27,7 +27,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) -cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] if cc != "clang": cflags += ['-Wno-cast-function-type' ] From 9a6d45700adf03085add4ab26bcdca6ee2487a05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Oct 2018 15:46:01 +0100 Subject: [PATCH 0098/2608] rxrpc: Don't check RXRPC_CALL_TX_LAST after calling rxrpc_rotate_tx_window() [ Upstream commit c479d5f2c2e1ce609da08c075054440d97ddff52 ] We should only call the function to end a call's Tx phase if we rotated the marked-last packet out of the transmission buffer. Make rxrpc_rotate_tx_window() return an indication of whether it just rotated the packet marked as the last out of the transmit buffer, carrying the information out of the locked section in that function. We can then check the return value instead of examining RXRPC_CALL_TX_LAST. Fixes: 70790dbe3f66 ("rxrpc: Pass the last Tx packet marker in the annotation buffer") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/input.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5edb636dbc4d..3a501bf0fc1a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, +static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; + bool rot_last = false; int ix; u8 annotation; @@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = list; list = skb; - if (annotation & RXRPC_TX_ANNO_LAST) + if (annotation & RXRPC_TX_ANNO_LAST) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; + } if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) summary->nr_rot_new_acks++; } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + trace_rxrpc_transmit(call, (rot_last ? rxrpc_transmit_rotate_last : rxrpc_transmit_rotate)); wake_up(&call->waitq); @@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = NULL; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } + + return rot_last; } /* @@ -332,11 +337,11 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) ktime_get_real()); } - if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_proto_abort("TXL", call, top); - return false; + if (!rxrpc_rotate_tx_window(call, top, &summary)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } } if (!rxrpc_end_tx_phase(call, true, "ETD")) return false; @@ -837,8 +842,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (nr_acks > call->tx_top - hard_ack) return rxrpc_proto_abort("AKN", call, 0); - if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack, &summary); + if (after(hard_ack, call->tx_hard_ack)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + rxrpc_end_tx_phase(call, false, "ETA"); + return; + } + } if (nr_acks > 0) { if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) @@ -847,11 +856,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, &summary); } - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_end_tx_phase(call, false, "ETA"); - return; - } - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && @@ -873,8 +877,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top, &summary); - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); } From f1d27ff6e027204e6bec8dd01153a196219d9d9b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Oct 2018 15:46:11 +0100 Subject: [PATCH 0099/2608] rxrpc: Only take the rwind and mtu values from latest ACK [ Upstream commit 298bc15b2079c324e82d0a6fda39c3d762af7282 ] Move the out-of-order and duplicate ACK packet check to before the call to rxrpc_input_ackinfo() so that the receive window size and MTU size are only checked in the latest ACK packet and don't regress. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/input.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3a501bf0fc1a..ea506a77f3c8 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -806,6 +806,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) { + _debug("discard ACK %d <= %d", + sp->hdr.serial, call->acks_latest); + return; + } + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + /* Parse rwind and mtu sizes if provided. */ ioffset = offset + nr_acks + 3; if (skb->len >= ioffset + sizeof(buf.info)) { if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) @@ -827,15 +837,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return; } - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) { - _debug("discard ACK %d <= %d", - sp->hdr.serial, call->acks_latest); - return; - } - call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; - if (before(hard_ack, call->tx_hard_ack) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort("AKW", call, 0); From 252869e5fb49b6397584d9dbe46b246bb1787ef3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Oct 2018 15:46:17 +0100 Subject: [PATCH 0100/2608] rxrpc: Fix connection-level abort handling [ Upstream commit 647530924f47c93db472ee3cf43b7ef1425581b6 ] Fix connection-level abort handling to cache the abort and error codes properly so that a new incoming call can be properly aborted if it races with the parent connection being aborted by another CPU. The abort_code and error parameters can then be dropped from rxrpc_abort_calls(). Fixes: f5c17aaeb2ae ("rxrpc: Calls should only have one terminal state") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/ar-internal.h | 4 ++-- net/rxrpc/call_accept.c | 4 ++-- net/rxrpc/conn_event.c | 26 +++++++++++++++----------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e6c2c4f56fb1..71c7f1dd4599 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -424,8 +424,7 @@ struct rxrpc_connection { spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_cache_state cache_state; enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ + u32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ @@ -435,6 +434,7 @@ struct rxrpc_connection { u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + short error; /* Local error code */ }; /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 62b1581d44a5..2dd13f5c47c8 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -418,11 +418,11 @@ found_service: case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; default: BUG(); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 0435c4167a1a..75ec1ad595b7 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -117,7 +117,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code); break; case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0, @@ -135,13 +135,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl, - u32 abort_code, int error) + enum rxrpc_call_completion compl) { struct rxrpc_call *call; int i; - _enter("{%d},%x", conn->debug_id, abort_code); + _enter("{%d},%x", conn->debug_id, conn->abort_code); spin_lock(&conn->channel_lock); @@ -153,9 +152,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, if (compl == RXRPC_CALL_LOCALLY_ABORTED) trace_rxrpc_abort("CON", call->cid, call->call_id, 0, - abort_code, error); + conn->abort_code, + conn->error); if (rxrpc_set_call_completion(call, compl, - abort_code, error)) + conn->abort_code, + conn->error)) rxrpc_notify_socket(call); } } @@ -188,10 +189,12 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, return 0; } + conn->error = error; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; @@ -210,7 +213,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr._rsvd = 0; whdr.serviceId = htons(conn->service_id); - word = htonl(conn->local_abort); + word = htonl(conn->abort_code); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -221,7 +224,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort); + _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ret < 0) { @@ -289,9 +292,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); + conn->error = -ECONNABORTED; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: From ba4cf1eefd54fbeadf90cfe8236643bed5681548 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 9 Oct 2018 11:21:27 +0300 Subject: [PATCH 0101/2608] net: ena: fix warning in rmmod caused by double iounmap [ Upstream commit d79c3888bde6581da7ff9f9d6f581900ecb5e632 ] Memory mapped with devm_ioremap is automatically freed when the driver is disconnected from the device. Therefore there is no need to explicitly call devm_iounmap. Fixes: 0857d92f71b6 ("net: ena: add missing unmap bars on device removal") Fixes: 411838e7b41c ("net: ena: fix rare kernel crash when bar memory remap fails") Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 60b3ee29d82c..08c9c99a8331 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3059,15 +3059,8 @@ err_rss_init: static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { - int release_bars; + int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; - if (ena_dev->mem_bar) - devm_iounmap(&pdev->dev, ena_dev->mem_bar); - - if (ena_dev->reg_bar) - devm_iounmap(&pdev->dev, ena_dev->reg_bar); - - release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } From 1e4f8315a5c8e914fb6365c930b45fffd73b02cd Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 9 Oct 2018 11:21:29 +0300 Subject: [PATCH 0102/2608] net: ena: fix NULL dereference due to untimely napi initialization [ Upstream commit 78a55d05def95144ca5fa9a64c49b2a0636a9866 ] napi poll functions should be initialized before running request_irq(), to handle a rare condition where there is a pending interrupt, causing the ISR to fire immediately while the poll function wasn't set yet, causing a NULL dereference. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 08c9c99a8331..3c7813f04962 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1571,8 +1571,6 @@ static int ena_up_complete(struct ena_adapter *adapter) if (rc) return rc; - ena_init_napi(adapter); - ena_change_mtu(adapter->netdev, adapter->netdev->mtu); ena_refill_all_rx_bufs(adapter); @@ -1726,6 +1724,13 @@ static int ena_up(struct ena_adapter *adapter) ena_setup_io_intr(adapter); + /* napi poll functions should be initialized before running + * request_irq(), to handle a rare condition where there is a pending + * interrupt, causing the ISR to fire immediately while the poll + * function wasn't set yet, causing a null dereference + */ + ena_init_napi(adapter); + rc = ena_request_io_irq(adapter); if (rc) goto err_req_irq; From 5309191ec0496e58167421042d42263bd6e13dac Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 11 Oct 2018 10:54:52 +0200 Subject: [PATCH 0103/2608] selftests: rtnetlink.sh explicitly requires bash. [ Upstream commit 3c718e677c2b35b449992adc36ecce883c467e98 ] the script rtnetlink.sh requires a bash-only features (sleep with sub-second precision). This may cause random test failure if the default shell is not bash. Address the above explicitly requiring bash as the script interpreter. Fixes: 33b01b7b4f19 ("selftests: add rtnetlink test script") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/rtnetlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 57b5ff576240..891130daac7c 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # This test is for checking rtnetlink callpaths, and get as much coverage as possible. # From 3d69b85e14be7a6604a8820f80c1b712c65241d7 Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Fri, 12 Oct 2018 21:34:40 -0700 Subject: [PATCH 0104/2608] fs/fat/fatent.c: add cond_resched() to fat_count_free_clusters() [ Upstream commit ac081c3be3fae6d0cc3e1862507fca3862d30b67 ] On non-preempt kernels this loop can take a long time (more than 50 ticks) processing through entries. Link: http://lkml.kernel.org/r/20181010172623.57033-1-khazhy@google.com Signed-off-by: Khazhismel Kumykov Acked-by: OGAWA Hirofumi Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/fat/fatent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index a40f36b1b292..9635df94db7d 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -681,6 +681,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; From bd6df7a19559f9b52049f97c3188a7d1544e16df Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Thu, 18 Oct 2018 13:38:40 -0700 Subject: [PATCH 0105/2608] sch_netem: restore skb->dev after dequeuing from the rbtree Upstream commit bffa72cf7f9d ("net: sk_buff rbnode reorg") got backported as commit 6b921536f170 ("net: sk_buff rbnode reorg") into the v4.14.x-tree. However, the backport does not include the changes in sch_netem.c We need these, as otherwise the skb->dev pointer is not set when dequeueing from the netem rbtree, resulting in a panic: [ 15.427748] BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0 [ 15.428863] IP: netif_skb_features+0x24/0x230 [ 15.429402] PGD 0 P4D 0 [ 15.429733] Oops: 0000 [#1] SMP PTI [ 15.430169] Modules linked in: [ 15.430614] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.14.77.mptcp #77 [ 15.431497] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 [ 15.432568] task: ffff88042db19680 task.stack: ffffc90000070000 [ 15.433356] RIP: 0010:netif_skb_features+0x24/0x230 [ 15.433977] RSP: 0018:ffff88043fd83e70 EFLAGS: 00010286 [ 15.434665] RAX: ffff880429ad80c0 RBX: ffff88042bd0e400 RCX: ffff880429ad8000 [ 15.435585] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88042bd0e400 [ 15.436551] RBP: ffff88042bd0e400 R08: ffff88042a4b6c9c R09: 0000000000000001 [ 15.437485] R10: 0000000000000004 R11: 0000000000000000 R12: ffff88042c700000 [ 15.438393] R13: ffff88042c700000 R14: ffff88042a4b6c00 R15: ffff88042c6bb000 [ 15.439315] FS: 0000000000000000(0000) GS:ffff88043fd80000(0000) knlGS:0000000000000000 [ 15.440314] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 15.441084] CR2: 00000000000000d0 CR3: 000000042c374000 CR4: 00000000000006e0 [ 15.442016] Call Trace: [ 15.442333] [ 15.442596] validate_xmit_skb+0x17/0x270 [ 15.443134] validate_xmit_skb_list+0x38/0x60 [ 15.443698] sch_direct_xmit+0x102/0x190 [ 15.444198] __qdisc_run+0xe3/0x240 [ 15.444671] net_tx_action+0x121/0x140 [ 15.445177] __do_softirq+0xe2/0x224 [ 15.445654] irq_exit+0xbf/0xd0 [ 15.446072] smp_apic_timer_interrupt+0x5d/0x90 [ 15.446654] apic_timer_interrupt+0x7d/0x90 [ 15.447185] [ 15.447460] RIP: 0010:native_safe_halt+0x2/0x10 [ 15.447992] RSP: 0018:ffffc90000073f10 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10 [ 15.449008] RAX: ffffffff816667d0 RBX: ffffffff820946b0 RCX: 0000000000000000 [ 15.449895] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 15.450768] RBP: ffffffff82026940 R08: 00000004e858e5e1 R09: ffff88042a4b6d58 [ 15.451643] R10: 0000000000000000 R11: 000000d0d56879bb R12: 0000000000000000 [ 15.452478] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 15.453340] ? __sched_text_end+0x2/0x2 [ 15.453835] default_idle+0xf/0x20 [ 15.454259] do_idle+0x170/0x200 [ 15.454653] cpu_startup_entry+0x14/0x20 [ 15.455142] secondary_startup_64+0xa5/0xb0 [ 15.455715] Code: 1f 84 00 00 00 00 00 55 53 48 89 fd 48 83 ec 08 8b 87 bc 00 00 00 48 8b 8f c0 00 00 00 0f b6 97 81 00 00 00 48 8b 77 10 48 01 c8 <48> 8b 9 [ 15.458138] RIP: netif_skb_features+0x24/0x230 RSP: ffff88043fd83e70 [ 15.458933] CR2: 00000000000000d0 [ 15.459352] ---[ end trace 083925903ae60570 ]--- Fixes: 6b921536f170 ("net: sk_buff rbnode reorg") Cc: Stephen Hemminger Cc: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Wei Wang Cc: Willem de Bruijn Signed-off-by: Christoph Paasch Signed-off-by: Sasha Levin --- net/sched/sch_netem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 2a2ab6bfe5d8..3d325b840802 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -624,6 +624,10 @@ deliver: skb->next = NULL; skb->prev = NULL; skb->tstamp = netem_skb_cb(skb)->tstamp_save; + /* skb->dev shares skb->rbnode area, + * we need to restore its value. + */ + skb->dev = qdisc_dev(sch); #ifdef CONFIG_NET_CLS_ACT /* From 3642e35447495208665e30ffbc2466a2a488b99e Mon Sep 17 00:00:00 2001 From: Kimmo Rautkoski Date: Sat, 20 Oct 2018 22:44:42 +0300 Subject: [PATCH 0106/2608] mtd: spi-nor: Add support for is25wp series chips [ Upstream commit d616f81cdd2a21edfa90a595a4e9b143f5ba8414 ] Added support for is25wp032, is25wp064 and is25wp128. Signed-off-by: Kimmo Rautkoski Reviewed-by: Marek Vasut Signed-off-by: Boris Brezillon [ Adrian Bunk: Trivial adaption to changed context. ] Signed-off-by: Adrian Bunk Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/spi-nor.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 19c000722cbc..34ecc12ee3d9 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1005,6 +1005,12 @@ static const struct flash_info spi_nor_ids[] = { /* ISSI */ { "is25cd512", INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, + { "is25wp032", INFO(0x9d7016, 0, 64 * 1024, 64, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25wp064", INFO(0x9d7017, 0, 64 * 1024, 128, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25wp128", INFO(0x9d7018, 0, 64 * 1024, 256, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, /* Macronix */ { "mx25l512e", INFO(0xc22010, 0, 64 * 1024, 1, SECT_4K) }, From 871424f0c3cfcf51727cb56a0a5f1cbfc9e429c6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 12 Dec 2017 17:15:02 +0100 Subject: [PATCH 0107/2608] kvm: x86: fix WARN due to uninitialized guest FPU state commit 5663d8f9bbe4bf15488f7351efb61ea20fa6de06 upstream ------------[ cut here ]------------ Bad FPU state detected at kvm_put_guest_fpu+0xd8/0x2d0 [kvm], reinitializing FPU registers. WARNING: CPU: 1 PID: 4594 at arch/x86/mm/extable.c:103 ex_handler_fprestore+0x88/0x90 CPU: 1 PID: 4594 Comm: qemu-system-x86 Tainted: G B OE 4.15.0-rc2+ #10 RIP: 0010:ex_handler_fprestore+0x88/0x90 Call Trace: fixup_exception+0x4e/0x60 do_general_protection+0xff/0x270 general_protection+0x22/0x30 RIP: 0010:kvm_put_guest_fpu+0xd8/0x2d0 [kvm] RSP: 0018:ffff8803d5627810 EFLAGS: 00010246 kvm_vcpu_reset+0x3b4/0x3c0 [kvm] kvm_apic_accept_events+0x1c0/0x240 [kvm] kvm_arch_vcpu_ioctl_run+0x1658/0x2fb0 [kvm] kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 do_syscall_64+0x15f/0x600 where kvm_put_guest_fpu is called without a prior kvm_load_guest_fpu. To fix it, move kvm_load_guest_fpu to the very beginning of kvm_arch_vcpu_ioctl_run. Cc: stable@vger.kernel.org Fixes: f775b13eedee2f7f3c6fdd4e90fb79090ce5d339 Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini Signed-off-by: Sudip Mukherjee Acked-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3856828ee1dc..8d688b213504 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7393,13 +7393,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct fpu *fpu = ¤t->thread.fpu; int r; - fpu__initialize(fpu); - kvm_sigset_activate(vcpu); + kvm_load_guest_fpu(vcpu); + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { r = -EINTR; @@ -7440,6 +7439,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = vcpu_run(vcpu); out: + kvm_put_guest_fpu(vcpu); post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); From cdb44bdafc3a846aa785102f65a26762fbe32037 Mon Sep 17 00:00:00 2001 From: Chris Paterson Date: Thu, 14 Dec 2017 09:08:39 +0000 Subject: [PATCH 0108/2608] ARM: dts: r8a7790: Correct critical CPU temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fcab5651fae4258a993170b7aaf443adbd3d4d84 upstream The R-Car H2 hardware manual states that Tc = –40°C to +105°C. The thermal sensor has an accuracy of ±5°C and there can be a temperature difference of 1 or 2 degrees between Tjmax and the thermal sensor due to the location of the latter. This means that 95°C is a safer value to use. Fixes: a8b805f3606f7af7 ("ARM: dts: r8a7790: enable to use thermal-zone") Signed-off-by: Chris Paterson Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- arch/arm/boot/dts/r8a7790.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 16358bf8d1db..97e8b9b0b750 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -153,7 +153,7 @@ trips { cpu-crit { - temperature = <115000>; + temperature = <95000>; hysteresis = <0>; type = "critical"; }; From e75194d294421704baf4c577e504cb9c92ec7ce2 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 21 May 2018 06:24:58 -0400 Subject: [PATCH 0109/2608] media: uvcvideo: Fix driver reference counting commit f9ffcb0a21e1fa8e64d09ed613d884e054ae8191 upstream kref_init initializes the reference count to 1, not 0. This additional reference is never released since the conversion to reference counters. As a result, uvc_delete is not called anymore when UVC cameras are disconnected. Fix this by adding an additional kref_put in uvc_disconnect and in the probe error path. This also allows to remove the temporary additional reference in uvc_unregister_video. Fixes: 9d15cd958c17 ("media: uvcvideo: Convert from using an atomic variable to a reference count") Signed-off-by: Philipp Zabel Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- drivers/media/usb/uvc/uvc_driver.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 6d22b22cb35b..064d88299adc 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1865,13 +1865,6 @@ static void uvc_unregister_video(struct uvc_device *dev) { struct uvc_streaming *stream; - /* Unregistering all video devices might result in uvc_delete() being - * called from inside the loop if there's no open file handle. To avoid - * that, increment the refcount before iterating over the streams and - * decrement it when done. - */ - kref_get(&dev->ref); - list_for_each_entry(stream, &dev->streams, list) { if (!video_is_registered(&stream->vdev)) continue; @@ -1880,8 +1873,6 @@ static void uvc_unregister_video(struct uvc_device *dev) uvc_debugfs_cleanup_stream(stream); } - - kref_put(&dev->ref, uvc_delete); } static int uvc_register_video(struct uvc_device *dev, @@ -2129,6 +2120,7 @@ static int uvc_probe(struct usb_interface *intf, error: uvc_unregister_video(dev); + kref_put(&dev->ref, uvc_delete); return -ENODEV; } @@ -2146,6 +2138,7 @@ static void uvc_disconnect(struct usb_interface *intf) return; uvc_unregister_video(dev); + kref_put(&dev->ref, uvc_delete); } static int uvc_suspend(struct usb_interface *intf, pm_message_t message) From ee66ad5896aef9b67b7b1e3db6fa9430b907fa58 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 6 Nov 2017 10:47:14 +0100 Subject: [PATCH 0110/2608] ALSA: usx2y: Fix invalid stream URBs commit f9a1c372299fed53d4b72bb601f7f3bfe6f9999c upstream The us122l driver creates URBs per the fixed endpoints, and this may end up with URBs with inconsistent pipes when a fuzzer or a malicious program deals with the manipulated endpoints. It ends up with a kernel warning like: usb 1-1: BOGUS urb xfer, pipe 0 != type 3 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 24 at drivers/usb/core/urb.c:471 usb_submit_urb+0x113e/0x1400 Call Trace: usb_stream_start+0x48a/0x9f0 sound/usb/usx2y/usb_stream.c:690 us122l_start+0x116/0x290 sound/usb/usx2y/us122l.c:365 us122l_create_card sound/usb/usx2y/us122l.c:502 us122l_usb_probe sound/usb/usx2y/us122l.c:588 .... For avoiding the bad access, this patch adds a few sanity checks of the validity of created URBs like previous similar fixes using the new usb_urb_ep_type_check() helper function. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Takashi Iwai Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- sound/usb/usx2y/usb_stream.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c index e229abd21652..b0f8979ff2d2 100644 --- a/sound/usb/usx2y/usb_stream.c +++ b/sound/usb/usx2y/usb_stream.c @@ -56,7 +56,7 @@ check: lb, s->period_size); } -static void init_pipe_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, +static int init_pipe_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, struct urb **urbs, char *transfer, struct usb_device *dev, int pipe) { @@ -77,6 +77,8 @@ static void init_pipe_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, urb->interval = 1; if (usb_pipeout(pipe)) continue; + if (usb_urb_ep_type_check(urb)) + return -EINVAL; urb->transfer_buffer_length = transfer_length; desc = urb->iso_frame_desc; @@ -87,9 +89,11 @@ static void init_pipe_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, desc[p].length = maxpacket; } } + + return 0; } -static void init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, +static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, struct usb_device *dev, int in_pipe, int out_pipe) { struct usb_stream *s = sk->s; @@ -103,9 +107,12 @@ static void init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL); } - init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe); - init_pipe_urbs(sk, use_packsize, sk->outurb, sk->write_page, dev, - out_pipe); + if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) || + init_pipe_urbs(sk, use_packsize, sk->outurb, sk->write_page, dev, + out_pipe)) + return -EINVAL; + + return 0; } @@ -226,7 +233,11 @@ struct usb_stream *usb_stream_new(struct usb_stream_kernel *sk, else sk->freqn = get_usb_high_speed_rate(sample_rate); - init_urbs(sk, use_packsize, dev, in_pipe, out_pipe); + if (init_urbs(sk, use_packsize, dev, in_pipe, out_pipe) < 0) { + usb_stream_free(sk); + return NULL; + } + sk->s->state = usb_stream_stopped; out: return sk->s; From ac7c2bb59870b8fd41c09502e73e72fcb63b85c3 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 31 Oct 2018 11:05:19 -0400 Subject: [PATCH 0111/2608] Revert "netfilter: ipv6: nf_defrag: drop skb dst before queueing" This reverts commit 28c74ff85efd192aeca9005499ca50c24d795f61. From Florian Westphal : It causes kernel crash for locally generated ipv6 fragments when netfilter ipv6 defragmentation is used. The faulty commit is not essential for -stable, it only delays netns teardown for longer than needed when that netns still has ipv6 frags queued. Much better than crash :-/ Signed-off-by: Sasha Levin --- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2ed8536e10b6..611d406c4656 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -598,8 +598,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) fq->q.meat == fq->q.len && nf_ct_frag6_reasm(fq, skb, dev)) ret = 0; - else - skb_dst_drop(skb); out_unlock: spin_unlock_bh(&fq->q.lock); From 2980235974cdb243b06d8eb9d12a3fce44f401a1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Jul 2018 15:15:27 +0200 Subject: [PATCH 0112/2608] perf tools: Disable parallelism for 'make clean' [ Upstream commit da15fc2fa9c07b23db8f5e479bd8a9f0d741ca07 ] The Yocto build system does a 'make clean' when rebuilding due to changed dependencies, and that consistently fails for me (causing the whole BSP build to fail) with errors such as | find: '[...]/perf/1.0-r9/perf-1.0/plugin_mac80211.so': No such file or directory | find: '[...]/perf/1.0-r9/perf-1.0/plugin_mac80211.so': No such file or directory | find: find: '[...]/perf/1.0-r9/perf-1.0/libtraceevent.a''[...]/perf/1.0-r9/perf-1.0/libtraceevent.a': No such file or directory: No such file or directory | [...] | find: cannot delete '/mnt/xfs/devel/pil/yocto/tmp-glibc/work/wandboard-oe-linux-gnueabi/perf/1.0-r9/perf-1.0/util/.pstack.o.cmd': No such file or directory Apparently (despite the comment), 'make clean' ends up launching multiple sub-makes that all want to remove the same things - perhaps this only happens in combination with a O=... parameter. In any case, we don't lose much by explicitly disabling the parallelism for the clean target, and it makes automated builds much more reliable. Signed-off-by: Rasmus Villemoes Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180705131527.19749-1-linux@rasmusvillemoes.dk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 225454416ed5..7902a5681fc8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -84,10 +84,10 @@ endif # has_clean endif # MAKECMDGOALS # -# The clean target is not really parallel, don't print the jobs info: +# Explicitly disable parallelism for the clean target. # clean: - $(make) + $(make) -j1 # # The build-test target is not really parallel, don't print the jobs info, From b6534b3e08e8e10e871b53ce5a1e9ce4bcb94907 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 10 Apr 2018 13:33:12 +0100 Subject: [PATCH 0113/2608] drm/i915/gvt: fix memory leak of a cmd_entry struct on error exit path commit ffdf16edfbbe77f5f5c3c87fe8d7387ecd16241b upstream The error exit path when a duplicate is found does not kfree and cmd_entry struct and hence there is a small memory leak. Fix this by kfree'ing it. Detected by CoverityScan, CID#1370198 ("Resource Leak") Fixes: be1da7070aea ("drm/i915/gvt: vGPU command scanner") Signed-off-by: Colin Ian King Reviewed-by: Chris Wilson Signed-off-by: Zhenyu Wang Signed-off-by: Sudip Mukherjee Acked-by: Zhenyu Wang Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d4726a3358a4..d6993c2707d1 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2802,6 +2802,7 @@ static int init_cmd_table(struct intel_gvt *gvt) if (info) { gvt_err("%s %s duplicated\n", e->info->name, info->name); + kfree(e); return -EEXIST; } From 1bd68d5612af83ed392f066861eb5033bfcad28c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 26 Oct 2018 10:28:43 +0800 Subject: [PATCH 0114/2608] bridge: do not add port to router list when receives query with source 0.0.0.0 commit 5a2de63fd1a59c30c02526d427bc014b98adf508 upstream. Based on RFC 4541, 2.1.1. IGMP Forwarding Rules The switch supporting IGMP snooping must maintain a list of multicast routers and the ports on which they are attached. This list can be constructed in any combination of the following ways: a) This list should be built by the snooping switch sending Multicast Router Solicitation messages as described in IGMP Multicast Router Discovery [MRDISC]. It may also snoop Multicast Router Advertisement messages sent by and to other nodes. b) The arrival port for IGMP Queries (sent by multicast routers) where the source address is not 0.0.0.0. We should not add the port to router list when receives query with source 0.0.0.0. Reported-by: Ying Xu Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8dc5c8d69bcd..efbcc811297c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1390,7 +1390,15 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - br_multicast_mark_router(br, port); + + /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, + * the arrival port for IGMP Queries where the source address + * is 0.0.0.0 should not be added to router port list. + */ + if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || + (saddr->proto == htons(ETH_P_IPV6) && + !ipv6_addr_any(&saddr->u.ip6))) + br_multicast_mark_router(br, port); } static int br_ip4_multicast_query(struct net_bridge *br, From c3cf86dac56dbacab38d27c072b492cbbe89f4f1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 27 Oct 2018 12:07:47 +0300 Subject: [PATCH 0115/2608] net: bridge: remove ipv6 zero address check in mcast queries commit 0fe5119e267f3e3d8ac206895f5922195ec55a8a upstream. Recently a check was added which prevents marking of routers with zero source address, but for IPv6 that cannot happen as the relevant RFCs actually forbid such packets: RFC 2710 (MLDv1): "To be valid, the Query message MUST come from a link-local IPv6 Source Address, be at least 24 octets long, and have a correct MLD checksum." Same goes for RFC 3810. And also it can be seen as a requirement in ipv6_mc_check_mld_query() which is used by the bridge to validate the message before processing it. Thus any queries with :: source address won't be processed anyway. So just remove the check for zero IPv6 source address from the query processing function. Fixes: 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Cc: Hangbin Liu Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index efbcc811297c..a813dfe2dc2c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1396,8 +1396,7 @@ static void br_multicast_query_received(struct net_bridge *br, * is 0.0.0.0 should not be added to router port list. */ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - (saddr->proto == htons(ETH_P_IPV6) && - !ipv6_addr_any(&saddr->u.ip6))) + saddr->proto == htons(ETH_P_IPV6)) br_multicast_mark_router(br, port); } From 5d4c5861211f62be25e36254981521c1e5030b19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Oct 2018 18:58:53 -0700 Subject: [PATCH 0116/2608] ipv6: mcast: fix a use-after-free in inet6_mc_check [ Upstream commit dc012f3628eaecfb5ba68404a5c30ef501daf63d ] syzbot found a use-after-free in inet6_mc_check [1] The problem here is that inet6_mc_check() uses rcu and read_lock(&iml->sflock) So the fact that ip6_mc_leave_src() is called under RTNL and the socket lock does not help us, we need to acquire iml->sflock in write mode. In the future, we should convert all this stuff to RCU. [1] BUG: KASAN: use-after-free in ipv6_addr_equal include/net/ipv6.h:521 [inline] BUG: KASAN: use-after-free in inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 Read of size 8 at addr ffff8801ce7f2510 by task syz-executor0/22432 CPU: 1 PID: 22432 Comm: syz-executor0 Not tainted 4.19.0-rc7+ #280 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 ipv6_addr_equal include/net/ipv6.h:521 [inline] inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 __raw_v6_lookup+0x320/0x3f0 net/ipv6/raw.c:98 ipv6_raw_deliver net/ipv6/raw.c:183 [inline] raw6_local_deliver+0x3d3/0xcb0 net/ipv6/raw.c:240 ip6_input_finish+0x467/0x1aa0 net/ipv6/ip6_input.c:345 NF_HOOK include/linux/netfilter.h:289 [inline] ip6_input+0xe9/0x600 net/ipv6/ip6_input.c:426 ip6_mc_input+0x48a/0xd20 net/ipv6/ip6_input.c:503 dst_input include/net/dst.h:450 [inline] ip6_rcv_finish+0x17a/0x330 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:289 [inline] ipv6_rcv+0x120/0x640 net/ipv6/ip6_input.c:271 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 netif_receive_skb_internal+0x12c/0x620 net/core/dev.c:5126 napi_frags_finish net/core/dev.c:5664 [inline] napi_gro_frags+0x75a/0xc90 net/core/dev.c:5737 tun_get_user+0x3189/0x4250 drivers/net/tun.c:1923 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1968 call_write_iter include/linux/fs.h:1808 [inline] do_iter_readv_writev+0x8b0/0xa80 fs/read_write.c:680 do_iter_write+0x185/0x5f0 fs/read_write.c:959 vfs_writev+0x1f1/0x360 fs/read_write.c:1004 do_writev+0x11a/0x310 fs/read_write.c:1039 __do_sys_writev fs/read_write.c:1112 [inline] __se_sys_writev fs/read_write.c:1109 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457421 Code: 75 14 b8 14 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 34 b5 fb ff c3 48 83 ec 08 e8 1a 2d 00 00 48 89 04 24 b8 14 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 63 2d 00 00 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00007f2d30ecaba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 000000000000003e RCX: 0000000000457421 RDX: 0000000000000001 RSI: 00007f2d30ecabf0 RDI: 00000000000000f0 RBP: 0000000020000500 R08: 00000000000000f0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f2d30ecb6d4 R13: 00000000004c4890 R14: 00000000004d7b90 R15: 00000000ffffffff Allocated by task 22437: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc mm/slab.c:3718 [inline] __kmalloc+0x14e/0x760 mm/slab.c:3727 kmalloc include/linux/slab.h:518 [inline] sock_kmalloc+0x15a/0x1f0 net/core/sock.c:1983 ip6_mc_source+0x14dd/0x1960 net/ipv6/mcast.c:427 do_ipv6_setsockopt.isra.9+0x3afb/0x45d0 net/ipv6/ipv6_sockglue.c:743 ipv6_setsockopt+0xbd/0x170 net/ipv6/ipv6_sockglue.c:933 rawv6_setsockopt+0x59/0x140 net/ipv6/raw.c:1069 sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3038 __sys_setsockopt+0x1ba/0x3c0 net/socket.c:1902 __do_sys_setsockopt net/socket.c:1913 [inline] __se_sys_setsockopt net/socket.c:1910 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1910 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 22430: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3813 __sock_kfree_s net/core/sock.c:2004 [inline] sock_kfree_s+0x29/0x60 net/core/sock.c:2010 ip6_mc_leave_src+0x11a/0x1d0 net/ipv6/mcast.c:2448 __ipv6_sock_mc_close+0x20b/0x4e0 net/ipv6/mcast.c:310 ipv6_sock_mc_close+0x158/0x1d0 net/ipv6/mcast.c:328 inet6_release+0x40/0x70 net/ipv6/af_inet6.c:452 __sock_release+0xd7/0x250 net/socket.c:579 sock_close+0x19/0x20 net/socket.c:1141 __fput+0x385/0xa30 fs/file_table.c:278 ____fput+0x15/0x20 fs/file_table.c:309 task_work_run+0x1e8/0x2a0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:193 [inline] exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801ce7f2500 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 16 bytes inside of 192-byte region [ffff8801ce7f2500, ffff8801ce7f25c0) The buggy address belongs to the page: page:ffffea000739fc80 count:1 mapcount:0 mapping:ffff8801da800040 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0006f6e548 ffffea000737b948 ffff8801da800040 raw: 0000000000000000 ffff8801ce7f2000 0000000100000010 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801ce7f2400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801ce7f2480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc >ffff8801ce7f2500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801ce7f2580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8801ce7f2600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d112762b4cb8..bd269e78272a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2412,17 +2412,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and rtnl lock - * so no other readers or writers of iml or its sflist - */ + write_lock_bh(&iml->sflock); if (!iml->sflist) { /* any-source empty exclude case */ - return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + } else { + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, + iml->sflist->sl_count, iml->sflist->sl_addr, 0); + sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); + iml->sflist = NULL; } - err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + write_unlock_bh(&iml->sflock); return err; } From 1e42e97af9832848bbeba5c1f7f35dbcffec5642 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 24 Oct 2018 14:37:21 +0200 Subject: [PATCH 0117/2608] ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called [ Upstream commit ee1abcf689353f36d9322231b4320926096bdee0 ] Commit a61bbcf28a8c ("[NET]: Store skb->timestamp as offset to a base timestamp") introduces a neighbour control buffer and zeroes it out in ndisc_rcv(), as ndisc_recv_ns() uses it. Commit f2776ff04722 ("[IPV6]: Fix address/interface handling in UDP and DCCP, according to the scoping architecture.") introduces the usage of the IPv6 control buffer in protocol error handlers (e.g. inet6_iif() in present-day __udp6_lib_err()). Now, with commit b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect()."), we call protocol error handlers from ndisc_redirect_rcv(), after the control buffer is already stolen and some parts are already zeroed out. This implies that inet6_iif() on this path will always return zero. This gives unexpected results on UDP socket lookup in __udp6_lib_err(), as we might actually need to match sockets for a given interface. Instead of always claiming the control buffer in ndisc_rcv(), do that only when needed. Fixes: b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect().") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 528218460bc5..5f80e57e93ed 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1722,10 +1722,9 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); ndisc_recv_ns(skb); break; From 6bb1381cc8650c9b86e41ea639db4dfaed750ad4 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 11 Oct 2018 11:15:13 -0700 Subject: [PATCH 0118/2608] llc: set SOCK_RCU_FREE in llc_sap_add_socket() [ Upstream commit 5a8e7aea953bdb6d4da13aff6f1e7f9c62023499 ] WHen an llc sock is added into the sk_laddr_hash of an llc_sap, it is not marked with SOCK_RCU_FREE. This causes that the sock could be freed while it is still being read by __llc_lookup_established() with RCU read lock. sock is refcounted, but with RCU read lock, nothing prevents the readers getting a zero refcnt. Fix it by setting SOCK_RCU_FREE in llc_sap_add_socket(). Reported-by: syzbot+11e05f04c15e03be5254@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/llc_conn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index b084fd19ad32..56c3fb5cc805 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) llc_sk(sk)->sap = sap; spin_lock_bh(&sap->sk_lock); + sock_set_flag(sk, SOCK_RCU_FREE); sap->sk_count++; sk_nulls_add_node_rcu(sk, laddr_hb); hlist_add_head(&llc->dev_hash_node, dev_hb); From ff96a397744084e6eb13ab4afe47add5c03435bf Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 15 Oct 2018 05:19:00 +0000 Subject: [PATCH 0119/2608] net: fec: don't dump RX FIFO register when not available [ Upstream commit ec20a63aa8b8ec3223fb25cdb2a49f9f9dfda88c ] Commit db65f35f50e0 ("net: fec: add support of ethtool get_regs") introduce ethool "--register-dump" interface to dump all FEC registers. But not all silicon implementations of the Freescale FEC hardware module have the FRBR (FIFO Receive Bound Register) and FRSR (FIFO Receive Start Register) register, so we should not be trying to dump them on those that don't. To fix it we create a quirk flag, FEC_QUIRK_HAS_RFREG, and check it before dump those RX FIFO registers. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec.h | 4 ++++ drivers/net/ethernet/freescale/fec_main.c | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 44720f83af27..4d4f16ad88c3 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -451,6 +451,10 @@ struct bufdesc_ex { * initialisation. */ #define FEC_QUIRK_MIB_CLEAR (1 << 15) +/* Only i.MX25/i.MX27/i.MX28 controller supports FRBR,FRSR registers, + * those FIFO receive registers are resolved in other platforms. + */ +#define FEC_QUIRK_HAS_FRREG (1 << 16) struct bufdesc_prop { int qid; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8bfa6ef826a9..ce55c8f7f33a 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -89,14 +89,16 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | - FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC, + FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx6q-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | @@ -2166,7 +2168,13 @@ static void fec_enet_get_regs(struct net_device *ndev, memset(buf, 0, regs->len); for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { - off = fec_enet_register_offset[i] / 4; + off = fec_enet_register_offset[i]; + + if ((off == FEC_R_BOUND || off == FEC_R_FSTART) && + !(fep->quirks & FEC_QUIRK_HAS_FRREG)) + continue; + + off >>= 2; buf[off] = readl(&theregs[off]); } } From e581e28f9dcbb461751df6eeb6d24335f94e9f11 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 19 Oct 2018 10:00:19 -0700 Subject: [PATCH 0120/2608] net/ipv6: Fix index counter for unicast addresses in in6_dump_addrs [ Upstream commit 4ba4c566ba8448a05e6257e0b98a21f1a0d55315 ] The loop wants to skip previously dumped addresses, so loops until current index >= saved index. If the message fills it wants to save the index for the next address to dump - ie., the one that did not fit in the current message. Currently, it is incrementing the index counter before comparing to the saved index, and then the saved index is off by 1 - it assumes the current address is going to fit in the message. Change the index handling to increment only after a succesful dump. Fixes: 502a2ffd7376a ("ipv6: convert idev_list to list macros") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 569f7c3f6b95..9ac6f6232294 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4793,8 +4793,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. temp addr */ list_for_each_entry(ifa, &idev->addr_list, if_list) { - if (++ip_idx < s_ip_idx) - continue; + if (ip_idx < s_ip_idx) + goto next; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -4803,6 +4803,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +next: + ip_idx++; } break; } From 29d871195e9e63ff567fe3fb02b01be60e015877 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Oct 2018 15:51:06 -0700 Subject: [PATCH 0121/2608] net: sched: gred: pass the right attribute to gred_change_table_def() [ Upstream commit 38b4f18d56372e1e21771ab7b0357b853330186c ] gred_change_table_def() takes a pointer to TCA_GRED_DPS attribute, and expects it will be able to interpret its contents as struct tc_gred_sopt. Pass the correct gred attribute, instead of TCA_OPTIONS. This bug meant the table definition could never be changed after Qdisc was initialized (unless whatever TCA_OPTIONS contained both passed netlink validation and was a valid struct tc_gred_sopt...). Old behaviour: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 RTNETLINK answers: Invalid argument Now: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 Fixes: f62d6b936df5 ("[PKT_SCHED]: GRED: Use central VQ change procedure") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index bc30f9186ac6..d3105ee8decf 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -411,7 +411,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); - return gred_change_table_def(sch, opt); + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } if (tb[TCA_GRED_PARMS] == NULL || From 7d58456872c4a65926541e8aa983056bd91f8ed6 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 18 Oct 2018 09:36:46 -0500 Subject: [PATCH 0122/2608] net: socket: fix a missing-check bug [ Upstream commit b6168562c8ce2bd5a30e213021650422e08764dc ] In ethtool_ioctl(), the ioctl command 'ethcmd' is checked through a switch statement to see whether it is necessary to pre-process the ethtool structure, because, as mentioned in the comment, the structure ethtool_rxnfc is defined with padding. If yes, a user-space buffer 'rxnfc' is allocated through compat_alloc_user_space(). One thing to note here is that, if 'ethcmd' is ETHTOOL_GRXCLSRLALL, the size of the buffer 'rxnfc' is partially determined by 'rule_cnt', which is actually acquired from the user-space buffer 'compat_rxnfc', i.e., 'compat_rxnfc->rule_cnt', through get_user(). After 'rxnfc' is allocated, the data in the original user-space buffer 'compat_rxnfc' is then copied to 'rxnfc' through copy_in_user(), including the 'rule_cnt' field. However, after this copy, no check is re-enforced on 'rxnfc->rule_cnt'. So it is possible that a malicious user race to change the value in the 'compat_rxnfc->rule_cnt' between these two copies. Through this way, the attacker can bypass the previous check on 'rule_cnt' and inject malicious data. This can cause undefined behavior of the kernel and introduce potential security risk. This patch avoids the above issue via copying the value acquired by get_user() to 'rxnfc->rule_cn', if 'ethcmd' is ETHTOOL_GRXCLSRLALL. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index d27922639a20..a401578f3f28 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2879,9 +2879,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) + (void __user *)&rxnfc->fs.ring_cookie)) + return -EFAULT; + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + if (put_user(rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + } else if (copy_in_user(&rxnfc->rule_cnt, + &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) return -EFAULT; } From 441d0e7540824732f25f38cc270343d34427bebe Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 31 Oct 2018 16:08:10 +0100 Subject: [PATCH 0123/2608] net: stmmac: Fix stmmac_mdio_reset() when building stmmac as modules [ Upstream commit 30549aab146ccb1275230c3b4b4bc6b4181fd54e ] When building stmmac, it is only possible to select CONFIG_DWMAC_GENERIC, or any of the glue drivers, when CONFIG_STMMAC_PLATFORM is set. The only exception is CONFIG_STMMAC_PCI. When calling of_mdiobus_register(), it will call our ->reset() callback, which is set to stmmac_mdio_reset(). Most of the code in stmmac_mdio_reset() is protected by a "#if defined(CONFIG_STMMAC_PLATFORM)", which will evaluate to false when CONFIG_STMMAC_PLATFORM=m. Because of this, the phy reset gpio will only be pulled when stmmac is built as built-in, but not when built as modules. Fix this by using "#if IS_ENABLED()" instead of "#if defined()". Signed-off-by: Niklas Cassel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index f5f37bfa1d58..ff2eeddf588e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -133,7 +133,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, */ int stmmac_mdio_reset(struct mii_bus *bus) { -#if defined(CONFIG_STMMAC_PLATFORM) +#if IS_ENABLED(CONFIG_STMMAC_PLATFORM) struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; From 0ecebdfb2e3a8a22ec157c5d388a6ac3a557919b Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 23 Oct 2018 16:04:31 -0600 Subject: [PATCH 0124/2608] net: udp: fix handling of CHECKSUM_COMPLETE packets [ Upstream commit db4f1be3ca9b0ef7330763d07bf4ace83ad6f913 ] Current handling of CHECKSUM_COMPLETE packets by the UDP stack is incorrect for any packet that has an incorrect checksum value. udp4/6_csum_init() will both make a call to __skb_checksum_validate_complete() to initialize/validate the csum field when receiving a CHECKSUM_COMPLETE packet. When this packet fails validation, skb->csum will be overwritten with the pseudoheader checksum so the packet can be fully validated by software, but the skb->ip_summed value will be left as CHECKSUM_COMPLETE so that way the stack can later warn the user about their hardware spewing bad checksums. Unfortunately, leaving the SKB in this state can cause problems later on in the checksum calculation. Since the the packet is still marked as CHECKSUM_COMPLETE, udp_csum_pull_header() will SUBTRACT the checksum of the UDP header from skb->csum instead of adding it, leaving us with a garbage value in that field. Once we try to copy the packet to userspace in the udp4/6_recvmsg(), we'll make a call to skb_copy_and_csum_datagram_msg() to checksum the packet data and add it in the garbage skb->csum value to perform our final validation check. Since the value we're validating is not the proper checksum, it's possible that the folded value could come out to 0, causing us not to drop the packet. Instead, we believe that the packet was checksummed incorrectly by hardware since skb->ip_summed is still CHECKSUM_COMPLETE, and we attempt to warn the user with netdev_rx_csum_fault(skb->dev); Unfortunately, since this is the UDP path, skb->dev has been overwritten by skb->dev_scratch and is no longer a valid pointer, so we end up reading invalid memory. This patch addresses this problem in two ways: 1) Do not use the dev pointer when calling netdev_rx_csum_fault() from skb_copy_and_csum_datagram_msg(). Since this gets called from the UDP path where skb->dev has been overwritten, we have no way of knowing if the pointer is still valid. Also for the sake of consistency with the other uses of netdev_rx_csum_fault(), don't attempt to call it if the packet was checksummed by software. 2) Add better CHECKSUM_COMPLETE handling to udp4/6_csum_init(). If we receive a packet that's CHECKSUM_COMPLETE that fails verification (i.e. skb->csum_valid == 0), check who performed the calculation. It's possible that the checksum was done in software by the network stack earlier (such as Netfilter's CONNTRACK module), and if that says the checksum is bad, we can drop the packet immediately instead of waiting until we try and copy it to userspace. Otherwise, we need to mark the SKB as CHECKSUM_NONE, since the skb->csum field no longer contains the full packet checksum after the call to __skb_checksum_validate_complete(). Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Fixes: c84d949057ca ("udp: copy skb->truesize in the first cache line") Cc: Sam Kumar Cc: Eric Dumazet Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 5 +++-- net/ipv4/udp.c | 20 ++++++++++++++++++-- net/ipv6/ip6_checksum.c | 20 ++++++++++++++++++-- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 3964c108b169..d8a0774f7608 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -810,8 +810,9 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, return -EINVAL; } - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) - netdev_rx_csum_fault(skb->dev); + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(NULL); } return 0; fault: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc0ec227b9d2..b89920c0f226 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2045,8 +2045,24 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, /* Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat it as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 547515e8450a..377717045f8f 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -88,8 +88,24 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat is as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } EXPORT_SYMBOL(udp6_csum_init); From aa8d067c8547ad146ef18baecabc032618f7efb6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 18 Oct 2018 19:56:01 +0200 Subject: [PATCH 0125/2608] r8169: fix NAPI handling under high load [ Upstream commit 6b839b6cf9eada30b086effb51e5d6076bafc761 ] rtl_rx() and rtl_tx() are called only if the respective bits are set in the interrupt status register. Under high load NAPI may not be able to process all data (work_done == budget) and it will schedule subsequent calls to the poll callback. rtl_ack_events() however resets the bits in the interrupt status register, therefore subsequent calls to rtl8169_poll() won't call rtl_rx() and rtl_tx() - chip interrupts are still disabled. Fix this by calling rtl_rx() and rtl_tx() independent of the bits set in the interrupt status register. Both functions will detect if there's nothing to do for them. Fixes: da78dbff2e05 ("r8169: remove work from irq handler.") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index f7e540eeb877..1b61ce310132 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7579,17 +7579,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); struct net_device *dev = tp->dev; u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow; - int work_done= 0; + int work_done; u16 status; status = rtl_get_events(tp); rtl_ack_events(tp, status & ~tp->event_slow); - if (status & RTL_EVENT_NAPI_RX) - work_done = rtl_rx(dev, tp, (u32) budget); + work_done = rtl_rx(dev, tp, (u32) budget); - if (status & RTL_EVENT_NAPI_TX) - rtl_tx(dev, tp); + rtl_tx(dev, tp); if (status & tp->event_slow) { enable_mask &= ~tp->event_slow; From 606694e5ec81d598c0300f9e16db0464659df557 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 16 Oct 2018 15:18:17 -0300 Subject: [PATCH 0126/2608] sctp: fix race on sctp_id2asoc [ Upstream commit b336decab22158937975293aea79396525f92bb3 ] syzbot reported an use-after-free involving sctp_id2asoc. Dmitry Vyukov helped to root cause it and it is because of reading the asoc after it was freed: CPU 1 CPU 2 (working on socket 1) (working on socket 2) sctp_association_destroy sctp_id2asoc spin lock grab the asoc from idr spin unlock spin lock remove asoc from idr spin unlock free(asoc) if asoc->base.sk != sk ... [*] This can only be hit if trying to fetch asocs from different sockets. As we have a single IDR for all asocs, in all SCTP sockets, their id is unique on the system. An application can try to send stuff on an id that matches on another socket, and the if in [*] will protect from such usage. But it didn't consider that as that asoc may belong to another socket, it may be freed in parallel (read: under another socket lock). We fix it by moving the checks in [*] into the protected region. This fixes it because the asoc cannot be freed while the lock is held. Reported-by: syzbot+c7dd55d7aec49d48e49a@syzkaller.appspotmail.com Acked-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 790094311143..d87d56978b4c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -250,11 +250,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) spin_lock_bh(&sctp_assocs_id_lock); asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); + if (asoc && (asoc->base.sk != sk || asoc->base.dead)) + asoc = NULL; spin_unlock_bh(&sctp_assocs_id_lock); - if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) - return NULL; - return asoc; } From 189771d69e14404b0e6eb57aa40bed2a95f7e2c2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 17 Oct 2018 11:44:04 +0200 Subject: [PATCH 0127/2608] udp6: fix encap return code for resubmitting [ Upstream commit 84dad55951b0d009372ec21760b650634246e144 ] The commit eb63f2964dbe ("udp6: add missing checks on edumux packet processing") used the same return code convention of the ipv4 counterpart, but ipv6 uses the opposite one: positive values means resubmit. This change addresses the issue, using positive return value for resubmitting. Also update the related comment, which was broken, too. Fixes: eb63f2964dbe ("udp6: add missing checks on edumux packet processing") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/udp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5cee941ab0a9..8d185a0fc5af 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -794,11 +794,9 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, ret = udpv6_queue_rcv_skb(sk, skb); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } From b522f279f91b8ba916d941e0e73a01ecee5da77a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 30 Oct 2018 14:10:49 +0800 Subject: [PATCH 0128/2608] vhost: Fix Spectre V1 vulnerability [ Upstream commit ff002269a4ee9c769dbf9365acef633ebcbd6cbe ] The idx in vhost_vring_ioctl() was controlled by userspace, hence a potential exploitation of the Spectre variant 1 vulnerability. Fixing this by sanitizing idx before using it to index d->vqs. Cc: Michael S. Tsirkin Cc: Josh Poimboeuf Cc: Andrea Arcangeli Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7ee3167bc083..ffdd4e937d1d 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "vhost.h" @@ -1366,6 +1367,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) if (idx >= d->nvqs) return -ENOBUFS; + idx = array_index_nospec(idx, d->nvqs); vq = d->vqs[idx]; mutex_lock(&vq->mutex); From 574af67123c3da1e094813d122baa6cd37ccc4af Mon Sep 17 00:00:00 2001 From: Ake Koomsin Date: Wed, 17 Oct 2018 19:44:12 +0900 Subject: [PATCH 0129/2608] virtio_net: avoid using netif_tx_disable() for serializing tx routine [ Upstream commit 05c998b738fdd3e5d6a257bcacc8f34b6284d795 ] Commit 713a98d90c5e ("virtio-net: serialize tx routine during reset") introduces netif_tx_disable() after netif_device_detach() in order to avoid use-after-free of tx queues. However, there are two issues. 1) Its operation is redundant with netif_device_detach() in case the interface is running. 2) In case of the interface is not running before suspending and resuming, the tx does not get resumed by netif_device_attach(). This results in losing network connectivity. It is better to use netif_tx_lock_bh()/netif_tx_unlock_bh() instead for serializing tx routine during reset. This also preserves the symmetry of netif_device_detach() and netif_device_attach(). Fixes commit 713a98d90c5e ("virtio-net: serialize tx routine during reset") Signed-off-by: Ake Koomsin Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 910c46b47769..f528e9ac3413 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1872,8 +1872,9 @@ static void virtnet_freeze_down(struct virtio_device *vdev) /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); + netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); - netif_tx_disable(vi->dev); + netif_tx_unlock_bh(vi->dev); cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) { @@ -1909,7 +1910,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) } } + netif_tx_lock_bh(vi->dev); netif_device_attach(vi->dev); + netif_tx_unlock_bh(vi->dev); return err; } From 95c337646585c27b5e2d94a1daff53ed5e294cb7 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 8 Oct 2018 10:49:35 -0500 Subject: [PATCH 0130/2608] ethtool: fix a privilege escalation bug [ Upstream commit 58f5bbe331c566f49c9559568f982202a278aa78 ] In dev_ethtool(), the eth command 'ethcmd' is firstly copied from the use-space buffer 'useraddr' and checked to see whether it is ETHTOOL_PERQUEUE. If yes, the sub-command 'sub_cmd' is further copied from the user space. Otherwise, 'sub_cmd' is the same as 'ethcmd'. Next, according to 'sub_cmd', a permission check is enforced through the function ns_capable(). For example, the permission check is required if 'sub_cmd' is ETHTOOL_SCOALESCE, but it is not necessary if 'sub_cmd' is ETHTOOL_GCOALESCE, as suggested in the comment "Allow some commands to be done by anyone". The following execution invokes different handlers according to 'ethcmd'. Specifically, if 'ethcmd' is ETHTOOL_PERQUEUE, ethtool_set_per_queue() is called. In ethtool_set_per_queue(), the kernel object 'per_queue_opt' is copied again from the user-space buffer 'useraddr' and 'per_queue_opt.sub_command' is used to determine which operation should be performed. Given that the buffer 'useraddr' is in the user space, a malicious user can race to change the sub-command between the two copies. In particular, the attacker can supply ETHTOOL_PERQUEUE and ETHTOOL_GCOALESCE to bypass the permission check in dev_ethtool(). Then before ethtool_set_per_queue() is called, the attacker changes ETHTOOL_GCOALESCE to ETHTOOL_SCOALESCE. In this way, the attacker can bypass the permission check and execute ETHTOOL_SCOALESCE. This patch enforces a check in ethtool_set_per_queue() after the second copy from 'useraddr'. If the sub-command is different from the one obtained in the first copy in dev_ethtool(), an error code EINVAL will be returned. Fixes: f38d138a7da6 ("net/ethtool: support set coalesce per queue") Signed-off-by: Wenwen Wang Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0ae5ac5e090f..3469f5053c79 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2410,13 +2410,17 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) +static int ethtool_set_per_queue(struct net_device *dev, + void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) return -EFAULT; + if (per_queue_opt.sub_command != sub_cmd) + return -EINVAL; + switch (per_queue_opt.sub_command) { case ETHTOOL_GCOALESCE: return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); @@ -2787,7 +2791,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_get_phy_stats(dev, useraddr); break; case ETHTOOL_PERQUEUE: - rc = ethtool_set_per_queue(dev, useraddr); + rc = ethtool_set_per_queue(dev, useraddr, sub_cmd); break; case ETHTOOL_GLINKSETTINGS: rc = ethtool_get_link_ksettings(dev, useraddr); From 8a865095d399f3318f30176e5d2f677d0354f8d8 Mon Sep 17 00:00:00 2001 From: Tobias Jungel Date: Sun, 28 Oct 2018 12:54:10 +0100 Subject: [PATCH 0131/2608] bonding: fix length of actor system [ Upstream commit 414dd6fb9a1a1b59983aea7bf0f79f0085ecc5b8 ] The attribute IFLA_BOND_AD_ACTOR_SYSTEM is sent to user space having the length of sizeof(bond->params.ad_actor_system) which is 8 byte. This patch aligns the length to ETH_ALEN to have the same MAC address exposed as using sysfs. Fixes: f87fda00b6ed2 ("bonding: prevent out of bound accesses") Signed-off-by: Tobias Jungel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_netlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index a1b33aa6054a..77babf1417a7 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -638,8 +638,7 @@ static int bond_fill_info(struct sk_buff *skb, goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, - sizeof(bond->params.ad_actor_system), - &bond->params.ad_actor_system)) + ETH_ALEN, &bond->params.ad_actor_system)) goto nla_put_failure; } if (!bond_3ad_get_active_agg_info(bond, &info)) { From f288424ea85ef806c0f35adc9fd904ac636ffe81 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 18 Oct 2018 21:25:07 +0200 Subject: [PATCH 0132/2608] ip6_tunnel: Fix encapsulation layout [ Upstream commit d4d576f5ab7edcb757bb33e6a5600666a0b1232d ] Commit 058214a4d1df ("ip6_tun: Add infrastructure for doing encapsulation") added the ip6_tnl_encap() call in ip6_tnl_xmit(), before the call to ipv6_push_frag_opts() to append the IPv6 Tunnel Encapsulation Limit option (option 4, RFC 2473, par. 5.1) to the outer IPv6 header. As long as the option didn't actually end up in generated packets, this wasn't an issue. Then commit 89a23c8b528b ("ip6_tunnel: Fix missing tunnel encapsulation limit option") fixed sending of this option, and the resulting layout, e.g. for FoU, is: .-------------------.------------.----------.-------------------.----- - - | Outer IPv6 Header | UDP header | Option 4 | Inner IPv6 Header | Payload '-------------------'------------'----------'-------------------'----- - - Needless to say, FoU and GUE (at least) won't work over IPv6. The option is appended by default, and I couldn't find a way to disable it with the current iproute2. Turn this into a more reasonable: .-------------------.----------.------------.-------------------.----- - - | Outer IPv6 Header | Option 4 | UDP header | Inner IPv6 Header | Payload '-------------------'----------'------------'-------------------'----- - - With this, and with 84dad55951b0 ("udp6: fix encap return code for resubmitting"), FoU and GUE work again over IPv6. Fixes: 058214a4d1df ("ip6_tun: Add infrastructure for doing encapsulation") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0e9296f44ee4..948f304db0a3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1185,10 +1185,6 @@ route_lookup: } skb_dst_set(skb, dst); - if (encap_limit >= 0) { - init_tel_txopt(&opt, encap_limit); - ipv6_push_frag_opts(skb, &opt.ops, &proto); - } hop_limit = hop_limit ? : ip6_dst_hoplimit(dst); /* Calculate max headroom for all the headers and adjust @@ -1203,6 +1199,11 @@ route_lookup: if (err) return err; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_frag_opts(skb, &opt.ops, &proto); + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); From 1d50b74a33d0b201a4bef20461b585317cbf3eb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 31 Oct 2018 18:52:03 +0100 Subject: [PATCH 0133/2608] openvswitch: Fix push/pop ethernet validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 46ebe2834ba5b541f28ee72e556a3fed42c47570 ] When there are both pop and push ethernet header actions among the actions to be applied to a packet, an unexpected EINVAL (Invalid argument) error is obtained. This is due to mac_proto not being reset correctly when those actions are validated. Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-October/047554.html Fixes: 91820da6ae85 ("openvswitch: add Ethernet push and pop actions") Signed-off-by: Jaime Caamaño Ruiz Tested-by: Greg Rose Reviewed-by: Greg Rose Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4c9c9458374a..f70e9cbf33d5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2622,7 +2622,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, * is already present */ if (mac_proto != MAC_PROTO_NONE) return -EINVAL; - mac_proto = MAC_PROTO_NONE; + mac_proto = MAC_PROTO_ETHERNET; break; case OVS_ACTION_ATTR_POP_ETH: @@ -2630,7 +2630,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; if (vlan_tci & htons(VLAN_TAG_PRESENT)) return -EINVAL; - mac_proto = MAC_PROTO_ETHERNET; + mac_proto = MAC_PROTO_NONE; break; default: From bc478700368780c5460391282a098d5ba57464db Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 11 Sep 2018 14:58:22 -0500 Subject: [PATCH 0134/2608] net/mlx5: Take only bit 24-26 of wqe.pftype_wq for page fault type [ Upstream commit a48bc513159d4767f9988f0d857b2b0c38a4d614 ] The HW spec defines only bits 24-26 of pftype_wq as the page fault type, use the required mask to ensure that. Fixes: d9aaed838765 ("{net,IB}/mlx5: Refactor page fault handling") Signed-off-by: Huy Nguyen Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index eb91de86202b..5da0b6e11530 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -262,7 +262,7 @@ static void eq_pf_process(struct mlx5_eq *eq) case MLX5_PFAULT_SUBTYPE_WQE: /* WQE based event */ pfault->type = - be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24; + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; pfault->token = be32_to_cpu(pf_eqe->wqe.token); pfault->wqe.wq_num = From fd090ba395fb60c71abb0c35bd026c4dcf5a4a1d Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 18 Oct 2018 10:34:26 +0200 Subject: [PATCH 0135/2608] net: sched: Fix for duplicate class dump [ Upstream commit 3c53ed8fef6881a864f0ee8240ed2793ef73ad0d ] When dumping classes by parent, kernel would return classes twice: | # tc qdisc add dev lo root prio | # tc class show dev lo | class prio 8001:1 parent 8001: | class prio 8001:2 parent 8001: | class prio 8001:3 parent 8001: | # tc class show dev lo parent 8001: | class prio 8001:1 parent 8001: | class prio 8001:2 parent 8001: | class prio 8001:3 parent 8001: | class prio 8001:1 parent 8001: | class prio 8001:2 parent 8001: | class prio 8001:3 parent 8001: This comes from qdisc_match_from_root() potentially returning the root qdisc itself if its handle matched. Though in that case, root's classes were already dumped a few lines above. Fixes: cb395b2010879 ("net: sched: optimize class dumps") Signed-off-by: Phil Sutter Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cd69aa067543..691ca96f7460 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1917,7 +1917,8 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tcm->tcm_parent) { q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); - if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + if (q && q != root && + tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; return 0; } From 623670a9f20755b389f958b85bcb5b1eb17cdaed Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Nov 2018 12:02:37 -0700 Subject: [PATCH 0136/2608] net: drop skb on failure in ip_check_defrag() [ Upstream commit 7de414a9dd91426318df7b63da024b2b07e53df5 ] Most callers of pskb_trim_rcsum() simply drop the skb when it fails, however, ip_check_defrag() still continues to pass the skb up to stack. This is suspicious. In ip_check_defrag(), after we learn the skb is an IP fragment, passing the skb to callers makes no sense, because callers expect fragments are defrag'ed on success. So, dropping the skb when we can't defrag it is reasonable. Note, prior to commit 88078d98d1bb, this is not a big problem as checksum will be fixed up anyway. After it, the checksum is not correct on failure. Found this during code review. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e7227128df2c..cb8fa5d7afe1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -720,10 +720,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) - return skb; - if (pskb_trim_rcsum(skb, netoff + len)) - return skb; + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) { + kfree_skb(skb); + return NULL; + } + if (pskb_trim_rcsum(skb, netoff + len)) { + kfree_skb(skb); + return NULL; + } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(net, skb, user)) return NULL; From 855cb69f4e6d3068ad18738f0cd6ff3cd099ccfc Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Fri, 19 Oct 2018 17:07:13 -0700 Subject: [PATCH 0137/2608] net: fix pskb_trim_rcsum_slow() with odd trim offset [ Upstream commit d55bef5059dd057bd077155375c581b49d25be7e ] We've been getting checksum errors involving small UDP packets, usually 59B packets with 1 extra non-zero padding byte. netdev_rx_csum_fault() has been complaining that HW is providing bad checksums. Turns out the problem is in pskb_trim_rcsum_slow(), introduced in commit 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"). The source of the problem is that when the bytes we are trimming start at an odd address, as in the case of the 1 padding byte above, skb_checksum() returns a byte-swapped value. We cannot just combine this with skb->csum using csum_sub(). We need to use csum_block_sub() here that takes into account the parity of the start address and handles the swapping. Matches existing code in __skb_postpull_rcsum() and esp_remove_trailer(). Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Signed-off-by: Dimitris Michailidis Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f80b947f53b..c19a118f9f82 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1843,8 +1843,9 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) if (skb->ip_summed == CHECKSUM_COMPLETE) { int delta = skb->len - len; - skb->csum = csum_sub(skb->csum, - skb_checksum(skb, len, delta, 0)); + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); } return __pskb_trim(skb, len); } From ac65fd7094ac5ec86d167801f2101d02bbe1da6e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Oct 2018 00:57:25 -0700 Subject: [PATCH 0138/2608] net/mlx5e: fix csum adjustments caused by RXFCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d48051c5b8376038c2b287c3b1bd55b8d391d567 ] As shown by Dmitris, we need to use csum_block_add() instead of csum_add() when adding the FCS contribution to skb csum. Before 4.18 (more exactly commit 88078d98d1bb "net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), the whole skb csum was thrown away, so RXFCS changes were ignored. Then before commit d55bef5059dd ("net: fix pskb_trim_rcsum_slow() with odd trim offset") both mlx5 and pskb_trim_rcsum_slow() bugs were canceling each other. Now we fixed pskb_trim_rcsum_slow() we need to fix mlx5. Note that this patch also rewrites mlx5e_get_fcs() to : - Use skb_header_pointer() instead of reinventing it. - Use __get_unaligned_cpu32() to avoid possible non aligned accesses as Dmitris pointed out. Fixes: 902a545904c7 ("net/mlx5e: When RXFCS is set, add FCS data into checksum calculation") Reported-by: Paweł Staszewski Signed-off-by: Eric Dumazet Cc: Eran Ben Elisha Cc: Saeed Mahameed Cc: Dimitris Michailidis Cc: Cong Wang Cc: Paweł Staszewski Reviewed-by: Eran Ben Elisha Tested-By: Maria Pasechnik Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 45 ++++--------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 8285e6d24f30..3d3fd03fa450 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -635,43 +635,15 @@ static inline bool is_first_ethertype_ip(struct sk_buff *skb) return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); } -static __be32 mlx5e_get_fcs(struct sk_buff *skb) +static u32 mlx5e_get_fcs(const struct sk_buff *skb) { - int last_frag_sz, bytes_in_prev, nr_frags; - u8 *fcs_p1, *fcs_p2; - skb_frag_t *last_frag; - __be32 fcs_bytes; + const void *fcs_bytes; + u32 _fcs_bytes; - if (!skb_is_nonlinear(skb)) - return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN); + fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, + ETH_FCS_LEN, &_fcs_bytes); - nr_frags = skb_shinfo(skb)->nr_frags; - last_frag = &skb_shinfo(skb)->frags[nr_frags - 1]; - last_frag_sz = skb_frag_size(last_frag); - - /* If all FCS data is in last frag */ - if (last_frag_sz >= ETH_FCS_LEN) - return *(__be32 *)(skb_frag_address(last_frag) + - last_frag_sz - ETH_FCS_LEN); - - fcs_p2 = (u8 *)skb_frag_address(last_frag); - bytes_in_prev = ETH_FCS_LEN - last_frag_sz; - - /* Find where the other part of the FCS is - Linear or another frag */ - if (nr_frags == 1) { - fcs_p1 = skb_tail_pointer(skb); - } else { - skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2]; - - fcs_p1 = skb_frag_address(prev_frag) + - skb_frag_size(prev_frag); - } - fcs_p1 -= bytes_in_prev; - - memcpy(&fcs_bytes, fcs_p1, bytes_in_prev); - memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz); - - return fcs_bytes; + return __get_unaligned_cpu32(fcs_bytes); } static inline void mlx5e_handle_csum(struct net_device *netdev, @@ -693,8 +665,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb)); + skb->csum = csum_block_add(skb->csum, + (__force __wsum)mlx5e_get_fcs(skb), + skb->len - ETH_FCS_LEN); rq->stats.csum_complete++; return; } From 4ca72d6336df434171218f047dbcaf86d65253fe Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 Oct 2018 20:36:43 +0000 Subject: [PATCH 0139/2608] rtnetlink: Disallow FDB configuration for non-Ethernet device [ Upstream commit da71577545a52be3e0e9225a946e5fd79cfab015 ] When an FDB entry is configured, the address is validated to have the length of an Ethernet address, but the device for which the address is configured can be of any type. The above can result in the use of uninitialized memory when the address is later compared against existing addresses since 'dev->addr_len' is used and it may be greater than ETH_ALEN, as with ip6tnl devices. Fix this by making sure that FDB entries are only configured for Ethernet devices. BUG: KMSAN: uninit-value in memcmp+0x11d/0x180 lib/string.c:863 CPU: 1 PID: 4318 Comm: syz-executor998 Not tainted 4.19.0-rc3+ #49 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x14b/0x190 lib/dump_stack.c:113 kmsan_report+0x183/0x2b0 mm/kmsan/kmsan.c:956 __msan_warning+0x70/0xc0 mm/kmsan/kmsan_instr.c:645 memcmp+0x11d/0x180 lib/string.c:863 dev_uc_add_excl+0x165/0x7b0 net/core/dev_addr_lists.c:464 ndo_dflt_fdb_add net/core/rtnetlink.c:3463 [inline] rtnl_fdb_add+0x1081/0x1270 net/core/rtnetlink.c:3558 rtnetlink_rcv_msg+0xa0b/0x1530 net/core/rtnetlink.c:4715 netlink_rcv_skb+0x36e/0x5f0 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4733 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x1638/0x1720 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x1205/0x1290 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 __sys_sendmsg net/socket.c:2152 [inline] __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440ee9 Code: e8 cc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 bb 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff6a93b518 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440ee9 RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000003 RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 000000000000b4b0 R13: 0000000000401ec0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:256 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:181 kmsan_kmalloc+0x98/0x100 mm/kmsan/kmsan_hooks.c:91 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:100 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2718 [inline] __kmalloc_node_track_caller+0x9e7/0x1160 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2f5/0x9e0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:996 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] netlink_sendmsg+0xb49/0x1290 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 __sys_sendmsg net/socket.c:2152 [inline] __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 v2: * Make error message more specific (David) Fixes: 090096bf3db1 ("net: generic fdb support for drivers without ndo_fdb_") Signed-off-by: Ido Schimmel Reported-and-tested-by: syzbot+3a288d5f5530b901310e@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+d53ab4e92a1db04110ff@syzkaller.appspotmail.com Cc: Vlad Yasevich Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 760364526dc1..c392a77ff788 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3080,6 +3080,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid); @@ -3184,6 +3189,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid); From 71944eb54288509c7c19ac54b485ac774c8253fe Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 17 Oct 2018 22:34:34 +0300 Subject: [PATCH 0140/2608] net: ipmr: fix unresolved entry dumps [ Upstream commit eddf016b910486d2123675a6b5fd7d64f77cdca8 ] If the skb space ends in an unresolved entry while dumping we'll miss some unresolved entries. The reason is due to zeroing the entry counter between dumping resolved and unresolved mfc entries. We should just keep counting until the whole table is dumped and zero when we move to the next as we have a separate table counter. Reported-by: Colin Ian King Fixes: 8fb472c09b9d ("ipmr: improve hash scalability") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index cbd9c0d8a788..9f314a5e9f27 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2499,8 +2499,6 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) next_entry: e++; } - e = 0; - s_e = 0; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { From c8c2df18eedfcb504ed2f5d8ab24e87da7fd3959 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 11 Oct 2018 15:06:33 -0700 Subject: [PATCH 0141/2608] net: bcmgenet: Poll internal PHY for GENETv5 [ Upstream commit 64bd9c8135751b561f27edaaffe93d07093f81af ] On GENETv5, there is a hardware issue which prevents the GENET hardware from generating a link UP interrupt when the link is operating at 10Mbits/sec. Since we do not have any way to configure the link detection logic, fallback to polling in that case. Fixes: 421380856d9c ("net: bcmgenet: add support for the GENETv5 hardware") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 6ad0ca7ed3e9..abbd2894f870 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -339,9 +339,12 @@ int bcmgenet_mii_probe(struct net_device *dev) phydev->advertising = phydev->supported; /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs - */ - if (priv->internal_phy) + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fallback to polling for + * those versions of GENET. + */ + if (priv->internal_phy && !GENET_IS_V5(priv)) priv->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; From b0ee9bd40d74001cd98de6bac252dd728130dbaa Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 10 Oct 2018 22:00:58 +0200 Subject: [PATCH 0142/2608] net/sched: cls_api: add missing validation of netlink attributes [ Upstream commit e331473fee3d500bb0d2582a1fe598df3326d8cd ] Similarly to what has been done in 8b4c3cdd9dd8 ("net: sched: Add policy validation for tc attributes"), fix classifier code to add validation of TCA_CHAIN and TCA_KIND netlink attributes. tested with: # ./tdc.py -c filter v2: Let sch_api and cls_api share nla_policy they have in common, thanks to David Ahern. v3: Avoid EXPORT_SYMBOL(), as validation of those attributes is not done by TC modules, thanks to Cong Wang. While at it, restore the 'Delete / get qdisc' comment to its orginal position, just above tc_get_qdisc() function prototype. Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_api.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2f4e1483aced..04a70793c1fe 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,6 +31,8 @@ #include #include +extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; + /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -559,7 +561,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -836,7 +838,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + NULL); if (err) return err; From fe54a7c4f0d1f313ea341ebd92d1f3ff9eb65007 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 14 May 2018 15:38:10 -0700 Subject: [PATCH 0143/2608] net/mlx5: Fix build break when CONFIG_SMP=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e3ca34880652250f524022ad89e516f8ba9a805b ] Avoid using the kernel's irq_descriptor and return IRQ vector affinity directly from the driver. This fixes the following build break when CONFIG_SMP=n include/linux/mlx5/driver.h: In function ‘mlx5_get_vector_affinity_hint’: include/linux/mlx5/driver.h:1299:13: error: ‘struct irq_desc’ has no member named ‘affinity_hint’ Fixes: 6082d9c9c94a ("net/mlx5: Fix mlx5_get_vector_affinity function") Signed-off-by: Saeed Mahameed CC: Randy Dunlap CC: Guenter Roeck CC: Thomas Gleixner Tested-by: Israel Rukshin Reported-by: kbuild test robot Reported-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Thomas Gleixner Tested-by: Guenter Roeck Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/mlx5/driver.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5eff332092bc..fb677e4f902d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1195,17 +1195,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - struct irq_desc *desc; - unsigned int irq; - int eqn; - int err; - - err = mlx5_vector2eqn(dev, vector, &eqn, &irq); - if (err) - return NULL; - - desc = irq_to_desc(irq); - return desc->affinity_hint; + return dev->priv.irq_info[vector].mask; } #endif /* MLX5_DRIVER_H */ From 50961e4888a1d53544ac4ea6f185fc27ee4fee4f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 4 Nov 2018 14:52:51 +0100 Subject: [PATCH 0144/2608] Linux 4.14.79 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7f6579d53911..57a007bf1181 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 78 +SUBLEVEL = 79 EXTRAVERSION = NAME = Petit Gorille From f63387b9ae1ddf1e121b220ebe917aeed7941054 Mon Sep 17 00:00:00 2001 From: Alan Chiang Date: Wed, 25 Jul 2018 11:20:22 +0800 Subject: [PATCH 0145/2608] eeprom: at24: Add support for address-width property [ Upstream commit a2b3bf4846e5eed62ea6abb096af2c950961033c ] Provide a flexible way to determine the addressing bits of eeprom. Pass the addressing bits to driver through address-width property. Signed-off-by: Alan Chiang Signed-off-by: Andy Yeh Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/misc/eeprom/at24.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 4cc0b42f2acc..ded48a0c77ee 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -577,6 +577,23 @@ static void at24_get_pdata(struct device *dev, struct at24_platform_data *chip) if (device_property_present(dev, "read-only")) chip->flags |= AT24_FLAG_READONLY; + err = device_property_read_u32(dev, "address-width", &val); + if (!err) { + switch (val) { + case 8: + if (chip->flags & AT24_FLAG_ADDR16) + dev_warn(dev, "Override address width to be 8, while default is 16\n"); + chip->flags &= ~AT24_FLAG_ADDR16; + break; + case 16: + chip->flags |= AT24_FLAG_ADDR16; + break; + default: + dev_warn(dev, "Bad \"address-width\" property: %u\n", + val); + } + } + err = device_property_read_u32(dev, "pagesize", &val); if (!err) { chip->page_size = val; From 8a6cee344cc0236d6171bc24506cb593d84b8214 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 22 Oct 2018 20:56:46 +0300 Subject: [PATCH 0146/2608] vfs: swap names of {do,vfs}_clone_file_range() commit a725356b6659469d182d662f22d770d83d3bc7b5 upstream. Commit 031a072a0b8a ("vfs: call vfs_clone_file_range() under freeze protection") created a wrapper do_clone_file_range() around vfs_clone_file_range() moving the freeze protection to former, so overlayfs could call the latter. The more common vfs practice is to call do_xxx helpers from vfs_xxx helpers, where freeze protecction is taken in the vfs_xxx helper, so this anomality could be a source of confusion. It seems that commit 8ede205541ff ("ovl: add reflink/copyfile/dedup support") may have fallen a victim to this confusion - ovl_clone_file_range() calls the vfs_clone_file_range() helper in the hope of getting freeze protection on upper fs, but in fact results in overlayfs allowing to bypass upper fs freeze protection. Swap the names of the two helpers to conform to common vfs practice and call the correct helpers from overlayfs and nfsd. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 031a072a0b8a ("vfs: call vfs_clone_file_range() under freeze...") Signed-off-by: Amir Goldstein Signed-off-by: Sasha Levin --- fs/ioctl.c | 2 +- fs/nfsd/vfs.c | 3 ++- fs/overlayfs/copy_up.c | 2 +- fs/read_write.c | 17 +++++++++++++++-- include/linux/fs.h | 17 +++-------------- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 5ace7efb0d04..9db5ddaf7ef0 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -229,7 +229,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen); + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); fdput: fdput(src_file); return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a3c9bfa77def..f55527ef21e8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -541,7 +541,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count)); + return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, + count)); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index c441f9387a1b..321eae740148 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -157,7 +157,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - error = vfs_clone_file_range(old_file, 0, new_file, 0, len); + error = do_clone_file_range(old_file, 0, new_file, 0, len); if (!error) goto out; /* Couldn't clone, so now we try to copy the data */ diff --git a/fs/read_write.c b/fs/read_write.c index 0046d72efe94..57a00ef895b2 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1812,8 +1812,8 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); -int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1860,6 +1860,19 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; } +EXPORT_SYMBOL(do_clone_file_range); + +int vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) +{ + int ret; + + file_start_write(file_out); + ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} EXPORT_SYMBOL(vfs_clone_file_range); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index cc613f20e5a6..7374639f0aa0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1792,8 +1792,10 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, struct inode *inode_out, loff_t pos_out, u64 *len, bool is_dedupe); +extern int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); + struct file *file_out, loff_t pos_out, u64 len); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); @@ -2712,19 +2714,6 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - file_start_write(file_out); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - file_end_write(file_out); - - return ret; -} - /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. From 86534d3d824351868fb0992e13777933edb91e6f Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Thu, 1 Nov 2018 20:52:46 +0100 Subject: [PATCH 0147/2608] USB: serial: option: improve Quectel EP06 detection commit 36cae568404a298a19a6e8a3f18641075d4cab04 upstream The Quectel EP06 (and EM06/EG06) LTE modem supports updating the USB configuration, without the VID/PID or configuration number changing. When the configuration is updated and interfaces are added/removed, the interface numbers are updated. This causes our current code for matching EP06 not to work as intended, as the assumption about reserved interfaces no longer holds. If for example the diagnostic (first) interface is removed, option will (try to) bind to the QMI interface. This patch improves EP06 detection by replacing the current match with two matches, and those matches check class, subclass and protocol as well as VID and PID. The diag interface exports class, subclass and protocol as 0xff. For the other serial interfaces, class is 0xff and subclass and protocol are both 0x0. The modem can export the following devices and always in this order: diag, nmea, at, ppp. qmi and adb. This means that diag can only ever be interface 0, and interface numbers 1-5 should be marked as reserved. The three other serial devices can have interface numbers 0-3, but I have not marked any interfaces as reserved. The reason is that the serial devices are the only interfaces exported by the device where subclass and protocol is 0x0. QMI exports the same class, subclass and protocol values as the diag interface. However, the two interfaces have different number of endpoints, QMI has three and diag two. I have added a check for number of interfaces if VID/PID matches the EP06, and we ignore the device if number of interfaces equals three (and subclass is set). The upstream commit does not apply cleanly to the 4.14-tree because of differences in option_probe(). In order to make the commit apply, a slight reshuffeling of the code was needed. Signed-off-by: Kristian Evensen Acked-by: Dan Williams [ johan: drop uneeded RSVD(5) for ADB ] Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Kristian Evensen Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0600dadd6a0c..d8d3cb18e9ea 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1084,8 +1084,9 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = RSVD(4) }, - { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06), - .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), @@ -2010,6 +2011,18 @@ static int option_probe(struct usb_serial *serial, iface_desc->bInterfaceClass != USB_CLASS_CDC_DATA) return -ENODEV; + /* + * Don't bind to the QMI device of the Quectel EP06/EG06/EM06. Class, + * subclass and protocol is 0xff for both the diagnostic port and the + * QMI interface, but the diagnostic port only has two endpoints (QMI + * has three). + */ + if (dev_desc->idVendor == cpu_to_le16(QUECTEL_VENDOR_ID) && + dev_desc->idProduct == cpu_to_le16(QUECTEL_PRODUCT_EP06) && + iface_desc->bInterfaceSubClass && iface_desc->bNumEndpoints == 3) { + return -ENODEV; + } + /* Store the device flags so we can use them during attach. */ usb_set_serial_data(serial, (void *)device_flags); From cc2526f1f5544a440c88f48c4681707ca76e3df1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 1 Nov 2018 20:52:47 +0100 Subject: [PATCH 0148/2608] USB: serial: option: add two-endpoints device-id flag commit 35aecc02b5b621782111f64cbb032c7f6a90bb32 upstream Allow matching on interfaces having two endpoints by adding a new device-id flag. This allows for the handling of devices whose interface numbers can change (e.g. Quectel EP06) to be contained in the device-id table. The upstream commit removes a variable that is still in use in the 4.14 version of the option-driver, so the removal is undone. Tested-by: Kristian Evensen Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Kristian Evensen Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d8d3cb18e9ea..392fddc80c44 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -564,6 +564,9 @@ static void option_instat_callback(struct urb *urb); /* Interface is reserved */ #define RSVD(ifnum) ((BIT(ifnum) & 0xff) << 0) +/* Interface must have two endpoints */ +#define NUMEP2 BIT(16) + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, @@ -1085,7 +1088,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), - .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) }, + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, @@ -2012,16 +2015,11 @@ static int option_probe(struct usb_serial *serial, return -ENODEV; /* - * Don't bind to the QMI device of the Quectel EP06/EG06/EM06. Class, - * subclass and protocol is 0xff for both the diagnostic port and the - * QMI interface, but the diagnostic port only has two endpoints (QMI - * has three). + * Allow matching on bNumEndpoints for devices whose interface numbers + * can change (e.g. Quectel EP06). */ - if (dev_desc->idVendor == cpu_to_le16(QUECTEL_VENDOR_ID) && - dev_desc->idProduct == cpu_to_le16(QUECTEL_PRODUCT_EP06) && - iface_desc->bInterfaceSubClass && iface_desc->bNumEndpoints == 3) { + if (device_flags & NUMEP2 && iface_desc->bNumEndpoints != 2) return -ENODEV; - } /* Store the device flags so we can use them during attach. */ usb_set_serial_data(serial, (void *)device_flags); From eb9b195c53db75c694bf78576925fcb3eed9d0e1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 1 Nov 2018 22:30:38 +0100 Subject: [PATCH 0149/2608] bpf: fix partial copy of map_ptr when dst is scalar commit 0962590e553331db2cc0aef2dc35c57f6300dbbe upstream. ALU operations on pointers such as scalar_reg += map_value_ptr are handled in adjust_ptr_min_max_vals(). Problem is however that map_ptr and range in the register state share a union, so transferring state through dst_reg->range = ptr_reg->range is just buggy as any new map_ptr in the dst_reg is then truncated (or null) for subsequent checks. Fix this by adding a raw member and use it for copying state over to dst_reg. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Daniel Borkmann Cc: Edward Cree Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Acked-by: Edward Cree Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 3 +++ kernel/bpf/verifier.c | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 73bec75b74c8..a3333004fd2b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -50,6 +50,9 @@ struct bpf_reg_state { * PTR_TO_MAP_VALUE_OR_NULL */ struct bpf_map *map_ptr; + + /* Max size from any of the above. */ + unsigned long raw; }; /* Fixed part of pointer offset, pointer types only */ s32 off; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a0ffc62e7677..013b0cd1958e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1935,7 +1935,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg->umax_value = umax_ptr; dst_reg->var_off = ptr_reg->var_off; dst_reg->off = ptr_reg->off + smin_val; - dst_reg->range = ptr_reg->range; + dst_reg->raw = ptr_reg->raw; break; } /* A new variable offset is created. Note that off_reg->off @@ -1965,10 +1965,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; + dst_reg->raw = ptr_reg->raw; if (ptr_reg->type == PTR_TO_PACKET) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ - dst_reg->range = 0; + dst_reg->raw = 0; } break; case BPF_SUB: @@ -1999,7 +2000,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg->var_off = ptr_reg->var_off; dst_reg->id = ptr_reg->id; dst_reg->off = ptr_reg->off - smin_val; - dst_reg->range = ptr_reg->range; + dst_reg->raw = ptr_reg->raw; break; } /* A new variable offset is created. If the subtrahend is known @@ -2025,11 +2026,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; + dst_reg->raw = ptr_reg->raw; if (ptr_reg->type == PTR_TO_PACKET) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ if (smin_val < 0) - dst_reg->range = 0; + dst_reg->raw = 0; } break; case BPF_AND: From 0502f1366921284a8f418db753d9fd7af4cd5ba8 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 6 Nov 2018 01:23:39 -0500 Subject: [PATCH 0150/2608] Revert "ARM: tegra: Fix ULPI regression on Tegra20" This reverts commit b39ac54215190bc178ae7de799e74d327a3c1a33. The issue was fixed by upstream commit 5d797111afe1 ("clk: tegra: Add quirk for getting CDEV1/2 clocks on Tegra20"). Signed-off-by: Sasha Levin --- arch/arm/boot/dts/tegra20.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 2780e68a853b..914f59166a99 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -706,7 +706,7 @@ phy_type = "ulpi"; clocks = <&tegra_car TEGRA20_CLK_USB2>, <&tegra_car TEGRA20_CLK_PLL_U>, - <&tegra_car TEGRA20_CLK_PLL_P_OUT4>; + <&tegra_car TEGRA20_CLK_CDEV2>; clock-names = "reg", "pll_u", "ulpi-link"; resets = <&tegra_car 58>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; From 6eb5633da44ef52e37c27e25400d427b9774ae6e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 1 Sep 2018 09:40:01 +0300 Subject: [PATCH 0151/2608] fsnotify: fix ignore mask logic in fsnotify() [ Upstream commit 9bdda4e9cf2dcecb60a0683b10ffb8cd7e5f2f45 ] Commit 92183a42898d ("fsnotify: fix ignore mask logic in send_to_group()") acknoledges the use case of ignoring an event on an inode mark, because of an ignore mask on a mount mark of the same group (i.e. I want to get all events on this file, except for the events that came from that mount). This change depends on correctly merging the inode marks and mount marks group lists, so that the mount mark ignore mask would be tested in send_to_group(). Alas, the merging of the lists did not take into account the case where event in question is not in the mask of any of the mount marks. To fix this, completely remove the tests for inode and mount event masks from the lists merging code. Fixes: 92183a42898d ("fsnotify: fix ignore mask logic in send_to_group") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara [amir: backport to v4.14.y] Signed-off-by: Amir Goldstein Signed-off-by: Sasha Levin --- fs/notify/fsnotify.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index d76c81323dc1..2bc61e7543dd 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -286,17 +286,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - if ((mask & FS_MODIFY) || - (test_mask & to_tell->i_fsnotify_mask)) { - inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, + &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, &fsnotify_mark_srcu); - if (inode_conn) - inode_node = srcu_dereference(inode_conn->list.first, - &fsnotify_mark_srcu); - } - if (mnt && ((mask & FS_MODIFY) || - (test_mask & mnt->mnt_fsnotify_mask))) { + if (mnt) { inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); if (inode_conn) From 43607eb9f70823072e1bfc27f84ed81551ac2adb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 29 Aug 2018 17:02:16 +0200 Subject: [PATCH 0152/2608] gpio: mxs: Get rid of external API call [ Upstream commit 833eacc7b5913da9896bacd30db7d490aa777868 ] The MXS driver was calling back into the GPIO API from its irqchip. This is not very elegant, as we are a driver, let's just shortcut back into the gpio_chip .get() function instead. This is a tricky case since the .get() callback is not in this file, instead assigned by bgpio_init(). Calling the function direcly in the gpio_chip is however the lesser evil. Cc: Sascha Hauer Cc: Janusz Uzycki Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mxs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index 435def22445d..f66395524d0e 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -32,8 +32,6 @@ #include #include #include -/* FIXME: for gpio_get_value(), replace this by direct register read */ -#include #include #define MXS_SET 0x4 @@ -100,7 +98,7 @@ static int mxs_gpio_set_irq_type(struct irq_data *d, unsigned int type) port->both_edges &= ~pin_mask; switch (type) { case IRQ_TYPE_EDGE_BOTH: - val = gpio_get_value(port->gc.base + d->hwirq); + val = port->gc.get(&port->gc, d->hwirq); if (val) edge = GPIO_INT_FALL_EDGE; else From 33e52eb2d00582fad17177918b83a8df19958989 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sat, 3 Nov 2018 19:15:24 +0200 Subject: [PATCH 0153/2608] xfs: truncate transaction does not modify the inobt [ Upstream commit a606ebdb859e78beb757dfefa08001df366e2ef5 ] The truncate transaction does not ever modify the inode btree, but includes an associated log reservation. Update xfs_calc_itruncate_reservation() to remove the reservation associated with inobt updates. [Amir: This commit was merged for kernel v4.16 and a twin commit was merged for xfsprogs v4.16. As a result, a small xfs filesystem formatted with features -m rmapbt=1,reflink=1 using mkfs.xfs version >= v4.16 cannot be mounted with kernel < v4.16. For example, xfstests generic/17{1,2,3} format a small fs and when trying to mount it, they fail with an assert on this very demonic line: XFS (vdc): Log size 3075 blocks too small, minimum size is 3717 blocks XFS (vdc): AAIEEE! Log failed size checks. Abort! XFS: Assertion failed: 0, file: src/linux/fs/xfs/xfs_log.c, line: 666 The simple solution for stable kernels is to apply this patch, because mkfs.xfs v4.16 is already in the wild, so we have to assume that xfs filesystems with a "too small" log exist. Regardless, xfsprogs maintainers should also consider reverting the twin patch to stop creating those filesystems for the sake of users with unpatched kernels.] Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Cc: # v4.9+ Signed-off-by: Amir Goldstein Reviewed-by: Darrick J . Wong Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_trans_resv.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6bd916bd35e2..48eff18c5496 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -232,8 +232,6 @@ xfs_calc_write_reservation( * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size */ STATIC uint xfs_calc_itruncate_reservation( @@ -245,12 +243,7 @@ xfs_calc_itruncate_reservation( XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(5, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0))); + XFS_FSB_TO_B(mp, 1)))); } /* From ddb595dfe47278fd8ff3fd171b3729ec45d51b74 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Oct 2018 15:23:26 +0100 Subject: [PATCH 0154/2608] cachefiles: fix the race between cachefiles_bury_object() and rmdir(2) commit 169b803397499be85bdd1e3d07d6f5e3d4bd669e upstream. the victim might've been rmdir'ed just before the lock_rename(); unlike the normal callers, we do not look the source up after the parents are locked - we know it beforehand and just recheck that it's still the child of what used to be its parent. Unfortunately, the check is too weak - we don't spot a dead directory since its ->d_parent is unchanged, dentry is positive, etc. So we sail all the way to ->rename(), with hosting filesystems _not_ expecting to be asked renaming an rmdir'ed subdirectory. The fix is easy, fortunately - the lock on parent is sufficient for making IS_DEADDIR() on child safe. Cc: stable@vger.kernel.org Fixes: 9ae326a69004 (CacheFiles: A cache that backs onto a mounted filesystem) Signed-off-by: Al Viro Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 5f2f67d220fa..f6ed795f4af6 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -340,7 +340,7 @@ try_again: trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); From 7a4b046b0ce973f641d1b6ba4c6947ed94d77a80 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 15:06:41 +0200 Subject: [PATCH 0155/2608] ptp: fix Spectre v1 vulnerability commit efa61c8cf2950ab5c0e66cff3cabe2a2b24e81ba upstream. pin_index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/ptp/ptp_chardev.c:253 ptp_ioctl() warn: potential spectre issue 'ops->pin_config' [r] (local cap) Fix this by sanitizing pin_index before using it to index ops->pin_config, and before passing it as an argument to function ptp_set_pinfunc(), in which it is used to index info->pin_config. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_chardev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 51364621f77c..a421d6c551b6 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -24,6 +24,8 @@ #include #include +#include + #include "ptp_private.h" static int ptp_disable_pinfunc(struct ptp_clock_info *ops, @@ -248,6 +250,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; pd = ops->pin_config[pin_index]; @@ -266,6 +269,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan); From fd3127215babb993c7470dc556e547f5572ca371 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 2 Oct 2018 23:29:11 +0800 Subject: [PATCH 0156/2608] drm/edid: Add 6 bpc quirk for BOE panel in HP Pavilion 15-n233sl commit 0711a43b6d84ff9189adfbf83c8bbf56eef794bf upstream. There's another panel that reports "DFP 1.x compliant TMDS" but it supports 6bpc instead of 8 bpc. Apply 6 bpc quirk for the panel to fix it. BugLink: https://bugs.launchpad.net/bugs/1794387 Cc: # v4.8+ Signed-off-by: Kai-Heng Feng Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181002152911.4370-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index c29dea895605..12be61308ba2 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -111,6 +111,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */ + { "BOE", 0x78b, EDID_QUIRK_FORCE_6BPC }, + /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */ { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC }, From 20ff18553ed84b2bc57567e1ac51bf432719c289 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Fri, 5 Oct 2018 14:52:15 -0700 Subject: [PATCH 0157/2608] drm/edid: VSDB yCBCr420 Deep Color mode bit definitions commit 9068e02f58740778d8270840657f1e250a2cc60f upstream. HDMI Forum VSDB YCBCR420 deep color capability bits are 2:0. Correct definitions in the header for the mask to work correctly. Fixes: e6a9a2c3dc43 ("drm/edid: parse ycbcr 420 deep color information") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107893 Cc: # v4.14+ Signed-off-by: Clint Taylor Reviewed-by: Jani Nikula Reviewed-by: Shashank Sharma Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1538776335-12569-1-git-send-email-clinton.a.taylor@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 2 +- include/drm/drm_edid.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 12be61308ba2..d1191ebed072 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4223,7 +4223,7 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector, struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; dc_mask = db[7] & DRM_EDID_YCBCR420_DC_MASK; - hdmi->y420_dc_modes |= dc_mask; + hdmi->y420_dc_modes = dc_mask; } static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector, diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index a992434ded99..267e0426c479 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -214,9 +214,9 @@ struct detailed_timing { #define DRM_EDID_HDMI_DC_Y444 (1 << 3) /* YCBCR 420 deep color modes */ -#define DRM_EDID_YCBCR420_DC_48 (1 << 6) -#define DRM_EDID_YCBCR420_DC_36 (1 << 5) -#define DRM_EDID_YCBCR420_DC_30 (1 << 4) +#define DRM_EDID_YCBCR420_DC_48 (1 << 2) +#define DRM_EDID_YCBCR420_DC_36 (1 << 1) +#define DRM_EDID_YCBCR420_DC_30 (1 << 0) #define DRM_EDID_YCBCR420_DC_MASK (DRM_EDID_YCBCR420_DC_48 | \ DRM_EDID_YCBCR420_DC_36 | \ DRM_EDID_YCBCR420_DC_30) From c219c2cd34cc1bbf7dc9ffe04c5d7d411349e7c0 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 3 Oct 2018 19:45:38 +0300 Subject: [PATCH 0158/2608] drm: fb-helper: Reject all pixel format changing requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit db05c481977599236f12a85e55de9f5ab37b0a2c upstream. drm fbdev emulation doesn't support changing the pixel format at all, so reject all pixel format changing requests. Cc: stable@vger.kernel.org Signed-off-by: Eugeniy Paltsev Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181003164538.5534-1-Eugeniy.Paltsev@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fb_helper.c | 91 ++++++++++----------------------- 1 file changed, 26 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 5e93589c335c..29d1d3df3164 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1490,6 +1490,25 @@ unlock: } EXPORT_SYMBOL(drm_fb_helper_ioctl); +static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, + const struct fb_var_screeninfo *var_2) +{ + return var_1->bits_per_pixel == var_2->bits_per_pixel && + var_1->grayscale == var_2->grayscale && + var_1->red.offset == var_2->red.offset && + var_1->red.length == var_2->red.length && + var_1->red.msb_right == var_2->red.msb_right && + var_1->green.offset == var_2->green.offset && + var_1->green.length == var_2->green.length && + var_1->green.msb_right == var_2->green.msb_right && + var_1->blue.offset == var_2->blue.offset && + var_1->blue.length == var_2->blue.length && + var_1->blue.msb_right == var_2->blue.msb_right && + var_1->transp.offset == var_2->transp.offset && + var_1->transp.length == var_2->transp.length && + var_1->transp.msb_right == var_2->transp.msb_right; +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1500,7 +1519,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; - int depth; if (var->pixclock != 0 || in_dbg_master()) return -EINVAL; @@ -1520,72 +1538,15 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; - default: + /* + * drm fbdev emulation doesn't support changing the pixel format at all, + * so reject all pixel format changing requests. + */ + if (!drm_fb_pixel_format_equal(var, &info->var)) { + DRM_DEBUG("fbdev emulation doesn't support changing the pixel format\n"); return -EINVAL; } + return 0; } EXPORT_SYMBOL(drm_fb_helper_check_var); From 66448066c2b1ec587e6168a9206ffedd11d94444 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 16:59:01 +0200 Subject: [PATCH 0159/2608] RDMA/ucma: Fix Spectre v1 vulnerability commit a3671a4f973ee9d9621d60166cc3b037c397d604 upstream. hdr.cmd can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/infiniband/core/ucma.c:1686 ucma_write() warn: potential spectre issue 'ucma_cmd_table' [r] (local cap) Fix this by sanitizing hdr.cmd before using it to index ucm_cmd_table. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 17144a781aeb..c3e5f921da12 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -44,6 +44,8 @@ #include #include +#include + #include #include #include @@ -1659,6 +1661,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; From f0e3b74a4dc0ce27abccdfdcd41eae73cee4b9ac Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 16:32:40 +0200 Subject: [PATCH 0160/2608] IB/ucm: Fix Spectre v1 vulnerability commit 0295e39595e1146522f2722715dba7f7fba42217 upstream. hdr.cmd can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/infiniband/core/ucm.c:1127 ib_ucm_write() warn: potential spectre issue 'ucm_cmd_table' [r] (local cap) Fix this by sanitizing hdr.cmd before using it to index ucm_cmd_table. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index f2a7f62c2834..09cb24353be3 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -46,6 +46,8 @@ #include #include +#include + #include #include @@ -1118,6 +1120,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; From 8686f337ca17db405ca770c490e725088be58414 Mon Sep 17 00:00:00 2001 From: Tobias Herzog Date: Sat, 22 Sep 2018 22:11:10 +0200 Subject: [PATCH 0161/2608] cdc-acm: do not reset notification buffer index upon urb unlinking commit dae3ddba36f8c337fb59cef07d564da6fc9b7551 upstream. Resetting the write index of the notification buffer on urb unlink (e.g. closing a cdc-acm device from userspace) may lead to wrong interpretation of further received notifications, in case the index is not 0 when urb unlink happens (i.e. when parts of a notification already have been transferred). On the device side there is no "reset" of the notification transimission and thus we would get out of sync with the device. Signed-off-by: Tobias Herzog Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 9f6f402470ac..9f52859583cb 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -367,7 +367,6 @@ static void acm_ctrl_irq(struct urb *urb) case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - acm->nb_index = 0; dev_dbg(&acm->control->dev, "%s - urb shutting down with status: %d\n", __func__, status); From 32772ef3f5ed990404c7664782ad77c551d19693 Mon Sep 17 00:00:00 2001 From: Tobias Herzog Date: Sat, 22 Sep 2018 22:11:11 +0200 Subject: [PATCH 0162/2608] cdc-acm: correct counting of UART states in serial state notification commit f976d0e5747ca65ccd0fb2a4118b193d70aa1836 upstream. The usb standard ("Universal Serial Bus Class Definitions for Communication Devices") distiguishes between "consistent signals" (DSR, DCD), and "irregular signals" (break, ring, parity error, framing error, overrun). The bits of "irregular signals" are set, if this error/event occurred on the device side and are immeadeatly unset, if the serial state notification was sent. Like other drivers of real serial ports do, just the occurence of those events should be counted in serial_icounter_struct (but no 1->0 transitions). Signed-off-by: Tobias Herzog Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 9f52859583cb..5a703c33de1c 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -322,17 +322,17 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) if (difference & ACM_CTRL_DSR) acm->iocount.dsr++; - if (difference & ACM_CTRL_BRK) - acm->iocount.brk++; - if (difference & ACM_CTRL_RI) - acm->iocount.rng++; if (difference & ACM_CTRL_DCD) acm->iocount.dcd++; - if (difference & ACM_CTRL_FRAMING) + if (newctrl & ACM_CTRL_BRK) + acm->iocount.brk++; + if (newctrl & ACM_CTRL_RI) + acm->iocount.rng++; + if (newctrl & ACM_CTRL_FRAMING) acm->iocount.frame++; - if (difference & ACM_CTRL_PARITY) + if (newctrl & ACM_CTRL_PARITY) acm->iocount.parity++; - if (difference & ACM_CTRL_OVERRUN) + if (newctrl & ACM_CTRL_OVERRUN) acm->iocount.overrun++; spin_unlock(&acm->read_lock); From 6f053e36bda96461245d0d01f374dac4f7cab54c Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 4 Oct 2018 15:49:06 +0200 Subject: [PATCH 0163/2608] cdc-acm: fix race between reset and control messaging commit 9397940ed812b942c520e0c25ed4b2c64d57e8b9 upstream. If a device splits up a control message and a reset() happens between the parts, the message is lost and already recieved parts must be dropped. Signed-off-by: Oliver Neukum Fixes: 1aba579f3cf51 ("cdc-acm: handle read pipe errors") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 5a703c33de1c..e41d00bc7e97 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1654,6 +1654,7 @@ static int acm_pre_reset(struct usb_interface *intf) struct acm *acm = usb_get_intfdata(intf); clear_bit(EVENT_RX_STALL, &acm->flags); + acm->nb_index = 0; /* pending control transfers are lost */ return 0; } From 89cd15c962405428bcd11c5f34127698f29bbd60 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Fri, 5 Oct 2018 16:17:44 -0600 Subject: [PATCH 0164/2608] usb: usbip: Fix BUG: KASAN: slab-out-of-bounds in vhci_hub_control() commit 81f7567c51ad97668d1c3a48e8ecc482e64d4161 upstream. vhci_hub_control() accesses port_status array with out of bounds port value. Fix it to reference port_status[] only with a valid rhport value when invalid_rhport flag is true. The invalid_rhport flag is set early on after detecting in port value is within the bounds or not. The following is used reproduce the problem and verify the fix: C reproducer: https://syzkaller.appspot.com/x/repro.c?x=14ed8ab6400000 Reported-by: syzbot+bccc1fe10b70fadc78d0@syzkaller.appspotmail.com Cc: stable Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 57 ++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 05aa1ba351b6..84e2d7edaa5c 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -332,8 +332,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, struct vhci_hcd *vhci_hcd; struct vhci *vhci; int retval = 0; - int rhport; + int rhport = -1; unsigned long flags; + bool invalid_rhport = false; u32 prev_port_status[VHCI_HC_PORTS]; @@ -348,9 +349,19 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue, wIndex); - if (wIndex > VHCI_HC_PORTS) - pr_err("invalid port number %d\n", wIndex); - rhport = wIndex - 1; + /* + * wIndex can be 0 for some request types (typeReq). rhport is + * in valid range when wIndex >= 1 and < VHCI_HC_PORTS. + * + * Reference port_status[] only with valid rhport when + * invalid_rhport is false. + */ + if (wIndex < 1 || wIndex > VHCI_HC_PORTS) { + invalid_rhport = true; + if (wIndex > VHCI_HC_PORTS) + pr_err("invalid port number %d\n", wIndex); + } else + rhport = wIndex - 1; vhci_hcd = hcd_to_vhci_hcd(hcd); vhci = vhci_hcd->vhci; @@ -359,8 +370,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, /* store old status and compare now and old later */ if (usbip_dbg_flag_vhci_rh) { - memcpy(prev_port_status, vhci_hcd->port_status, - sizeof(prev_port_status)); + if (!invalid_rhport) + memcpy(prev_port_status, vhci_hcd->port_status, + sizeof(prev_port_status)); } switch (typeReq) { @@ -368,8 +380,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh(" ClearHubFeature\n"); break; case ClearPortFeature: - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { @@ -429,9 +443,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, break; case GetPortStatus: usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex); - if (wIndex < 1) { + if (invalid_rhport) { pr_err("invalid port number %d\n", wIndex); retval = -EPIPE; + goto error; } /* we do not care about resume. */ @@ -527,16 +542,20 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; } - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } vhci_hcd->port_status[rhport] |= USB_PORT_STAT_SUSPEND; break; case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_POWER\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) vhci_hcd->port_status[rhport] |= USB_SS_PORT_STAT_POWER; else @@ -545,8 +564,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_BH_PORT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_BH_PORT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* Applicable only for USB3.0 hub */ if (hcd->speed != HCD_USB3) { pr_err("USB_PORT_FEAT_BH_PORT_RESET req not " @@ -557,8 +578,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { vhci_hcd->port_status[rhport] = 0; @@ -579,8 +602,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" SetPortFeature: default %d\n", wValue); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) { if ((vhci_hcd->port_status[rhport] & USB_SS_PORT_STAT_POWER) != 0) { @@ -622,7 +647,7 @@ error: if (usbip_dbg_flag_vhci_rh) { pr_debug("port %d\n", rhport); /* Only dump valid port status */ - if (rhport >= 0) { + if (!invalid_rhport) { dump_port_status_diff(prev_port_status[rhport], vhci_hcd->port_status[rhport], hcd->speed == HCD_USB3); @@ -632,8 +657,10 @@ error: spin_unlock_irqrestore(&vhci->lock, flags); - if ((vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) + if (!invalid_rhport && + (vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) { usb_hcd_poll_rh_status(hcd); + } return retval; } From 509015954a0c67ba7c34dadaf0d3fffde8c482c7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 12:16:45 +0200 Subject: [PATCH 0165/2608] usb: gadget: storage: Fix Spectre v1 vulnerability commit 9ae24af3669111d418242caec8dd4ebd9ba26860 upstream. num can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/usb/gadget/function/f_mass_storage.c:3177 fsg_lun_make() warn: potential spectre issue 'fsg_opts->common->luns' [r] (local cap) Fix this by sanitizing num before using it to index fsg_opts->common->luns Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 5153e29870c3..25ba30329533 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -221,6 +221,8 @@ #include #include +#include + #include "configfs.h" @@ -3170,6 +3172,7 @@ static struct config_group *fsg_lun_make(struct config_group *group, fsg_opts = to_fsg_opts(&group->cg_item); if (num >= FSG_MAX_LUNS) return ERR_PTR(-ERANGE); + num = array_index_nospec(num, FSG_MAX_LUNS); mutex_lock(&fsg_opts->lock); if (fsg_opts->refcnt || fsg_opts->common->luns[num]) { From dad71807595250788821346805ee8976efcc7f29 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Oct 2018 16:55:04 -0400 Subject: [PATCH 0166/2608] USB: fix the usbfs flag sanitization for control transfers commit 665c365a77fbfeabe52694aedf3446d5f2f1ce42 upstream. Commit 7a68d9fb8510 ("USB: usbdevfs: sanitize flags more") checks the transfer flags for URBs submitted from userspace via usbfs. However, the check for whether the USBDEVFS_URB_SHORT_NOT_OK flag should be allowed for a control transfer was added in the wrong place, before the code has properly determined the direction of the control transfer. (Control transfers are special because for them, the direction is set by the bRequestType byte of the Setup packet rather than direction bit of the endpoint address.) This patch moves code which sets up the allow_short flag for control transfers down after is_in has been set to the correct value. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+24a30223a4b609bb802e@syzkaller.appspotmail.com Fixes: 7a68d9fb8510 ("USB: usbdevfs: sanitize flags more") CC: Oliver Neukum CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 76cb9b3649b4..492977f78fde 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1491,8 +1491,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: - if (is_in) - allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1522,6 +1520,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb is_in = 0; uurb->endpoint &= ~USB_DIR_IN; } + if (is_in) + allow_short = true; snoop(&ps->dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", From e5c0a5893c0443d943b0a411802e3e463664c737 Mon Sep 17 00:00:00 2001 From: Mikhail Nikiforov Date: Mon, 15 Oct 2018 11:17:56 -0700 Subject: [PATCH 0167/2608] Input: elan_i2c - add ACPI ID for Lenovo IdeaPad 330-15IGM commit 13c1c5e4d7f887cba36c5e3df3faa22071c1469f upstream. Add ELAN061C to the ACPI table to support Elan touchpad found in Lenovo IdeaPad 330-15IGM. Signed-off-by: Mikhail Nikiforov Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 696e540304fd..766d30a7b085 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1262,6 +1262,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 }, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, From 1220de22227bbb2773f31c29c33c1271f2f42890 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Mon, 8 Oct 2018 10:36:40 -0400 Subject: [PATCH 0168/2608] sched/fair: Fix throttle_list starvation with low CFS quota commit baa9be4ffb55876923dc9716abc0a448e510ba30 upstream. With a very low cpu.cfs_quota_us setting, such as the minimum of 1000, distribute_cfs_runtime may not empty the throttled_list before it runs out of runtime to distribute. In that case, due to the change from c06f04c7048 to put throttled entries at the head of the list, later entries on the list will starve. Essentially, the same X processes will get pulled off the list, given CPU time and then, when expired, get put back on the head of the list where distribute_cfs_runtime will give runtime to the same set of processes leaving the rest. Fix the issue by setting a bit in struct cfs_bandwidth when distribute_cfs_runtime is running, so that the code in throttle_cfs_rq can decide to put the throttled entry on the tail or the head of the list. The bit is set/cleared by the callers of distribute_cfs_runtime while they hold cfs_bandwidth->lock. This is easy to reproduce with a handful of CPU consumers. I use 'crash' on the live system. In some cases you can simply look at the throttled list and see the later entries are not changing: crash> list cfs_rq.throttled_list -H 0xffff90b54f6ade40 -s cfs_rq.runtime_remaining | paste - - | awk '{print $1" "$4}' | pr -t -n3 1 ffff90b56cb2d200 -976050 2 ffff90b56cb2cc00 -484925 3 ffff90b56cb2bc00 -658814 4 ffff90b56cb2ba00 -275365 5 ffff90b166a45600 -135138 6 ffff90b56cb2da00 -282505 7 ffff90b56cb2e000 -148065 8 ffff90b56cb2fa00 -872591 9 ffff90b56cb2c000 -84687 10 ffff90b56cb2f000 -87237 11 ffff90b166a40a00 -164582 crash> list cfs_rq.throttled_list -H 0xffff90b54f6ade40 -s cfs_rq.runtime_remaining | paste - - | awk '{print $1" "$4}' | pr -t -n3 1 ffff90b56cb2d200 -994147 2 ffff90b56cb2cc00 -306051 3 ffff90b56cb2bc00 -961321 4 ffff90b56cb2ba00 -24490 5 ffff90b166a45600 -135138 6 ffff90b56cb2da00 -282505 7 ffff90b56cb2e000 -148065 8 ffff90b56cb2fa00 -872591 9 ffff90b56cb2c000 -84687 10 ffff90b56cb2f000 -87237 11 ffff90b166a40a00 -164582 Sometimes it is easier to see by finding a process getting starved and looking at the sched_info: crash> task ffff8eb765994500 sched_info PID: 7800 TASK: ffff8eb765994500 CPU: 16 COMMAND: "cputest" sched_info = { pcount = 8, run_delay = 697094208, last_arrival = 240260125039, last_queued = 240260327513 }, crash> task ffff8eb765994500 sched_info PID: 7800 TASK: ffff8eb765994500 CPU: 16 COMMAND: "cputest" sched_info = { pcount = 8, run_delay = 697094208, last_arrival = 240260125039, last_queued = 240260327513 }, Signed-off-by: Phil Auld Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: c06f04c70489 ("sched: Fix potential near-infinite distribute_cfs_runtime() loop") Link: http://lkml.kernel.org/r/20181008143639.GA4019@pauld.bos.csb Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 22 +++++++++++++++++++--- kernel/sched/sched.h | 2 ++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b2d699f28304..19bfa21f7197 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4299,9 +4299,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) /* * Add to the _head_ of the list, so that an already-started - * distribute_cfs_runtime will not see us + * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is + * not running add to the tail so that later runqueues don't get starved. */ - list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (cfs_b->distribute_running) + list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + else + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); /* * If we're the first throttled task, make sure the bandwidth @@ -4445,14 +4449,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) * in us over-using our runtime if it is all used during this loop, but * only by limited amounts in that extreme case. */ - while (throttled && cfs_b->runtime > 0) { + while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { runtime = cfs_b->runtime; + cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ runtime = distribute_cfs_runtime(cfs_b, runtime, runtime_expires); raw_spin_lock(&cfs_b->lock); + cfs_b->distribute_running = 0; throttled = !list_empty(&cfs_b->throttled_cfs_rq); cfs_b->runtime -= min(runtime, cfs_b->runtime); @@ -4563,6 +4569,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); + if (cfs_b->distribute_running) { + raw_spin_unlock(&cfs_b->lock); + return; + } + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { raw_spin_unlock(&cfs_b->lock); return; @@ -4572,6 +4583,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) runtime = cfs_b->runtime; expires = cfs_b->runtime_expires; + if (runtime) + cfs_b->distribute_running = 1; + raw_spin_unlock(&cfs_b->lock); if (!runtime) @@ -4582,6 +4596,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) raw_spin_lock(&cfs_b->lock); if (expires == cfs_b->runtime_expires) cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } @@ -4690,6 +4705,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period_timer.function = sched_cfs_period_timer; hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->slack_timer.function = sched_cfs_slack_timer; + cfs_b->distribute_running = 0; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b29376169f3f..63d999dfec80 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -288,6 +288,8 @@ struct cfs_bandwidth { /* statistics */ int nr_periods, nr_throttled; u64 throttled_time; + + bool distribute_running; #endif }; From 1595a964d7afb79530770a2dd6fc83b1676faef0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Oct 2018 12:38:26 +0200 Subject: [PATCH 0169/2608] x86/tsc: Force inlining of cyc2ns bits commit 4907c68abd3f60f650f98d5a69d4ec77c0bde44f upstream. Looking at the asm for native_sched_clock() I noticed we don't inline enough. Mostly caused by sharing code with cyc2ns_read_begin(), which we didn't used to do. So mark all that __force_inline to make it DTRT. Fixes: 59eaef78bfea ("x86/tsc: Remodel cyc2ns to use seqcount_latch()") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Cc: eric.dumazet@gmail.com Cc: bp@alien8.de Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181011104019.695196158@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 0bf06fa3027e..36d02484e384 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -60,7 +60,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void cyc2ns_read_begin(struct cyc2ns_data *data) +void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -77,7 +77,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void cyc2ns_read_end(void) +void __always_inline cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -123,7 +123,7 @@ static void cyc2ns_init(int cpu) seqcount_init(&c2n->seq); } -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; From 7f6273f584fd74bcee9aa16db3f0e2fbcae48e86 Mon Sep 17 00:00:00 2001 From: Zhimin Gu Date: Fri, 21 Sep 2018 14:26:24 +0800 Subject: [PATCH 0170/2608] x86, hibernate: Fix nosave_regions setup for hibernation commit cc55f7537db6af371e9c1c6a71161ee40f918824 upstream. On 32bit systems, nosave_regions(non RAM areas) located between max_low_pfn and max_pfn are not excluded from hibernation snapshot currently, which may result in a machine check exception when trying to access these unsafe regions during hibernation: [ 612.800453] Disabling lock debugging due to kernel taint [ 612.805786] mce: [Hardware Error]: CPU 0: Machine Check Exception: 5 Bank 6: fe00000000801136 [ 612.814344] mce: [Hardware Error]: RIP !INEXACT! 60:<00000000d90be566> {swsusp_save+0x436/0x560} [ 612.823167] mce: [Hardware Error]: TSC 1f5939fe276 ADDR dd000000 MISC 30e0000086 [ 612.830677] mce: [Hardware Error]: PROCESSOR 0:306c3 TIME 1529487426 SOCKET 0 APIC 0 microcode 24 [ 612.839581] mce: [Hardware Error]: Run the above through 'mcelog --ascii' [ 612.846394] mce: [Hardware Error]: Machine check: Processor context corrupt [ 612.853380] Kernel panic - not syncing: Fatal machine check [ 612.858978] Kernel Offset: 0x18000000 from 0xc1000000 (relocation range: 0xc0000000-0xf7ffdfff) This is because on 32bit systems, pages above max_low_pfn are regarded as high memeory, and accessing unsafe pages might cause expected MCE. On the problematic 32bit system, there are reserved memory above low memory, which triggered the MCE: e820 memory mapping: [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009d7ff] usable [ 0.000000] BIOS-e820: [mem 0x000000000009d800-0x000000000009ffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000000e0000-0x00000000000fffff] reserved [ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x00000000d160cfff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d160d000-0x00000000d1613fff] ACPI NVS [ 0.000000] BIOS-e820: [mem 0x00000000d1614000-0x00000000d1a44fff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d1a45000-0x00000000d1ecffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000d1ed0000-0x00000000d7eeafff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d7eeb000-0x00000000d7ffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000d8000000-0x00000000d875ffff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d8760000-0x00000000d87fffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000d8800000-0x00000000d8fadfff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d8fae000-0x00000000d8ffffff] ACPI data [ 0.000000] BIOS-e820: [mem 0x00000000d9000000-0x00000000da71bfff] usable [ 0.000000] BIOS-e820: [mem 0x00000000da71c000-0x00000000da7fffff] ACPI NVS [ 0.000000] BIOS-e820: [mem 0x00000000da800000-0x00000000dbb8bfff] usable [ 0.000000] BIOS-e820: [mem 0x00000000dbb8c000-0x00000000dbffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000dd000000-0x00000000df1fffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000f8000000-0x00000000fbffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fec00000-0x00000000fec00fff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fed00000-0x00000000fed03fff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fed1c000-0x00000000fed1ffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000ff000000-0x00000000ffffffff] reserved [ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000041edfffff] usable Fix this problem by changing pfn limit from max_low_pfn to max_pfn. This fix does not impact 64bit system because on 64bit max_low_pfn is the same as max_pfn. Signed-off-by: Zhimin Gu Acked-by: Pavel Machek Signed-off-by: Chen Yu Acked-by: Thomas Gleixner Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index dcb00acb6583..4bc12447a50f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1287,7 +1287,7 @@ void __init setup_arch(char **cmdline_p) kvm_guest_init(); e820__reserve_resources(); - e820__register_nosave_regions(max_low_pfn); + e820__register_nosave_regions(max_pfn); x86_init.resources.reserve_resources(); From 577bfbe31fa9da8c8ed1c045a9be8d3cc21f1664 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Oct 2018 12:38:27 +0200 Subject: [PATCH 0171/2608] x86/percpu: Fix this_cpu_read() commit b59167ac7bafd804c91e49ad53c6d33a7394d4c8 upstream. Eric reported that a sequence count loop using this_cpu_read() got optimized out. This is wrong, this_cpu_read() must imply READ_ONCE() because the interface is IRQ-safe, therefore an interrupt can have changed the per-cpu value. Fixes: 7c3576d261ce ("[PATCH] i386: Convert PDA into the percpu section") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Eric Dumazet Cc: hpa@zytor.com Cc: eric.dumazet@gmail.com Cc: bp@alien8.de Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181011104019.748208519@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/percpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ba3c523aaf16..12aa2bb6bac4 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -185,22 +185,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ From 70931375b112460ab15d601a1e89c6f2d0701012 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 12 Oct 2018 17:53:12 -0700 Subject: [PATCH 0172/2608] x86/time: Correct the attribute on jiffies' definition commit 53c13ba8ed39e89f21a0b98f4c8a241bb44e483d upstream. Clang warns that the declaration of jiffies in include/linux/jiffies.h doesn't match the definition in arch/x86/time/kernel.c: arch/x86/kernel/time.c:29:42: warning: section does not match previous declaration [-Wsection] __visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; ^ ./include/linux/cache.h:49:4: note: expanded from macro '__cacheline_aligned' __section__(".data..cacheline_aligned"))) ^ ./include/linux/jiffies.h:81:31: note: previous attribute is here extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; ^ ./arch/x86/include/asm/cache.h:20:2: note: expanded from macro '__cacheline_aligned_in_smp' __page_aligned_data ^ ./include/linux/linkage.h:39:29: note: expanded from macro '__page_aligned_data' #define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) ^ ./include/linux/compiler_attributes.h:233:56: note: expanded from macro '__section' #define __section(S) __attribute__((__section__(#S))) ^ 1 warning generated. The declaration was changed in commit 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") but wasn't updated here. Make them match so Clang no longer warns. Fixes: 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Nick Desaulniers Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181013005311.28617-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 49a5c394f3ed..ab0176ae985b 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,7 +25,7 @@ #include #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) From ce4454ff2b2de585f059b011dc1b77205f76d607 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 16 Oct 2018 22:25:25 +0200 Subject: [PATCH 0173/2608] x86/fpu: Fix i486 + no387 boot crash by only saving FPU registers on context switch if there is an FPU commit 2224d616528194b02424c91c2ee254b3d29942c3 upstream. Booting an i486 with "no387 nofxsr" ends with with the following crash: math_emulate: 0060:c101987d Kernel panic - not syncing: Math emulation needed in kernel on the first context switch in user land. The reason is that copy_fpregs_to_fpstate() tries FNSAVE which does not work as the FPU is turned off. This bug was introduced in: f1c8cd0176078 ("x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active") Add a check for X86_FEATURE_FPU before trying to save FPU registers (we have such a check in switch_fpu_finish() already). Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: f1c8cd0176078 ("x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active") Link: http://lkml.kernel.org/r/20181016202525.29437-4-bigeasy@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/fpu/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a38bf5a1e37a..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else From 3a2b1d50bb294b7d5c31308ae4c04a87b6c82176 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 16 Jan 2018 10:33:05 +0100 Subject: [PATCH 0174/2608] net: fs_enet: do not call phy_stop() in interrupts [ Upstream commit f8b39039cbf2a15f2b8c9f081e1cbd5dee00aaf5 ] In case of TX timeout, fs_timeout() calls phy_stop(), which triggers the following BUG_ON() as we are in interrupt. [92708.199889] kernel BUG at drivers/net/phy/mdio_bus.c:482! [92708.204985] Oops: Exception in kernel mode, sig: 5 [#1] [92708.210119] PREEMPT [92708.212107] CMPC885 [92708.214216] CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: G W 4.9.61 #39 [92708.223227] task: c60f0a40 task.stack: c6104000 [92708.227697] NIP: c02a84bc LR: c02a947c CTR: c02a93d8 [92708.232614] REGS: c6105c70 TRAP: 0700 Tainted: G W (4.9.61) [92708.241193] MSR: 00021032 [92708.244818] CR: 24000822 XER: 20000000 [92708.248767] GPR00: c02a947c c6105d20 c60f0a40 c62b4c00 00000005 0000001f c069aad8 0001a688 GPR08: 00000007 00000100 c02a93d8 00000000 000005fc 00000000 c6213240 c06338e4 GPR16: 00000001 c06330d4 c0633094 00000000 c0680000 c6104000 c6104000 00000000 GPR24: 00000200 00000000 ffffffff 00000004 00000078 00009032 00000000 c62b4c00 NIP [c02a84bc] mdiobus_read+0x20/0x74 [92708.281517] LR [c02a947c] kszphy_config_intr+0xa4/0xc4 [92708.286547] Call Trace: [92708.288980] [c6105d20] [c6104000] 0xc6104000 (unreliable) [92708.294339] [c6105d40] [c02a947c] kszphy_config_intr+0xa4/0xc4 [92708.300098] [c6105d50] [c02a5330] phy_stop+0x60/0x9c [92708.305007] [c6105d60] [c02c84d0] fs_timeout+0xdc/0x110 [92708.310197] [c6105d80] [c035cd48] dev_watchdog+0x268/0x2a0 [92708.315593] [c6105db0] [c0060288] call_timer_fn+0x34/0x17c [92708.321014] [c6105dd0] [c00605f0] run_timer_softirq+0x21c/0x2e4 [92708.326887] [c6105e50] [c001e19c] __do_softirq+0xf4/0x2f4 [92708.332207] [c6105eb0] [c001e3c8] run_ksoftirqd+0x2c/0x40 [92708.337560] [c6105ec0] [c003b420] smpboot_thread_fn+0x1f0/0x258 [92708.343405] [c6105ef0] [c003745c] kthread+0xbc/0xd0 [92708.348217] [c6105f40] [c000c400] ret_from_kernel_thread+0x5c/0x64 [92708.354275] Instruction dump: [92708.357207] 7c0803a6 bbc10018 38210020 4e800020 7c0802a6 9421ffe0 54290024 bfc10018 [92708.364865] 90010024 7c7f1b78 81290008 552902ee <0f090000> 3bc3002c 7fc3f378 90810008 [92708.372711] ---[ end trace 42b05441616fafd7 ]--- This patch moves fs_timeout() actions into an async worker. Fixes: commit 48257c4f168e5 ("Add fs_enet ethernet network driver, for several embedded platforms") Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/freescale/fs_enet/fs_enet-main.c | 16 +++++++++++++--- drivers/net/ethernet/freescale/fs_enet/fs_enet.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 753259091b22..28bd4cf61741 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -613,9 +613,11 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void fs_timeout(struct net_device *dev) +static void fs_timeout_work(struct work_struct *work) { - struct fs_enet_private *fep = netdev_priv(dev); + struct fs_enet_private *fep = container_of(work, struct fs_enet_private, + timeout_work); + struct net_device *dev = fep->ndev; unsigned long flags; int wake = 0; @@ -627,7 +629,6 @@ static void fs_timeout(struct net_device *dev) phy_stop(dev->phydev); (*fep->ops->stop)(dev); (*fep->ops->restart)(dev); - phy_start(dev->phydev); } phy_start(dev->phydev); @@ -639,6 +640,13 @@ static void fs_timeout(struct net_device *dev) netif_wake_queue(dev); } +static void fs_timeout(struct net_device *dev) +{ + struct fs_enet_private *fep = netdev_priv(dev); + + schedule_work(&fep->timeout_work); +} + /*----------------------------------------------------------------------------- * generic link-change handler - should be sufficient for most cases *-----------------------------------------------------------------------------*/ @@ -759,6 +767,7 @@ static int fs_enet_close(struct net_device *dev) netif_stop_queue(dev); netif_carrier_off(dev); napi_disable(&fep->napi); + cancel_work_sync(&fep->timeout_work); phy_stop(dev->phydev); spin_lock_irqsave(&fep->lock, flags); @@ -1019,6 +1028,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; + INIT_WORK(&fep->timeout_work, fs_timeout_work); netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index 168e10ea487f..837c802ca302 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -125,6 +125,7 @@ struct fs_enet_private { spinlock_t lock; /* during all ops except TX pckt processing */ spinlock_t tx_lock; /* during fs_start_xmit and fs_tx */ struct fs_platform_info *fpi; + struct work_struct timeout_work; const struct fs_ops *ops; int rx_ring, tx_ring; dma_addr_t ring_mem_addr; From 0b047cbc44ae7d0cea41a99cd7ec1f009360a605 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 10 Nov 2018 07:48:36 -0800 Subject: [PATCH 0175/2608] Linux 4.14.80 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 57a007bf1181..f4cad5e03561 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 79 +SUBLEVEL = 80 EXTRAVERSION = NAME = Petit Gorille From 31354285b2467a5838f131786181dd706b28688b Mon Sep 17 00:00:00 2001 From: Liu Xiang Date: Tue, 28 Aug 2018 22:32:57 +0800 Subject: [PATCH 0176/2608] mtd: spi-nor: fsl-quadspi: fix read error for flash size larger than 16MB commit 41fe242979e463d6ad251077ded01b825a330b7e upstream. If the size of spi-nor flash is larger than 16MB, the read_opcode is set to SPINOR_OP_READ_1_1_4_4B, and fsl_qspi_get_seqid() will return -EINVAL when cmd is SPINOR_OP_READ_1_1_4_4B. This can cause read operation fail. Fixes: e46ecda764dc ("mtd: spi-nor: Add Freescale QuadSPI driver") Cc: Signed-off-by: Liu Xiang Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/fsl-quadspi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index f17d22435bfc..62f5763482b3 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -468,6 +468,7 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd) { switch (cmd) { case SPINOR_OP_READ_1_1_4: + case SPINOR_OP_READ_1_1_4_4B: return SEQID_READ; case SPINOR_OP_WREN: return SEQID_WREN; From 615bda31cc01cfe937c35c8b969807a38614a0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 11 Oct 2018 09:42:17 +0200 Subject: [PATCH 0177/2608] spi: bcm-qspi: switch back to reading flash using smaller chunks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 940ec770c295682993d1cccce3081fd7c74fece8 upstream. Fixing/optimizing bcm_qspi_bspi_read() performance introduced two changes: 1) It added a loop to read all requested data using multiple BSPI ops. 2) It bumped max size of a single BSPI block request from 256 to 512 B. The later change resulted in occasional BSPI timeouts causing a regression. For some unknown reason hardware doesn't always handle reads as expected when using 512 B chunks. In such cases it may happen that BSPI returns amount of requested bytes without the last 1-3 ones. It provides the remaining bytes later but doesn't raise an interrupt until another LR start. Switching back to 256 B reads fixes that problem and regression. Fixes: 345309fa7c0c ("spi: bcm-qspi: Fix bcm_qspi_bspi_read() performance") Signed-off-by: Rafał Miłecki Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm-qspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 6573152ce893..0316fae20cfe 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -88,7 +88,7 @@ #define BSPI_BPP_MODE_SELECT_MASK BIT(8) #define BSPI_BPP_ADDR_SELECT_MASK BIT(16) -#define BSPI_READ_LENGTH 512 +#define BSPI_READ_LENGTH 256 /* MSPI register offsets */ #define MSPI_SPCR0_LSB 0x000 From 82239bda9dac002ffb68c6019134c5245aeb4cae Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:08 +0800 Subject: [PATCH 0178/2608] bcache: trace missed reading by cache_missed commit 502b291568fc7faf1ebdb2c2590f12851db0ff76 upstream. Missed reading IOs are identified by s->cache_missed, not the s->cache_miss, so in trace_bcache_read() using trace_bcache_read to identify whether the IO is missed or not. Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 5b63afff46d5..69b336d8c05a 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -792,7 +792,7 @@ static void cached_dev_read_done_bh(struct closure *cl) bch_mark_cache_accounting(s->iop.c, s->d, !s->cache_missed, s->iop.bypass); - trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); + trace_bcache_read(s->orig_bio, !s->cache_missed, s->iop.bypass); if (s->iop.status) continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); From dcd048d6bb08d3da1e9cb1923bce80dbc0f90fed Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:14 +0800 Subject: [PATCH 0179/2608] bcache: fix miss key refill->end in writeback commit 2d6cb6edd2c7fb4f40998895bda45006281b1ac5 upstream. refill->end record the last key of writeback, for example, at the first time, keys (1,128K) to (1,1024K) are flush to the backend device, but the end key (1,1024K) is not included, since the bellow code: if (bkey_cmp(k, refill->end) >= 0) { ret = MAP_DONE; goto out; } And in the next time when we refill writeback keybuf again, we searched key start from (1,1024K), and got a key bigger than it, so the key (1,1024K) missed. This patch modify the above code, and let the end key to be included to the writeback key buffer. Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 89d088cf95d9..9406326216f1 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2371,7 +2371,7 @@ static int refill_keybuf_fn(struct btree_op *op, struct btree *b, struct keybuf *buf = refill->buf; int ret = MAP_CONTINUE; - if (bkey_cmp(k, refill->end) >= 0) { + if (bkey_cmp(k, refill->end) > 0) { ret = MAP_DONE; goto out; } From 0be4ef1290cfe370caf52e3dac2325cf7ee12007 Mon Sep 17 00:00:00 2001 From: Dmitry Bazhenov Date: Mon, 15 Oct 2018 14:21:22 +0500 Subject: [PATCH 0180/2608] hwmon: (pmbus) Fix page count auto-detection. commit e7c6a55606b5c46b449d76588968b4d8caae903f upstream. Devices with compatible="pmbus" field have zero initial page count, and pmbus_clear_faults() being called before the page count auto- detection does not actually clear faults because it depends on the page count. Non-cleared faults in its turn may fail the subsequent page count auto-detection. This patch fixes this problem by calling pmbus_clear_fault_page() for currently set page and calling pmbus_clear_faults() after the page count was detected. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Bazhenov Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/pmbus.c | 2 ++ drivers/hwmon/pmbus/pmbus_core.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c index 7718e58dbda5..7688dab32f6e 100644 --- a/drivers/hwmon/pmbus/pmbus.c +++ b/drivers/hwmon/pmbus/pmbus.c @@ -118,6 +118,8 @@ static int pmbus_identify(struct i2c_client *client, } else { info->pages = 1; } + + pmbus_clear_faults(client); } if (pmbus_check_byte_register(client, 0, PMBUS_VOUT_MODE)) { diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index a139940cd991..924f3ca41c65 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1802,7 +1802,10 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK)) client->flags |= I2C_CLIENT_PEC; - pmbus_clear_faults(client); + if (data->info->pages) + pmbus_clear_faults(client); + else + pmbus_clear_fault_page(client, -1); if (info->identify) { ret = (*info->identify)(client, info); From 0035cb9d7366487205aba8cc0a2c03dd9dd75742 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 6 Oct 2018 17:09:35 +0800 Subject: [PATCH 0181/2608] jffs2: free jffs2_sb_info through jffs2_kill_sb() commit 92e2921f7eee63450a5f953f4b15dc6210219430 upstream. When an invalid mount option is passed to jffs2, jffs2_parse_options() will fail and jffs2_sb_info will be freed, but then jffs2_sb_info will be used (use-after-free) and freeed (double-free) in jffs2_kill_sb(). Fix it by removing the buggy invocation of kfree() when getting invalid mount options. Fixes: 92abc475d8de ("jffs2: implement mount option parsing and compression overriding") Cc: stable@kernel.org Signed-off-by: Hou Tao Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 33e01de576d2..bc00cc385b77 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -285,10 +285,8 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = c; ret = jffs2_parse_options(c, data); - if (ret) { - kfree(c); + if (ret) return -EINVAL; - } /* Initialize JFFS2 superblock locks, the further initialization will * be done later */ From f721267b1f28c13efab07bc80c28753e9fbec00e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Oct 2018 23:21:05 +0200 Subject: [PATCH 0182/2608] cpufreq: conservative: Take limits changes into account properly commit da5e79bc70b84971d2b3a55fb252e34e51d81d48 upstream. If the policy limits change between invocations of cs_dbs_update(), the requested frequency value stored in dbs_info may not be updated and the function may use a stale value of it next time. Moreover, if idle periods are takem into account by cs_dbs_update(), the requested frequency value stored in dbs_info may be below the min policy limit, which is incorrect. To fix these problems, always update the requested frequency value in dbs_info along with the local copy of it when the previous requested frequency is beyond the policy limits and avoid decreasing the requested frequency below the min policy limit when taking idle periods into account. Fixes: abb6627910a1 (cpufreq: conservative: Fix next frequency selection) Fixes: 00bfe05889e9 (cpufreq: conservative: Decrease frequency faster for deferred updates) Reported-by: Waldemar Rymarkiewicz Cc: All applicable Signed-off-by: Rafael J. Wysocki Acked-by: Waldemar Rymarkiewicz Acked-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_conservative.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index f20f20a77d4d..4268f87e99fc 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -80,8 +80,10 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) * changed in the meantime, so fall back to current frequency in that * case. */ - if (requested_freq > policy->max || requested_freq < policy->min) + if (requested_freq > policy->max || requested_freq < policy->min) { requested_freq = policy->cur; + dbs_info->requested_freq = requested_freq; + } freq_step = get_freq_step(cs_tuners, policy); @@ -92,7 +94,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) if (policy_dbs->idle_periods < UINT_MAX) { unsigned int freq_steps = policy_dbs->idle_periods * freq_step; - if (requested_freq > freq_steps) + if (requested_freq > policy->min + freq_steps) requested_freq -= freq_steps; else requested_freq = policy->min; From e3e48da87330397de4e50b9958e5694a6764e38a Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 9 Sep 2018 01:21:06 +0200 Subject: [PATCH 0183/2608] pcmcia: Implement CLKRUN protocol disabling for Ricoh bridges commit 95691e3eddc41da2d1cd3cca51fecdfb46bd85bc upstream. Currently, "disable_clkrun" yenta_socket module parameter is only implemented for TI CardBus bridges. Add also an implementation for Ricoh bridges that have the necessary setting documented in publicly available datasheets. Tested on a RL5C476II with a Sunrich C-160 CardBus NIC that doesn't work correctly unless the CLKRUN protocol is disabled. Let's also make it clear in its description that the "disable_clkrun" module parameter only works on these two previously mentioned brands of CardBus bridges. Signed-off-by: Maciej S. Szmigiero Cc: stable@vger.kernel.org Signed-off-by: Dominik Brodowski Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/ricoh.h | 35 +++++++++++++++++++++++++++++++++++ drivers/pcmcia/yenta_socket.c | 3 ++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/pcmcia/ricoh.h b/drivers/pcmcia/ricoh.h index 01098c841f87..8ac7b138c094 100644 --- a/drivers/pcmcia/ricoh.h +++ b/drivers/pcmcia/ricoh.h @@ -119,6 +119,10 @@ #define RL5C4XX_MISC_CONTROL 0x2F /* 8 bit */ #define RL5C4XX_ZV_ENABLE 0x08 +/* Misc Control 3 Register */ +#define RL5C4XX_MISC3 0x00A2 /* 16 bit */ +#define RL5C47X_MISC3_CB_CLKRUN_DIS BIT(1) + #ifdef __YENTA_H #define rl_misc(socket) ((socket)->private[0]) @@ -156,6 +160,35 @@ static void ricoh_set_zv(struct yenta_socket *socket) } } +static void ricoh_set_clkrun(struct yenta_socket *socket, bool quiet) +{ + u16 misc3; + + /* + * RL5C475II likely has this setting, too, however no datasheet + * is publicly available for this chip + */ + if (socket->dev->device != PCI_DEVICE_ID_RICOH_RL5C476 && + socket->dev->device != PCI_DEVICE_ID_RICOH_RL5C478) + return; + + if (socket->dev->revision < 0x80) + return; + + misc3 = config_readw(socket, RL5C4XX_MISC3); + if (misc3 & RL5C47X_MISC3_CB_CLKRUN_DIS) { + if (!quiet) + dev_dbg(&socket->dev->dev, + "CLKRUN feature already disabled\n"); + } else if (disable_clkrun) { + if (!quiet) + dev_info(&socket->dev->dev, + "Disabling CLKRUN feature\n"); + misc3 |= RL5C47X_MISC3_CB_CLKRUN_DIS; + config_writew(socket, RL5C4XX_MISC3, misc3); + } +} + static void ricoh_save_state(struct yenta_socket *socket) { rl_misc(socket) = config_readw(socket, RL5C4XX_MISC); @@ -172,6 +205,7 @@ static void ricoh_restore_state(struct yenta_socket *socket) config_writew(socket, RL5C4XX_16BIT_IO_0, rl_io(socket)); config_writew(socket, RL5C4XX_16BIT_MEM_0, rl_mem(socket)); config_writew(socket, RL5C4XX_CONFIG, rl_config(socket)); + ricoh_set_clkrun(socket, true); } @@ -197,6 +231,7 @@ static int ricoh_override(struct yenta_socket *socket) config_writew(socket, RL5C4XX_CONFIG, config); ricoh_set_zv(socket); + ricoh_set_clkrun(socket, false); return 0; } diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 5d6d9b1549bc..5034422a1d96 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -26,7 +26,8 @@ static bool disable_clkrun; module_param(disable_clkrun, bool, 0444); -MODULE_PARM_DESC(disable_clkrun, "If PC card doesn't function properly, please try this option"); +MODULE_PARM_DESC(disable_clkrun, + "If PC card doesn't function properly, please try this option (TI and Ricoh bridges only)"); static bool isa_probe = 1; module_param(isa_probe, bool, 0444); From bbde82fdb6d083d6e4c2b33c65e8752f12ce03e1 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 11 Oct 2018 12:13:01 +0200 Subject: [PATCH 0184/2608] ipmi: Fix timer race with module unload commit 0711e8c1b4572d076264e71b0002d223f2666ed7 upstream. Please note that below oops is from an older kernel, but the same race seems to be present in the upstream kernel too. ---8<--- The following panic was encountered during removing the ipmi_ssif module: [ 526.352555] Unable to handle kernel paging request at virtual address ffff000006923090 [ 526.360464] Mem abort info: [ 526.363257] ESR = 0x86000007 [ 526.366304] Exception class = IABT (current EL), IL = 32 bits [ 526.372221] SET = 0, FnV = 0 [ 526.375269] EA = 0, S1PTW = 0 [ 526.378405] swapper pgtable: 4k pages, 48-bit VAs, pgd = 000000008ae60416 [ 526.385185] [ffff000006923090] *pgd=000000bffcffe803, *pud=000000bffcffd803, *pmd=0000009f4731a003, *pte=0000000000000000 [ 526.396141] Internal error: Oops: 86000007 [#1] SMP [ 526.401008] Modules linked in: nls_iso8859_1 ipmi_devintf joydev input_leds ipmi_msghandler shpchp sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear i2c_smbus hid_generic usbhid uas hid usb_storage ast aes_ce_blk i2c_algo_bit aes_ce_cipher qede ttm crc32_ce ptp crct10dif_ce drm_kms_helper ghash_ce syscopyarea sha2_ce sysfillrect sysimgblt pps_core fb_sys_fops sha256_arm64 sha1_ce mpt3sas qed drm raid_class ahci scsi_transport_sas libahci gpio_xlp i2c_xlp9xx aes_neon_bs aes_neon_blk crypto_simd cryptd aes_arm64 [last unloaded: ipmi_ssif] [ 526.468085] CPU: 125 PID: 0 Comm: swapper/125 Not tainted 4.15.0-35-generic #38~lp1775396+build.1 [ 526.476942] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL022 08/14/2018 [ 526.484932] pstate: 00400009 (nzcv daif +PAN -UAO) [ 526.489713] pc : 0xffff000006923090 [ 526.493198] lr : call_timer_fn+0x34/0x178 [ 526.497194] sp : ffff000009b0bdd0 [ 526.500496] x29: ffff000009b0bdd0 x28: 0000000000000082 [ 526.505796] x27: 0000000000000002 x26: ffff000009515188 [ 526.511096] x25: ffff000009515180 x24: ffff0000090f1018 [ 526.516396] x23: ffff000009519660 x22: dead000000000200 [ 526.521696] x21: ffff000006923090 x20: 0000000000000100 [ 526.526995] x19: ffff809eeb466a40 x18: 0000000000000000 [ 526.532295] x17: 000000000000000e x16: 0000000000000007 [ 526.537594] x15: 0000000000000000 x14: 071c71c71c71c71c [ 526.542894] x13: 0000000000000000 x12: 0000000000000000 [ 526.548193] x11: 0000000000000001 x10: ffff000009b0be88 [ 526.553493] x9 : 0000000000000000 x8 : 0000000000000005 [ 526.558793] x7 : ffff80befc1f8528 x6 : 0000000000000020 [ 526.564092] x5 : 0000000000000040 x4 : 0000000020001b20 [ 526.569392] x3 : 0000000000000000 x2 : ffff809eeb466a40 [ 526.574692] x1 : ffff000006923090 x0 : ffff809eeb466a40 [ 526.579992] Process swapper/125 (pid: 0, stack limit = 0x000000002eb50acc) [ 526.586854] Call trace: [ 526.589289] 0xffff000006923090 [ 526.592419] expire_timers+0xc8/0x130 [ 526.596070] run_timer_softirq+0xec/0x1b0 [ 526.600070] __do_softirq+0x134/0x328 [ 526.603726] irq_exit+0xc8/0xe0 [ 526.606857] __handle_domain_irq+0x6c/0xc0 [ 526.610941] gic_handle_irq+0x84/0x188 [ 526.614679] el1_irq+0xe8/0x180 [ 526.617822] cpuidle_enter_state+0xa0/0x328 [ 526.621993] cpuidle_enter+0x34/0x48 [ 526.625564] call_cpuidle+0x44/0x70 [ 526.629040] do_idle+0x1b8/0x1f0 [ 526.632256] cpu_startup_entry+0x2c/0x30 [ 526.636174] secondary_start_kernel+0x11c/0x130 [ 526.640694] Code: bad PC value [ 526.643800] ---[ end trace d020b0b8417c2498 ]--- [ 526.648404] Kernel panic - not syncing: Fatal exception in interrupt [ 526.654778] SMP: stopping secondary CPUs [ 526.658734] Kernel Offset: disabled [ 526.662211] CPU features: 0x5800c38 [ 526.665688] Memory Limit: none [ 526.668768] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Prevent mod_timer from arming a timer that was already removed by del_timer during module unload. Signed-off-by: Jan Glauber Cc: # 3.19 Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 932678617dfa..0904ab442d31 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -621,8 +621,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); ssif_info->waiting_alert = true; ssif_info->rtc_us_timer = SSIF_MSG_USEC; - mod_timer(&ssif_info->retry_timer, - jiffies + SSIF_MSG_JIFFIES); + if (!ssif_info->stopping) + mod_timer(&ssif_info->retry_timer, + jiffies + SSIF_MSG_JIFFIES); ipmi_ssif_unlock_cond(ssif_info, flags); return; } @@ -954,8 +955,9 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, ssif_info->waiting_alert = true; ssif_info->retries_left = SSIF_RECV_RETRIES; ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC; - mod_timer(&ssif_info->retry_timer, - jiffies + SSIF_MSG_PART_JIFFIES); + if (!ssif_info->stopping) + mod_timer(&ssif_info->retry_timer, + jiffies + SSIF_MSG_PART_JIFFIES); ipmi_ssif_unlock_cond(ssif_info, flags); } } From 472e300014bc0b3dde10b7ff224197c678a6d3d9 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 6 Oct 2018 13:11:30 -0400 Subject: [PATCH 0185/2608] parisc: Fix address in HPMC IVA commit 1138b6718ff74d2a934459643e3754423d23b5e2 upstream. Helge noticed that the address of the os_hpmc handler was not being correctly calculated in the hpmc macro. As a result, PDCE_CHECK would fail to call os_hpmc: e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY f600105e02e00000 fffffff0f0c00000 CC_MC_HPMC_MONARCH_SELECTED 140003b202e00000 000000000000000b CC_ERR_HPMC_STATE_ENTRY 5600100b02e00000 00000000000001a0 CC_MC_OS_HPMC_LEN_ERR 5600106402e00000 fffffff0f0438e70 CC_MC_BR_TO_OS_HPMC_FAILED e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY 4000109f02e00000 0000000000000000 CC_MC_HPMC_INITIATED 4000101902e00000 0000000000000000 CC_MC_MULTIPLE_HPMCS 030010d502e00000 0000000000000000 CC_CPU_STOP The address problem can be seen by dumping the fault vector: 0000000040159000 : 40159000: 63 6f 77 73 stb r15,-2447(dp) 40159004: 20 63 61 6e ldil L%b747000,r3 40159008: 20 66 6c 79 ldil L%-1c3b3000,r3 ... 40159020: 08 00 02 40 nop 40159024: 20 6e 60 02 ldil L%15d000,r3 40159028: 34 63 00 00 ldo 0(r3),r3 4015902c: e8 60 c0 02 bv,n r0(r3) 40159030: 08 00 02 40 nop 40159034: 00 00 00 00 break 0,0 40159038: c0 00 70 00 bb,*< r0,sar,40159840 4015903c: 00 00 00 00 break 0,0 Location 40159038 should contain the physical address of os_hpmc: 000000004015d000 : 4015d000: 08 1a 02 43 copy r26,r3 4015d004: 01 c0 08 a4 mfctl iva,r4 4015d008: 48 85 00 68 ldw 34(r4),r5 This patch moves the address setup into initialize_ivt to resolve the above problem. I tested the change by dumping the HPMC entry after setup: 0000000040209020: 8000240 0000000040209024: 206a2004 0000000040209028: 34630ac0 000000004020902c: e860c002 0000000040209030: 8000240 0000000040209034: 1bdddce6 0000000040209038: 15d000 000000004020903c: 1a0 Signed-off-by: John David Anglin Cc: Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/entry.S | 2 +- arch/parisc/kernel/traps.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 1b4732e20137..843825a7e6e2 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -185,7 +185,7 @@ bv,n 0(%r3) nop .word 0 /* checksum (will be patched) */ - .word PA(os_hpmc) /* address of handler */ + .word 0 /* address of handler */ .word 0 /* length of handler */ .endm diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 8453724b8009..9a898d68f4a0 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -836,7 +836,8 @@ void __init initialize_ivt(const void *iva) if (pdc_instr(&instr) == PDC_OK) ivap[0] = instr; - /* Compute Checksum for HPMC handler */ + /* Setup IVA and compute checksum for HPMC handler */ + ivap[6] = (u32)__pa(os_hpmc); length = os_hpmc_size; ivap[7] = length; From e5475f5fc5f881d547a16cc7c1249e50a48a1a80 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 12 Oct 2018 22:37:46 +0200 Subject: [PATCH 0186/2608] parisc: Fix map_pages() to not overwrite existing pte entries commit 3c229b3f2dd8133f61bb81d3cb018be92f4bba39 upstream. Fix a long-existing small nasty bug in the map_pages() implementation which leads to overwriting already written pte entries with zero, *if* map_pages() is called a second time with an end address which isn't aligned on a pmd boundry. This happens for example if we want to remap only the text segment read/write in order to run alternative patching on the code. Exiting the loop when we reach the end address fixes this. Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/mm/init.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 13f7854e0d49..cc700f7dda54 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -495,12 +495,8 @@ static void __init map_pages(unsigned long start_vaddr, pte = pte_mkhuge(pte); } - if (address >= end_paddr) { - if (force) - break; - else - pte_val(pte) = 0; - } + if (address >= end_paddr) + break; set_pte(pg_table, pte); From d665f97f859a355618653c016c824e82ec29d288 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 14 Oct 2018 21:58:00 +0200 Subject: [PATCH 0187/2608] parisc: Fix exported address of os_hpmc handler commit 99a3ae51d557d8e38a7aece65678a31f9db215ee upstream. In the C-code we need to put the physical address of the hpmc handler in the interrupt vector table (IVA) in order to get HPMCs working. Since on parisc64 function pointers are indirect (in fact they are function descriptors) we instead export the address as variable and not as function. This reverts a small part of commit f39cce654f9a ("parisc: Add cfi_startproc and cfi_endproc to assembly code"). Signed-off-by: Helge Deller Cc: [4.9+] Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/hpmc.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S index 781c3b9a3e46..fde654115564 100644 --- a/arch/parisc/kernel/hpmc.S +++ b/arch/parisc/kernel/hpmc.S @@ -85,7 +85,7 @@ END(hpmc_pim_data) .import intr_save, code .align 16 -ENTRY_CFI(os_hpmc) +ENTRY(os_hpmc) .os_hpmc: /* @@ -302,7 +302,6 @@ os_hpmc_6: b . nop .align 16 /* make function length multiple of 16 bytes */ -ENDPROC_CFI(os_hpmc) .os_hpmc_end: From f80215f62eb7f82247b51cf1f098a4ee2643d8fe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 7 Oct 2018 09:44:17 +0200 Subject: [PATCH 0188/2608] ALSA: hda - Add quirk for ASUS G751 laptop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 11ba6111160290ccd35562f4e05cec08942a6c4c upstream. ASUS G751 requires the extra COEF initialization to make it microphone working properly. Reported-and-tested-by: Håvard Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fe5c741fcc6a..201d4f55b6c2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7515,6 +7515,7 @@ enum { ALC662_FIXUP_ASUS_Nx50, ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, ALC668_FIXUP_ASUS_Nx51, + ALC668_FIXUP_ASUS_G751, ALC891_FIXUP_HEADSET_MODE, ALC891_FIXUP_DELL_MIC_NO_PRESENCE, ALC662_FIXUP_ACER_VERITON, @@ -7784,6 +7785,14 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, }, + [ALC668_FIXUP_ASUS_G751] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0xc3 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x4000 }, + {} + }, + }, [ALC891_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, @@ -7857,6 +7866,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), + SND_PCI_QUIRK(0x1043, 0x12ff, "ASUS G751", ALC668_FIXUP_ASUS_G751), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), From 2ad912e2b2c18784b52de195d403dc269aecfd8f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 9 Oct 2018 14:20:17 +0200 Subject: [PATCH 0189/2608] ALSA: hda - Fix headphone pin config for ASUS G751 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5b7c5e1f4c36b99d0f694f38b9ad910f520cb7ef upstream. BIOS on ASUS G751 doesn't seem to map the headphone pin (NID 0x16) correctly. Add a quirk to address it, as well as chaining to the previous fix for the microphone. Reported-by: Håvard Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 201d4f55b6c2..e625eee03c9d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7515,6 +7515,7 @@ enum { ALC662_FIXUP_ASUS_Nx50, ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, ALC668_FIXUP_ASUS_Nx51, + ALC668_FIXUP_MIC_COEF, ALC668_FIXUP_ASUS_G751, ALC891_FIXUP_HEADSET_MODE, ALC891_FIXUP_DELL_MIC_NO_PRESENCE, @@ -7785,7 +7786,7 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, }, - [ALC668_FIXUP_ASUS_G751] = { + [ALC668_FIXUP_MIC_COEF] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { { 0x20, AC_VERB_SET_COEF_INDEX, 0xc3 }, @@ -7793,6 +7794,15 @@ static const struct hda_fixup alc662_fixups[] = { {} }, }, + [ALC668_FIXUP_ASUS_G751] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x16, 0x0421101f }, /* HP */ + {} + }, + .chained = true, + .chain_id = ALC668_FIXUP_MIC_COEF + }, [ALC891_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, From 7b50b3d70994010c478f74b3ae15389c4b974272 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 10 Oct 2018 11:57:25 +0800 Subject: [PATCH 0190/2608] ALSA: hda/realtek - Fix the problem of the front MIC on the Lenovo M715 commit d06fb562bff5d14defdacbd92449bacbaedd5cdf upstream. The front MIC on the Lenovo M715 can't record sound, after applying the ALC294_FIXUP_LENOVO_MIC_LOCATION, the problem is fixed. So add the pin configuration of this machine to the pin quirk table. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e625eee03c9d..eb8807de3ebc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6629,6 +6629,12 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1a, 0x02a11040}, {0x1b, 0x01014020}, {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC294_FIXUP_LENOVO_MIC_LOCATION, + {0x14, 0x90170110}, + {0x19, 0x02a11030}, + {0x1a, 0x02a11040}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC294_FIXUP_LENOVO_MIC_LOCATION, {0x14, 0x90170110}, {0x19, 0x02a11020}, From e0404d81ab842b8d4246662c1c46bb20525dd743 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Thu, 11 Oct 2018 15:49:17 -0400 Subject: [PATCH 0191/2608] ALSA: hda - Add mic quirk for the Lenovo G50-30 (17aa:3905) commit e7bb6ad5685f05685dd8a6a5eda7bfcd14d5f95b upstream. The Lenovo G50-30, like other G50 models, has a Conexant codec that requires a quirk for its inverted stereo dmic. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1249364 Reported-by: Alexander Ploumistos Tested-by: Alexander Ploumistos Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 16197ad4512a..0cc0ced1f2ed 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -981,6 +981,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), + SND_PCI_QUIRK(0x17aa, 0x3905, "Lenovo G50-30", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), From a436f6b2db64cd9e853ee6b54fba0499d8f80f93 Mon Sep 17 00:00:00 2001 From: Alex Stanoev Date: Sun, 28 Oct 2018 16:55:12 +0000 Subject: [PATCH 0192/2608] ALSA: ca0106: Disable IZD on SB0570 DAC to fix audio pops commit ac237c28d5ac1b241d58b1b7b4b9fa10efb22fb5 upstream. The Creative Audigy SE (SB0570) card currently exhibits an audible pop whenever playback is stopped or resumed, or during silent periods of an audio stream. Initialise the IZD bit to the 0 to eliminate these pops. The Infinite Zero Detection (IZD) feature on the DAC causes the output to be shunted to Vcap after 2048 samples of silence. This discharges the AC coupling capacitor through the output and causes the aforementioned pop/click noise. The behaviour of the IZD bit is described on page 15 of the WM8768GEDS datasheet: "With IZD=1, applying MUTE for 1024 consecutive input samples will cause all outputs to be connected directly to VCAP. This also happens if 2048 consecutive zero input samples are applied to all 6 channels, and IZD=0. It will be removed as soon as any channel receives a non-zero input". I believe the second sentence might be referring to IZD=1 instead of IZD=0 given the observed behaviour of the card. This change should make the DAC initialisation consistent with Creative's Windows driver, as this popping persists when initialising the card in Linux and soft rebooting into Windows, but is not present on a cold boot to Windows. Signed-off-by: Alex Stanoev Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ca0106/ca0106.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ca0106/ca0106.h b/sound/pci/ca0106/ca0106.h index 04402c14cb23..9847b669cf3c 100644 --- a/sound/pci/ca0106/ca0106.h +++ b/sound/pci/ca0106/ca0106.h @@ -582,7 +582,7 @@ #define SPI_PL_BIT_R_R (2<<7) /* right channel = right */ #define SPI_PL_BIT_R_C (3<<7) /* right channel = (L+R)/2 */ #define SPI_IZD_REG 2 -#define SPI_IZD_BIT (1<<4) /* infinite zero detect */ +#define SPI_IZD_BIT (0<<4) /* infinite zero detect */ #define SPI_FMT_REG 3 #define SPI_FMT_BIT_RJ (0<<0) /* right justified mode */ From 8a13906ae519b3ed95cd0fb73f1098b46362f6c4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:55 +0200 Subject: [PATCH 0193/2608] x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation commit 53c613fe6349994f023245519265999eed75957f upstream. STIBP is a feature provided by certain Intel ucodes / CPUs. This feature (once enabled) prevents cross-hyperthread control of decisions made by indirect branch predictors. Enable this feature if - the CPU is vulnerable to spectre v2 - the CPU supports SMT and has SMT siblings online - spectre_v2 mitigation autoselection is enabled (default) After some previous discussion, this leaves STIBP on all the time, as wrmsr on crossing kernel boundary is a no-no. This could perhaps later be a bit more optimized (like disabling it in NOHZ, experiment with disabling it in idle, etc) if needed. Note that the synchronization of the mask manipulation via newly added spec_ctrl_mutex is currently not strictly needed, as the only updater is already being serialized by cpu_add_remove_lock, but let's make this a little bit more future-proof. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251438240.15880@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 57 ++++++++++++++++++++++++++++++++++---- kernel/cpu.c | 11 +++++++- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3e435f88621d..90fcf16249ea 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,12 +34,10 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); -/* - * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any - * writes to SPEC_CTRL contain whatever reserved bits have been set. - */ -u64 __ro_after_init x86_spec_ctrl_base; +/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); +static DEFINE_MUTEX(spec_ctrl_mutex); /* * The vendor and possibly platform specific bits which can be modified in @@ -322,6 +320,46 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + mask = x86_spec_ctrl_base; + if (cpu_smt_control == CPU_SMT_ENABLED) + mask |= SPEC_CTRL_STIBP; + else + mask &= ~SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + cpu_smt_control == CPU_SMT_ENABLED ? + "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -406,6 +444,9 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } + + /* Enable STIBP if appropriate */ + arch_smt_update(); } #undef pr_fmt @@ -798,6 +839,8 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { + int ret; + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -812,10 +855,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", spectre_v2_module_string()); + return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/kernel/cpu.c b/kernel/cpu.c index f3f389e33343..90cf6a04e08a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2045,6 +2045,12 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { }; + static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2071,8 +2077,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) + if (!ret) { cpu_smt_control = ctrlval; + arch_smt_update(); + } cpu_maps_update_done(); return ret; } @@ -2083,6 +2091,7 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; + arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) From ed2df6687223bd1d7f82b19cbd52d79c4263ff0f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 10 Oct 2018 08:14:54 +0200 Subject: [PATCH 0194/2608] x86/xen: Fix boot loader version reported for PVH guests commit 357d291ce035d1b757568058f3c9898c60d125b1 upstream. The boot loader version reported via sysfs is wrong in case of the kernel being booted via the Xen PVH boot entry. it should be 2.12 (0x020c), but it is reported to be 2.18 (0x0212). As the current way to set the version is error prone use the more readable variant (2 << 8) | 12. Signed-off-by: Juergen Gross Cc: # 4.12 Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: bp@alien8.de Cc: corbet@lwn.net Cc: linux-doc@vger.kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20181010061456.22238-2-jgross@suse.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten_pvh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 7bd3ee08393e..d6d7b29b3be0 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -76,7 +76,7 @@ static void __init init_pvh_bootparams(void) * Version 2.12 supports Xen entry point but we will use default x86/PC * environment (i.e. hardware_subarch 0). */ - pvh_bootparams.hdr.version = 0x212; + pvh_bootparams.hdr.version = (2 << 8) | 12; pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ } From f2e58b0414523cac77c52ab1f7b4f470e6e69ada Mon Sep 17 00:00:00 2001 From: He Zhe Date: Tue, 14 Aug 2018 23:33:42 +0800 Subject: [PATCH 0195/2608] x86/corruption-check: Fix panic in memory_corruption_check() when boot option without value is provided commit ccde460b9ae5c2bd5e4742af0a7f623c2daad566 upstream. memory_corruption_check[{_period|_size}]()'s handlers do not check input argument before passing it to kstrtoul() or simple_strtoull(). The argument would be a NULL pointer if each of the kernel parameters, without its value, is set in command line and thus cause the following panic. PANIC: early exception 0xe3 IP 10:ffffffff73587c22 error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.18-rc8+ #2 [ 0.000000] RIP: 0010:kstrtoull+0x2/0x10 ... [ 0.000000] Call Trace [ 0.000000] ? set_corruption_check+0x21/0x49 [ 0.000000] ? do_early_param+0x4d/0x82 [ 0.000000] ? parse_args+0x212/0x330 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_options+0x20/0x23 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_param+0x2d/0x39 [ 0.000000] ? setup_arch+0x2f7/0xbf4 [ 0.000000] ? start_kernel+0x5e/0x4c2 [ 0.000000] ? load_ucode_bsp+0x113/0x12f [ 0.000000] ? secondary_startup_64+0xa5/0xb0 This patch adds checks to prevent the panic. Signed-off-by: He Zhe Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: gregkh@linuxfoundation.org Cc: kstewart@linuxfoundation.org Cc: pombredanne@nexb.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1534260823-87917-1-git-send-email-zhe.he@windriver.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/check.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 33399426793e..cc8258a5378b 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -31,6 +31,11 @@ static __init int set_corruption_check(char *arg) ssize_t ret; unsigned long val; + if (!arg) { + pr_err("memory_corruption_check config string not provided\n"); + return -EINVAL; + } + ret = kstrtoul(arg, 10, &val); if (ret) return ret; @@ -45,6 +50,11 @@ static __init int set_corruption_check_period(char *arg) ssize_t ret; unsigned long val; + if (!arg) { + pr_err("memory_corruption_check_period config string not provided\n"); + return -EINVAL; + } + ret = kstrtoul(arg, 10, &val); if (ret) return ret; @@ -59,6 +69,11 @@ static __init int set_corruption_check_size(char *arg) char *end; unsigned size; + if (!arg) { + pr_err("memory_corruption_check_size config string not provided\n"); + return -EINVAL; + } + size = memparse(arg, &end); if (*end == '\0') From b80a2dd4d7a1e51ed34c91b4e03e425dc0c21223 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 17 Oct 2018 12:34:32 +0200 Subject: [PATCH 0196/2608] x86/mm/pat: Disable preemption around __flush_tlb_all() commit f77084d96355f5fba8e2c1fb3a51a393b1570de7 upstream. The WARN_ON_ONCE(__read_cr3() != build_cr3()) in switch_mm_irqs_off() triggers every once in a while during a snapshotted system upgrade. The warning triggers since commit decab0888e6e ("x86/mm: Remove preempt_disable/enable() from __native_flush_tlb()"). The callchain is: get_page_from_freelist() -> post_alloc_hook() -> __kernel_map_pages() with CONFIG_DEBUG_PAGEALLOC enabled. Disable preemption during CR3 reset / __flush_tlb_all() and add a comment why preemption has to be disabled so it won't be removed accidentaly. Add another preemptible() check in __flush_tlb_all() to catch callers with enabled preemption when PGE is enabled, because PGE enabled does not trigger the warning in __native_flush_tlb(). Suggested by Andy Lutomirski. Fixes: decab0888e6e ("x86/mm: Remove preempt_disable/enable() from __native_flush_tlb()") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Dave Hansen Cc: Peter Zijlstra Cc: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181017103432.zgv46nlu3hc7k4rq@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 6 ++++++ arch/x86/mm/pageattr.c | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 5f00ecb9d251..2501be609b82 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -466,6 +466,12 @@ static inline void __native_flush_tlb_one_user(unsigned long addr) */ static inline void __flush_tlb_all(void) { + /* + * This is to catch users with enabled preemption and the PGE feature + * and don't trigger the warning in __native_flush_tlb(). + */ + VM_WARN_ON_ONCE(preemptible()); + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); } else { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 464f53da3a6f..835620ab435f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2037,9 +2037,13 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) /* * We should perform an IPI and flush all tlbs, - * but that can deadlock->flush only current cpu: + * but that can deadlock->flush only current cpu. + * Preemption needs to be disabled around __flush_tlb_all() due to + * CR3 reload in __native_flush_tlb(). */ + preempt_disable(); __flush_tlb_all(); + preempt_enable(); arch_flush_lazy_mmu_mode(); } From a7051265f1e648d627e684ff903650439b89882c Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 1 Aug 2018 11:42:25 -0700 Subject: [PATCH 0197/2608] x86/speculation: Support Enhanced IBRS on future CPUs commit 706d51681d636a0c4a5ef53395ec3b803e45ed4d upstream. Future Intel processors will support "Enhanced IBRS" which is an "always on" mode i.e. IBRS bit in SPEC_CTRL MSR is enabled once and never disabled. From the specification [1]: "With enhanced IBRS, the predicted targets of indirect branches executed cannot be controlled by software that was executed in a less privileged predictor mode or on another logical processor. As a result, software operating on a processor with enhanced IBRS need not use WRMSR to set IA32_SPEC_CTRL.IBRS after every transition to a more privileged predictor mode. Software can isolate predictor modes effectively simply by setting the bit once. Software need not disable enhanced IBRS prior to entering a sleep state such as MWAIT or HLT." If Enhanced IBRS is supported by the processor then use it as the preferred spectre v2 mitigation mechanism instead of Retpoline. Intel's Retpoline white paper [2] states: "Retpoline is known to be an effective branch target injection (Spectre variant 2) mitigation on Intel processors belonging to family 6 (enumerated by the CPUID instruction) that do not have support for enhanced IBRS. On processors that support enhanced IBRS, it should be used for mitigation instead of retpoline." The reason why Enhanced IBRS is the recommended mitigation on processors which support it is that these processors also support CET which provides a defense against ROP attacks. Retpoline is very similar to ROP techniques and might trigger false positives in the CET defense. If Enhanced IBRS is selected as the mitigation technique for spectre v2, the IBRS bit in SPEC_CTRL MSR is set once at boot time and never cleared. Kernel also has to make sure that IBRS bit remains set after VMEXIT because the guest might have cleared the bit. This is already covered by the existing x86_spec_ctrl_set_guest() and x86_spec_ctrl_restore_host() speculation control functions. Enhanced IBRS still requires IBPB for full mitigation. [1] Speculative-Execution-Side-Channel-Mitigations.pdf [2] Retpoline-A-Branch-Target-Injection-Mitigation.pdf Both documents are available at: https://bugzilla.kernel.org/show_bug.cgi?id=199511 Originally-by: David Woodhouse Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Thomas Gleixner Cc: Tim C Chen Cc: Dave Hansen Cc: Ravi Shankar Link: https://lkml.kernel.org/r/1533148945-24095-1-git-send-email-sai.praneeth.prakhya@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- arch/x86/kernel/cpu/common.c | 3 +++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8418462298e7..673d6e988196 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -220,6 +220,7 @@ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8b38df98548e..1b4132161c1f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -215,6 +215,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, SPECTRE_V2_IBRS, + SPECTRE_V2_IBRS_ENHANCED, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 90fcf16249ea..aa6e7f75bccc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -138,6 +138,7 @@ static const char *spectre_v2_strings[] = { [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", }; #undef pr_fmt @@ -379,6 +380,13 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { + mode = SPECTRE_V2_IBRS_ENHANCED; + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + goto specv2_set_mode; + } if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_auto; break; @@ -416,6 +424,7 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } +specv2_set_mode: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -438,9 +447,16 @@ retpoline_auto: /* * Retpoline means the kernel is safe because it has no indirect - * branches. But firmware isn't, so use IBRS to protect that. + * branches. Enhanced IBRS protects firmware too, so, enable restricted + * speculation around firmware calls only when Enhanced IBRS isn't + * supported. + * + * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because + * the user might select retpoline on the kernel command line and if + * the CPU supports Enhanced IBRS, kernel might un-intentionally not + * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS)) { + if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7d2a7890a823..96643e2c75b8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -967,6 +967,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (ia32_cap & ARCH_CAP_IBRS_ALL) + setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (x86_match_cpu(cpu_no_meltdown)) return; From 43c7313ec7cbe3a31643a3d2b3c3e2d20312dc8b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 5 Sep 2018 12:02:15 +0200 Subject: [PATCH 0198/2608] ARM: dts: exynos: Disable pull control for MAX8997 interrupts on Origen commit f5e758b8358f6c27e8a351ddf0b441a64cdabb94 upstream. PMIC_IRQB and PMIC_KEYINB lines on Exynos4210-based Origen board have external pull-up resistors, so disable any pull control for those lines in respective pin controller node. This fixes support for MAX8997 interrupts and enables operation of wakeup from MAX8997 RTC alarm. Signed-off-by: Marek Szyprowski Fixes: 17419726aaa1 ("ARM: dts: add max8997 device node for exynos4210-origen board") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos4210-origen.dts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/boot/dts/exynos4210-origen.dts b/arch/arm/boot/dts/exynos4210-origen.dts index 084fcc5574ef..e4876186d5cd 100644 --- a/arch/arm/boot/dts/exynos4210-origen.dts +++ b/arch/arm/boot/dts/exynos4210-origen.dts @@ -152,6 +152,8 @@ reg = <0x66>; interrupt-parent = <&gpx0>; interrupts = <4 IRQ_TYPE_NONE>, <3 IRQ_TYPE_NONE>; + pinctrl-names = "default"; + pinctrl-0 = <&max8997_irq>; max8997,pmic-buck1-dvs-voltage = <1350000>; max8997,pmic-buck2-dvs-voltage = <1100000>; @@ -289,6 +291,13 @@ }; }; +&pinctrl_1 { + max8997_irq: max8997-irq { + samsung,pins = "gpx0-3", "gpx0-4"; + samsung,pin-pud = ; + }; +}; + &sdhci_0 { bus-width = <4>; pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus4 &sd0_cd>; From 9c9cd35cb3e63f71af7c95a918c1586b21f5c3ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Oct 2018 19:38:46 -0700 Subject: [PATCH 0199/2608] bpf: do not blindly change rlimit in reuseport net selftest [ Upstream commit 262f9d811c7608f1e74258ceecfe1fa213bdf912 ] If the current process has unlimited RLIMIT_MEMLOCK, we should should leave it as is. Fixes: 941ff6f11c02 ("bpf: fix rlimit in reuseport net selftest") Signed-off-by: John Sperbeck Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/reuseport_bpf.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index cad14cd0ea92..b5277106df1f 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -437,14 +437,19 @@ void enable_fastopen(void) } } -static struct rlimit rlim_old, rlim_new; +static struct rlimit rlim_old; static __attribute__((constructor)) void main_ctor(void) { getrlimit(RLIMIT_MEMLOCK, &rlim_old); - rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); - rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); - setrlimit(RLIMIT_MEMLOCK, &rlim_new); + + if (rlim_old.rlim_cur != RLIM_INFINITY) { + struct rlimit rlim_new; + + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); + } } static __attribute__((destructor)) void main_dtor(void) From 0c7cd9fe35497975c9590accd84273a4f95fb1e6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Oct 2018 09:20:46 +0200 Subject: [PATCH 0200/2608] Revert "perf tools: Fix PMU term format max value calculation" [ Upstream commit 1b9caa10b31dda0866f4028e4bfb923fb6e4072f ] This reverts commit ac0e2cd555373ae6f8f3a3ad3fbbf5b6d1e7aaaa. Michael reported an issue with oversized terms values assignment and I noticed there was actually a misunderstanding of the max value check in the past. The above commit's changelog says: If bit 21 is set, there is parsing issues as below. $ perf stat -a -e uncore_qpi_0/event=0x200002,umask=0x8/ event syntax error: '..pi_0/event=0x200002,umask=0x8/' \___ value too big for format, maximum is 511 But there's no issue there, because the event value is distributed along the value defined by the format. Even if the format defines separated bit, the value is treated as a continual number, which should follow the format definition. In above case it's 9-bit value with last bit separated: $ cat uncore_qpi_0/format/event config:0-7,21 Hence the value 0x200002 is correctly reported as format violation, because it exceeds 9 bits. It should have been 0x102 instead, which sets the 9th bit - the bit 21 of the format. $ perf stat -vv -a -e uncore_qpi_0/event=0x102,umask=0x8/ Using CPUID GenuineIntel-6-2D ... ------------------------------------------------------------ perf_event_attr: type 10 size 112 config 0x200802 sample_type IDENTIFIER ... Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: ac0e2cd55537 ("perf tools: Fix PMU term format max value calculation") Link: http://lkml.kernel.org/r/20181003072046.29276-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/pmu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d87d458996b7..dceef4725d33 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -754,13 +754,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - __u64 w = 0; - int fbit; + int w; - for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) - w |= (1ULL << fbit); - - return w; + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; } /* From 11896963b7b5977616b2010e89d532559950de73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Oct 2018 18:02:21 +0200 Subject: [PATCH 0201/2608] xfrm: policy: use hlist rcu variants on insert [ Upstream commit 9dffff200fd178f11dd50eb1fd8ccd0650c9284e ] bydst table/list lookups use rcu, so insertions must use rcu versions. Fixes: a7c44247f704e ("xfrm: policy: make xfrm_policy_lookup_bytype lockless") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 37c32e73aaef..70ec57b887f6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -626,9 +626,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); @@ -767,9 +767,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); __xfrm_policy_link(policy, dir); /* After previous checking, family can either be AF_INET or AF_INET6 */ From 9eedfdf17234f4cf800cc1814054c65ebd9712bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Oct 2018 10:03:39 +0200 Subject: [PATCH 0202/2608] perf vendor events intel: Fix wrong filter_band* values for uncore events [ Upstream commit 94aafb74cee0002e2f2eb6dc5376f54d5951ab4d ] Michael reported that he could not stat following event: $ perf stat -e unc_p_freq_ge_1200mhz_cycles -a -- ls event syntax error: '..e_1200mhz_cycles' \___ value too big for format, maximum is 255 Run 'perf list' for a list of valid events The event is unwrapped into: uncore_pcu/event=0xb,filter_band0=1200/ where filter_band0 format says it's one byte only: # cat uncore_pcu/format/filter_band0 config1:0-7 while JSON files specifies bigger number: "Filter": "filter_band0=1200", all the filter_band* formats show 1 byte width: # cat uncore_pcu/format/filter_band1 config1:8-15 # cat uncore_pcu/format/filter_band2 config1:16-23 # cat uncore_pcu/format/filter_band3 config1:24-31 The reason of the issue is that filter_band* values are supposed to be in 100Mhz units.. it's stated in the JSON help for the events, like: filter_band3=XXX, with XXX in 100Mhz units This patch divides the filter_band* values by 100, plus there's couple of changes that actually change the number completely, like: - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Acked-by: Andi Kleen Cc: Alexander Shishkin Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20181010080339.GB15790@krava Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../arch/x86/ivytown/uncore-power.json | 16 ++++++++-------- .../arch/x86/jaketown/uncore-power.json | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index d40498f2cb1e..635c09fda1d9 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -188,7 +188,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -199,7 +199,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -210,7 +210,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -221,7 +221,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -232,7 +232,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -243,7 +243,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -254,7 +254,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -265,7 +265,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index 16034bfd06dd..8755693d86c6 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -187,7 +187,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -198,7 +198,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -209,7 +209,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -220,7 +220,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -231,7 +231,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -242,7 +242,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -253,7 +253,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -264,7 +264,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", From 9d388b0a09f5e9861a50ab71cafbdac45a2a5d25 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Oct 2018 10:31:58 -0700 Subject: [PATCH 0203/2608] sparc: Fix single-pcr perf event counter management. [ Upstream commit cfdc3170d214046b9509183fe9b9544dc644d40b ] It is important to clear the hw->state value for non-stopped events when they are added into the PMU. Otherwise when the event is scheduled out, we won't read the counter because HES_UPTODATE is still set. This breaks 'perf stat' and similar use cases, causing all the events to show zero. This worked for multi-pcr because we make explicit sparc_pmu_start() calls in calculate_multiple_pcrs(). calculate_single_pcr() doesn't do this because the idea there is to accumulate all of the counter settings into the single pcr value. So we have to add explicit hw->state handling there. Like x86, we use the PERF_HES_ARCH bit to track truly stopped events so that we don't accidently start them on a reload. Related to all of this, sparc_pmu_start() is missing a userpage update so add it. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/perf_event.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 5c1f54758312..8a569b09d2c2 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -927,6 +927,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -959,10 +961,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -988,6 +992,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1079,6 +1086,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1371,9 +1380,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, From c37b554f7d71fb92fa58a4c0a8476b54a24ca4e2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Oct 2018 10:33:20 -0700 Subject: [PATCH 0204/2608] sparc: Throttle perf events properly. [ Upstream commit 455adb3174d2c8518cef1a61140c211f6ac224d2 ] Like x86 and arm, call perf_sample_event_took() in perf event NMI interrupt handler. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/perf_event.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 8a569b09d2c2..eceb0215bdee 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1612,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct perf_sample_data data; struct cpu_hw_events *cpuc; struct pt_regs *regs; + u64 finish_clock; + u64 start_clock; int i; if (!atomic_read(&active_events)) @@ -1625,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; } + start_clock = sched_clock(); + regs = args->regs; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1663,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, sparc_pmu_stop(event, 0); } + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return NOTIFY_STOP; } From f26f0ff7dd8afb9057e8cef81344a2e71bf3e4e0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 14 Oct 2018 20:19:31 -0700 Subject: [PATCH 0205/2608] sparc64: Make proc_id signed. [ Upstream commit b3e1eb8e7ac9aaa283989496651d99267c4cad6c ] So that when it is unset, ie. '-1', userspace can see it properly. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/cpudata_64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 666d6b5c0440..9c3fc03abe9a 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -28,7 +28,7 @@ typedef struct { unsigned short sock_id; /* physical package */ unsigned short core_id; unsigned short max_cache_id; /* groupings of highest shared cache */ - unsigned short proc_id; /* strand (aka HW thread) id */ + signed short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); From e5a02efefbd20efca39becab8221cd509a854efa Mon Sep 17 00:00:00 2001 From: Song Muchun Date: Sun, 14 Oct 2018 19:26:12 +0800 Subject: [PATCH 0206/2608] sched/fair: Fix the min_vruntime update logic in dequeue_entity() [ Upstream commit 9845c49cc9bbb317a0bc9e9cf78d8e09d54c9af0 ] The comment and the code around the update_min_vruntime() call in dequeue_entity() are not in agreement. >From commit: b60205c7c558 ("sched/fair: Fix min_vruntime tracking") I think that we want to update min_vruntime when a task is sleeping/migrating. So, the check is inverted there - fix it. Signed-off-by: Song Muchun Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b60205c7c558 ("sched/fair: Fix min_vruntime tracking") Link: http://lkml.kernel.org/r/20181014112612.2614-1-smuchun@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 19bfa21f7197..2d4d79420e36 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3825,7 +3825,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * put back on, and if we advance min_vruntime, we'll be placed back * further than we started -- ie. we'll be penalized. */ - if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE) + if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) update_min_vruntime(cfs_rq); } From faf96991f88265dd55bf3207109dc5106e570d0c Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 6 Sep 2018 18:18:12 -0400 Subject: [PATCH 0207/2608] perf tools: Fix use of alternatives to find JDIR [ Upstream commit 36b8d4628d3cc8f5a748e508cce8673bc00fc63c ] When a build is run from something like a cron job, the user's $PATH is rather minimal, of note, not including /usr/sbin in my own case. Because of that, an automated rpm package build ultimately fails to find libperf-jvmti.so, because somewhere within the build, this happens... /bin/sh: alternatives: command not found /bin/sh: alternatives: command not found Makefile.config:849: No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel ...and while the build continues, libperf-jvmti.so isn't built, and things fall down when rpm tries to find all the %files specified. Exact same system builds everything just fine when the job is launched from a login shell instead of a cron job, since alternatives is in $PATH, so openjdk is actually found. The test required to get into this section of code actually specifies the full path, as does a block just above it, so let's do that here too. Signed-off-by: Jarod Wilson Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: William Cohen Fixes: d4dfdf00d43e ("perf jvmti: Plug compilation into perf build") Link: http://lkml.kernel.org/r/20180906221812.11167-1-jarod@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 63f534a0902f..f362ee46506a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -795,7 +795,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') endif endif ifndef JDIR From 1309de40f2e0e47cc05c7b4e7a3637c5e71a89af Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 11 Oct 2018 22:46:55 -0700 Subject: [PATCH 0208/2608] perf cpu_map: Align cpu map synthesized events properly. [ Upstream commit 0ed149cf5239cc6e7e65bf00f769e8f1e91076c0 ] The size of the resulting cpu map can be smaller than a multiple of sizeof(u64), resulting in SIGBUS on cpus like Sparc as the next event will not be aligned properly. Signed-off-by: David S. Miller Cc: Jiri Olsa Cc: Kan Liang Fixes: 6c872901af07 ("perf cpu_map: Add cpu_map event synthesize function") Link: http://lkml.kernel.org/r/20181011.224655.716771175766946817.davem@davemloft.net Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fc690fecbfd6..a19e840db54a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -951,6 +951,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max } *size += sizeof(struct cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); return zalloc(*size); } From bd79c781f6bf8697b6579b428e311432cb2e70fe Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 16 Oct 2018 22:25:24 +0200 Subject: [PATCH 0209/2608] x86/fpu: Remove second definition of fpu in __fpu__restore_sig() [ Upstream commit 6aa676761d4c1acfa31320e55fa1f83f3fcbbc7a ] Commit: c5bedc6847c3b ("x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active") introduced the 'fpu' variable at top of __restore_xstate_sig(), which now shadows the other definition: arch/x86/kernel/fpu/signal.c:318:28: warning: symbol 'fpu' shadows an earlier one arch/x86/kernel/fpu/signal.c:271:20: originally declared here Remove the shadowed definition of 'fpu', as the two definitions are the same. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c5bedc6847c3b ("x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active") Link: http://lkml.kernel.org/r/20181016202525.29437-3-bigeasy@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 23f1691670b6..61a949d84dfa 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Validate and sanitize the copied state. */ - struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; From 60b7367caab51871b41ed03bbdffde93011e3606 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 12 Oct 2018 19:14:58 -0700 Subject: [PATCH 0210/2608] net: qla3xxx: Remove overflowing shift statement [ Upstream commit 8c3bf9b62b667456a57aefcf1689e826df146159 ] Clang currently warns: drivers/net/ethernet/qlogic/qla3xxx.c:384:24: warning: signed shift result (0xF00000000) requires 37 bits to represent, but 'int' only has 32 bits [-Wshift-overflow] ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); ~~~~~~~~~~~~~~ ^ ~~ 1 warning generated. The warning is certainly accurate since ISP_NVRAM_MASK is defined as (0x000F << 16) which is then shifted by 16, resulting in 64424509440, well above UINT_MAX. Given that this is the only location in this driver where ISP_NVRAM_MASK is shifted again, it seems likely that ISP_NVRAM_MASK was originally defined without a shift and during the move of the shift to the definition, this statement wasn't properly removed (since ISP_NVRAM_MASK is used in the statenent right above this). Only the maintainers can confirm this since this statment has been here since the driver was first added to the kernel. Link: https://github.com/ClangBuiltLinux/linux/issues/127 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qla3xxx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2991179c2fd0..080d00520362 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -380,8 +380,6 @@ static void fm93c56a_select(struct ql3_adapter *qdev) qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1; ql_write_nvram_reg(qdev, spir, ISP_NVRAM_MASK | qdev->eeprom_cmd_data); - ql_write_nvram_reg(qdev, spir, - ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); } /* From 5832fa5bd0324847ef5a256f36dd1e80caa2b024 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Oct 2018 22:13:02 +0900 Subject: [PATCH 0211/2608] selftests: ftrace: Add synthetic event syntax testcase [ Upstream commit ba0e41ca81b935b958006c7120466e2217357827 ] Add a testcase to check the syntax and field types for synthetic_events interface. Link: http://lkml.kernel.org/r/153986838264.18251.16627517536956299922.stgit@devbox Acked-by: Shuah Khan Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../trigger-synthetic-event-syntax.tc | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc new file mode 100644 index 000000000000..88e6c3f43006 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test synthetic_events syntax parser" + +echo > synthetic_events + +# synthetic event must have a field +! echo "myevent" >> synthetic_events +echo "myevent u64 var1" >> synthetic_events + +# synthetic event must be found in synthetic_events +grep "myevent[[:space:]]u64 var1" synthetic_events + +# it is not possible to add same name event +! echo "myevent u64 var2" >> synthetic_events + +# Non-append open will cleanup all events and add new one +echo "myevent u64 var2" > synthetic_events + +# multiple fields with different spaces +echo "myevent u64 var1; u64 var2;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ;u64 var2" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events + +# test field types +echo "myevent u32 var" > synthetic_events +echo "myevent u16 var" > synthetic_events +echo "myevent u8 var" > synthetic_events +echo "myevent s64 var" > synthetic_events +echo "myevent s32 var" > synthetic_events +echo "myevent s16 var" > synthetic_events +echo "myevent s8 var" > synthetic_events + +echo "myevent char var" > synthetic_events +echo "myevent int var" > synthetic_events +echo "myevent long var" > synthetic_events +echo "myevent pid_t var" > synthetic_events + +echo "myevent unsigned char var" > synthetic_events +echo "myevent unsigned int var" > synthetic_events +echo "myevent unsigned long var" > synthetic_events +grep "myevent[[:space:]]unsigned long var" synthetic_events + +# test string type +echo "myevent char var[10]" > synthetic_events +grep "myevent[[:space:]]char\[10\] var" synthetic_events + +do_reset + +exit 0 From fd83130bdbe48a0d4e6e650f37073d80b80d7adc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 19 Oct 2018 21:15:26 +0200 Subject: [PATCH 0212/2608] i2c: rcar: cleanup DMA for all kinds of failure [ Upstream commit 31d86033a0749a0463ea654130b2de5c163154f1 ] DMA needs to be cleaned up not only on timeout, but on all errors where it has been setup before. Fixes: 73e8b0528346 ("i2c: rcar: add DMA support") Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-rcar.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 7f044df1ea07..3415733a9364 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -761,8 +761,12 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, time_left = wait_event_timeout(priv->wait, priv->flags & ID_DONE, num * adap->timeout); - if (!time_left) { + + /* cleanup DMA if it couldn't complete properly due to an error */ + if (priv->dma_direction != DMA_NONE) rcar_i2c_cleanup_dma(priv); + + if (!time_left) { rcar_i2c_init(priv); ret = -ETIMEDOUT; } else if (priv->flags & ID_NACK) { From 5cf2ab06e8d0d7c9ca15f51eeed67103a973725f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 18 Oct 2018 21:45:17 -0400 Subject: [PATCH 0213/2608] locking/lockdep: Fix debug_locks off performance problem [ Upstream commit 9506a7425b094d2f1d9c877ed5a78f416669269b ] It was found that when debug_locks was turned off because of a problem found by the lockdep code, the system performance could drop quite significantly when the lock_stat code was also configured into the kernel. For instance, parallel kernel build time on a 4-socket x86-64 server nearly doubled. Further analysis into the cause of the slowdown traced back to the frequent call to debug_locks_off() from the __lock_acquired() function probably due to some inconsistent lockdep states with debug_locks off. The debug_locks_off() function did an unconditional atomic xchg to write a 0 value into debug_locks which had already been set to 0. This led to severe cacheline contention in the cacheline that held debug_locks. As debug_locks is being referenced in quite a few different places in the kernel, this greatly slow down the system performance. To prevent that trashing of debug_locks cacheline, lock_acquired() and lock_contended() now checks the state of debug_locks before proceeding. The debug_locks_off() function is also modified to check debug_locks before calling __debug_locks_off(). Signed-off-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1539913518-15598-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/locking/lockdep.c | 4 ++-- lib/debug_locks.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d7c155048ea9..bf694c709b96 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4215,7 +4215,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - if (unlikely(!lock_stat)) + if (unlikely(!lock_stat || !debug_locks)) return; if (unlikely(current->lockdep_recursion)) @@ -4235,7 +4235,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - if (unlikely(!lock_stat)) + if (unlikely(!lock_stat || !debug_locks)) return; if (unlikely(current->lockdep_recursion)) diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 96c4c633d95e..124fdf238b3d 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent); */ int debug_locks_off(void) { - if (__debug_locks_off()) { + if (debug_locks && __debug_locks_off()) { if (!debug_locks_silent) { console_verbose(); return 1; From 7084b74ffc246e781c77a963321f34b661ea49cc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:49 -0700 Subject: [PATCH 0214/2608] ataflop: fix error handling during setup [ Upstream commit 71327f547ee3a46ec5c39fdbbd268401b2578d0e ] Move queue allocation next to disk allocation to fix a couple of issues: - If add_disk() hasn't been called, we should clear disk->queue before calling put_disk(). - If we fail to allocate a request queue, we still need to put all of the disks, not just the ones that we allocated queues for. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/ataflop.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 92da886180aa..1dacc42e2dcf 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1935,6 +1935,11 @@ static int __init atari_floppy_init (void) unit[i].disk = alloc_disk(1); if (!unit[i].disk) goto Enomem; + + unit[i].disk->queue = blk_init_queue(do_fd_request, + &ataflop_lock); + if (!unit[i].disk->queue) + goto Enomem; } if (UseTrackbuffer < 0) @@ -1966,10 +1971,6 @@ static int __init atari_floppy_init (void) sprintf(unit[i].disk->disk_name, "fd%d", i); unit[i].disk->fops = &floppy_fops; unit[i].disk->private_data = &unit[i]; - unit[i].disk->queue = blk_init_queue(do_fd_request, - &ataflop_lock); - if (!unit[i].disk->queue) - goto Enomem; set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); add_disk(unit[i].disk); } @@ -1984,13 +1985,17 @@ static int __init atari_floppy_init (void) return 0; Enomem: - while (i--) { - struct request_queue *q = unit[i].disk->queue; + do { + struct gendisk *disk = unit[i].disk; - put_disk(unit[i].disk); - if (q) - blk_cleanup_queue(q); - } + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + put_disk(unit[i].disk); + } + } while (i--); unregister_blkdev(FLOPPY_MAJOR, "fd"); return -ENOMEM; From 8ac8e0fecd5e36caf0bdb34ccb9e6dc4853c62b3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:41 -0700 Subject: [PATCH 0215/2608] swim: fix cleanup on setup error [ Upstream commit 1448a2a5360ae06f25e2edc61ae070dff5c0beb4 ] If we fail to allocate the request queue for a disk, we still need to free that disk, not just the previous ones. Additionally, we need to cleanup the previous request queues. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/swim.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index e88d50f75a4a..58e308145e95 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -887,8 +887,17 @@ static int swim_floppy_init(struct swim_priv *swd) exit_put_disks: unregister_blkdev(FLOPPY_MAJOR, "fd"); - while (drive--) - put_disk(swd->unit[drive].disk); + do { + struct gendisk *disk = swd->unit[drive].disk; + + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + put_disk(disk); + } + } while (drive--); return err; } From fd160e8b0f078fc46bd9857b2a73b7fc4ff63544 Mon Sep 17 00:00:00 2001 From: Ryan C Goodfellow Date: Fri, 12 Oct 2018 11:09:01 -0700 Subject: [PATCH 0216/2608] nfp: devlink port split support for 1x100G CXP NIC [ Upstream commit 5948185b97fa1f83d7855e638a72982a1073ebf5 ] This commit makes it possible to use devlink to split the 100G CXP Netronome into two 40G interfaces. Currently when you ask for 2 interfaces, the math in src/nfp_devlink.c:nfp_devlink_port_split calculates that you want 5 lanes per port because for some reason eth_port.port_lanes=10 (shouldn't this be 12 for CXP?). What we really want when asking for 2 breakout interfaces is 4 lanes per port. This commit makes that happen by calculating based on 8 lanes if 10 are present. Signed-off-by: Ryan C Goodfellow Reviewed-by: Jakub Kicinski Reviewed-by: Greg Weeks Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/netronome/nfp/nfp_devlink.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 6c9f29c2e975..90a6c4fbc113 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -96,6 +96,7 @@ nfp_devlink_port_split(struct devlink *devlink, unsigned int port_index, { struct nfp_pf *pf = devlink_priv(devlink); struct nfp_eth_table_port eth_port; + unsigned int lanes; int ret; if (count < 2) @@ -114,8 +115,12 @@ nfp_devlink_port_split(struct devlink *devlink, unsigned int port_index, goto out; } - ret = nfp_devlink_set_lanes(pf, eth_port.index, - eth_port.port_lanes / count); + /* Special case the 100G CXP -> 2x40G split */ + lanes = eth_port.port_lanes / count; + if (eth_port.lanes == 10 && count == 2) + lanes = 8 / count; + + ret = nfp_devlink_set_lanes(pf, eth_port.index, lanes); out: mutex_unlock(&pf->lock); @@ -127,6 +132,7 @@ nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index) { struct nfp_pf *pf = devlink_priv(devlink); struct nfp_eth_table_port eth_port; + unsigned int lanes; int ret; mutex_lock(&pf->lock); @@ -142,7 +148,12 @@ nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index) goto out; } - ret = nfp_devlink_set_lanes(pf, eth_port.index, eth_port.port_lanes); + /* Special case the 100G CXP -> 2x40G unsplit */ + lanes = eth_port.port_lanes; + if (eth_port.port_lanes == 8) + lanes = 10; + + ret = nfp_devlink_set_lanes(pf, eth_port.index, lanes); out: mutex_unlock(&pf->lock); From 11d9f6e44a38a7096e39f8ed293746be29cb7715 Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Tue, 9 Oct 2018 21:21:01 +0300 Subject: [PATCH 0217/2608] tun: Consistently configure generic netdev params via rtnetlink [ Upstream commit df52eab23d703142c766ac00bdb8db19d71238d0 ] Configuring generic network device parameters on tun will fail in presence of IFLA_INFO_KIND attribute in IFLA_LINKINFO nested attribute since tun_validate() always return failure. This can be visualized with following ip-link(8) command sequences: # ip link set dev tun0 group 100 # ip link set dev tun0 group 100 type tun RTNETLINK answers: Invalid argument with contrast to dummy and veth drivers: # ip link set dev dummy0 group 100 # ip link set dev dummy0 type dummy # ip link set dev veth0 group 100 # ip link set dev veth0 group 100 type veth Fix by returning zero in tun_validate() when @data is NULL that is always in case since rtnl_link_ops->maxtype is zero in tun driver. Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX") Signed-off-by: Serhey Popovych Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e0baea2dfd3c..7f8c7e3aa356 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1814,6 +1814,8 @@ static void tun_setup(struct net_device *dev) static int tun_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { + if (!data) + return 0; return -EINVAL; } From ec5c7c48925ca01c19c47133a3e3c35de0a4aaaf Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 2 Oct 2018 10:57:52 +0200 Subject: [PATCH 0218/2608] s390/sthyi: Fix machine name validity indication [ Upstream commit b5130dc2224d1881f24224c0590c6d97f2168d6a ] When running as a level 3 guest with no host provided sthyi support sclp_ocf_cpc_name_copy() will only return zeroes. Zeroes are not a valid group name, so let's not indicate that the group name field is valid. Also the group name is not dependent on stsi, let's not return based on stsi before setting it. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Signed-off-by: Janosch Frank Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/sthyi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 395926b8c1ed..ffba4617d108 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -174,17 +174,19 @@ static void fill_hdr(struct sthyi_sctns *sctns) static void fill_stsi_mac(struct sthyi_sctns *sctns, struct sysinfo_1_1_1 *sysinfo) { + sclp_ocf_cpc_name_copy(sctns->mac.infmname); + if (*(u64 *)sctns->mac.infmname != 0) + sctns->mac.infmval1 |= MAC_NAME_VLD; + if (stsi(sysinfo, 1, 1, 1)) return; - sclp_ocf_cpc_name_copy(sctns->mac.infmname); - memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); - sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; + sctns->mac.infmval1 |= MAC_ID_VLD; } static void fill_stsi_par(struct sthyi_sctns *sctns, From d16cd14ea370d87824744644819b21f4ea9aa481 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 21 Sep 2018 12:10:48 +0200 Subject: [PATCH 0219/2608] hwmon: (pwm-fan) Set fan speed to 0 on suspend [ Upstream commit 95dcd64bc5a27080beaa344edfe5bdcca3d2e7dc ] Technically this is not required because disabling the PWM should be enough. However, when support for atomic operations was implemented in the PWM subsystem, only actual changes to the PWM channel are applied during pwm_config(), which means that during after resume from suspend the old settings won't be applied. One possible solution is for the PWM driver to implement its own PM operations such that settings from before suspend get applied on resume. This has the disadvantage of completely ignoring any particular ordering requirements that PWM user drivers might have, so it is best to leave it up to the user drivers to apply the settings that they want at the appropriate time. Another way to solve this would be to read back the current state of the PWM at the time of resume. That way, in case the configuration was lost during suspend, applying the old settings in PWM user drivers would actually get them applied because they differ from the current settings. However, not all PWM drivers support reading the hardware state, and not all hardware may support it. The best workaround at this point seems to be to let PWM user drivers tell the PWM subsystem that the PWM is turned off by, in addition to disabling it, also setting the duty cycle to 0. This causes the resume operation to apply a configuration that is different from the current configuration, resulting in the proper state from before suspend getting restored. Signed-off-by: Thierry Reding Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pwm-fan.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 70cc0d134f3c..ca250e7ac511 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -290,9 +290,19 @@ static int pwm_fan_remove(struct platform_device *pdev) static int pwm_fan_suspend(struct device *dev) { struct pwm_fan_ctx *ctx = dev_get_drvdata(dev); + struct pwm_args args; + int ret; + + pwm_get_args(ctx->pwm, &args); + + if (ctx->pwm_value) { + ret = pwm_config(ctx->pwm, 0, args.period); + if (ret < 0) + return ret; - if (ctx->pwm_value) pwm_disable(ctx->pwm); + } + return 0; } From 47f29328600d0d3b761f7c1d968aaf1cdc208169 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 9 Oct 2018 13:12:00 +0200 Subject: [PATCH 0220/2608] lightnvm: pblk: fix two sleep-in-atomic-context bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7325b4bbe5952e3e939f15de812f2ee0c0d33ca9 ] The driver may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16 are: [FUNC] nvm_dev_dma_alloc(GFP_KERNEL) drivers/lightnvm/pblk-core.c, 754: nvm_dev_dma_alloc in pblk_line_submit_smeta_io drivers/lightnvm/pblk-core.c, 1048: pblk_line_submit_smeta_io in pblk_line_init_bb drivers/lightnvm/pblk-core.c, 1434: pblk_line_init_bb in pblk_line_replace_data drivers/lightnvm/pblk-recovery.c, 980: pblk_line_replace_data in pblk_recov_l2p drivers/lightnvm/pblk-recovery.c, 976: spin_lock in pblk_recov_l2p [FUNC] bio_map_kern(GFP_KERNEL) drivers/lightnvm/pblk-core.c, 762: bio_map_kern in pblk_line_submit_smeta_io drivers/lightnvm/pblk-core.c, 1048: pblk_line_submit_smeta_io in pblk_line_init_bb drivers/lightnvm/pblk-core.c, 1434: pblk_line_init_bb in pblk_line_replace_data drivers/lightnvm/pblk-recovery.c, 980: pblk_line_replace_data in pblk_recov_l2p drivers/lightnvm/pblk-recovery.c, 976: spin_lock in pblk_recov_l2p To fix these bugs, the call to pblk_line_replace_data() is moved out of the spinlock protection. These bugs are found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/lightnvm/pblk-recovery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index cb556e06673e..5d0912bf9eab 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -1001,12 +1001,14 @@ next: } } - spin_lock(&l_mg->free_lock); if (!open_lines) { + spin_lock(&l_mg->free_lock); WARN_ON_ONCE(!test_and_clear_bit(meta_line, &l_mg->meta_bitmap)); + spin_unlock(&l_mg->free_lock); pblk_line_replace_data(pblk); } else { + spin_lock(&l_mg->free_lock); /* Allocate next line for preparation */ l_mg->data_next = pblk_line_get(pblk); if (l_mg->data_next) { @@ -1014,8 +1016,8 @@ next: l_mg->data_next->type = PBLK_LINETYPE_DATA; is_next = 1; } + spin_unlock(&l_mg->free_lock); } - spin_unlock(&l_mg->free_lock); if (is_next) { pblk_line_erase(pblk, l_mg->data_next); From 8515f4ea7289440a5c46be6af9d8668449956e9a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 8 Oct 2018 11:08:47 -0700 Subject: [PATCH 0221/2608] spi: spi-ep93xx: Use dma_data_direction for ep93xx_spi_dma_{finish,prepare} [ Upstream commit a1108c7b2efb892350ba6a0e932dfd45622f4e2b ] Clang warns when one enumerated type is implicitly converted to another. drivers/spi/spi-ep93xx.c:342:62: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] nents = dma_map_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~ ./include/linux/dma-mapping.h:428:58: note: expanded from macro 'dma_map_sg' #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0) ~~~~~~~~~~~~~~~~ ^ drivers/spi/spi-ep93xx.c:348:57: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~ ./include/linux/dma-mapping.h:429:62: note: expanded from macro 'dma_unmap_sg' #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0) ~~~~~~~~~~~~~~~~~~ ^ drivers/spi/spi-ep93xx.c:377:56: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~ ./include/linux/dma-mapping.h:429:62: note: expanded from macro 'dma_unmap_sg' #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0) ~~~~~~~~~~~~~~~~~~ ^ 3 warnings generated. dma_{,un}map_sg expect an enum of type dma_data_direction but this driver uses dma_transfer_direction for everything. Convert the driver to use dma_data_direction for these two functions. There are two places that strictly require an enum of type dma_transfer_direction: the direction member in struct dma_slave_config and the direction parameter in dmaengine_prep_slave_sg. To avoid using an explicit cast, add a simple function, ep93xx_dma_data_to_trans_dir, to safely map between the two types because they are not 1 to 1 in meaning. Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Mika Westerberg Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ep93xx.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c index e5cc07357746..ce28c910ee48 100644 --- a/drivers/spi/spi-ep93xx.c +++ b/drivers/spi/spi-ep93xx.c @@ -246,6 +246,19 @@ static int ep93xx_spi_read_write(struct spi_master *master) return -EINPROGRESS; } +static enum dma_transfer_direction +ep93xx_dma_data_to_trans_dir(enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + return DMA_MEM_TO_DEV; + case DMA_FROM_DEVICE: + return DMA_DEV_TO_MEM; + default: + return DMA_TRANS_NONE; + } +} + /** * ep93xx_spi_dma_prepare() - prepares a DMA transfer * @master: SPI master @@ -257,7 +270,7 @@ static int ep93xx_spi_read_write(struct spi_master *master) */ static struct dma_async_tx_descriptor * ep93xx_spi_dma_prepare(struct spi_master *master, - enum dma_transfer_direction dir) + enum dma_data_direction dir) { struct ep93xx_spi *espi = spi_master_get_devdata(master); struct spi_transfer *xfer = master->cur_msg->state; @@ -277,9 +290,9 @@ ep93xx_spi_dma_prepare(struct spi_master *master, buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE; memset(&conf, 0, sizeof(conf)); - conf.direction = dir; + conf.direction = ep93xx_dma_data_to_trans_dir(dir); - if (dir == DMA_DEV_TO_MEM) { + if (dir == DMA_FROM_DEVICE) { chan = espi->dma_rx; buf = xfer->rx_buf; sgt = &espi->rx_sgt; @@ -343,7 +356,8 @@ ep93xx_spi_dma_prepare(struct spi_master *master, if (!nents) return ERR_PTR(-ENOMEM); - txd = dmaengine_prep_slave_sg(chan, sgt->sgl, nents, dir, DMA_CTRL_ACK); + txd = dmaengine_prep_slave_sg(chan, sgt->sgl, nents, conf.direction, + DMA_CTRL_ACK); if (!txd) { dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); return ERR_PTR(-ENOMEM); @@ -360,13 +374,13 @@ ep93xx_spi_dma_prepare(struct spi_master *master, * unmapped. */ static void ep93xx_spi_dma_finish(struct spi_master *master, - enum dma_transfer_direction dir) + enum dma_data_direction dir) { struct ep93xx_spi *espi = spi_master_get_devdata(master); struct dma_chan *chan; struct sg_table *sgt; - if (dir == DMA_DEV_TO_MEM) { + if (dir == DMA_FROM_DEVICE) { chan = espi->dma_rx; sgt = &espi->rx_sgt; } else { @@ -381,8 +395,8 @@ static void ep93xx_spi_dma_callback(void *callback_param) { struct spi_master *master = callback_param; - ep93xx_spi_dma_finish(master, DMA_MEM_TO_DEV); - ep93xx_spi_dma_finish(master, DMA_DEV_TO_MEM); + ep93xx_spi_dma_finish(master, DMA_TO_DEVICE); + ep93xx_spi_dma_finish(master, DMA_FROM_DEVICE); spi_finalize_current_transfer(master); } @@ -392,15 +406,15 @@ static int ep93xx_spi_dma_transfer(struct spi_master *master) struct ep93xx_spi *espi = spi_master_get_devdata(master); struct dma_async_tx_descriptor *rxd, *txd; - rxd = ep93xx_spi_dma_prepare(master, DMA_DEV_TO_MEM); + rxd = ep93xx_spi_dma_prepare(master, DMA_FROM_DEVICE); if (IS_ERR(rxd)) { dev_err(&master->dev, "DMA RX failed: %ld\n", PTR_ERR(rxd)); return PTR_ERR(rxd); } - txd = ep93xx_spi_dma_prepare(master, DMA_MEM_TO_DEV); + txd = ep93xx_spi_dma_prepare(master, DMA_TO_DEVICE); if (IS_ERR(txd)) { - ep93xx_spi_dma_finish(master, DMA_DEV_TO_MEM); + ep93xx_spi_dma_finish(master, DMA_FROM_DEVICE); dev_err(&master->dev, "DMA TX failed: %ld\n", PTR_ERR(txd)); return PTR_ERR(txd); } From 52ff94ce5134464db22720973e7773af0969b96f Mon Sep 17 00:00:00 2001 From: Sanskriti Sharma Date: Tue, 2 Oct 2018 10:29:14 -0400 Subject: [PATCH 0222/2608] perf tools: Free temporary 'sys' string in read_event_files() [ Upstream commit 1e44224fb0528b4c0cc176bde2bb31e9127eb14b ] For each system in a given pevent, read_event_files() reads in a temporary 'sys' string. Be sure to free this string before moving onto to the next system and/or leaving read_event_files(). Fixes the following coverity complaints: Error: RESOURCE_LEAK (CWE-772): tools/perf/util/trace-event-read.c:343: overwrite_var: Overwriting "sys" in "sys = read_string()" leaks the storage that "sys" points to. tools/perf/util/trace-event-read.c:353: leaked_storage: Variable "sys" going out of scope leaks the storage it points to. Signed-off-by: Sanskriti Sharma Reviewed-by: Jiri Olsa Cc: Joe Lawrence Link: http://lkml.kernel.org/r/1538490554-8161-6-git-send-email-sansharm@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/trace-event-read.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8a9a677f7576..6bfd690d63d9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -350,9 +350,12 @@ static int read_event_files(struct pevent *pevent) for (x=0; x < count; x++) { size = read8(pevent); ret = read_event_file(pevent, sys, size); - if (ret) + if (ret) { + free(sys); return ret; + } } + free(sys); } return 0; } From 2e8e70e56265496274e0c19464fffe896009bfc0 Mon Sep 17 00:00:00 2001 From: Sanskriti Sharma Date: Tue, 2 Oct 2018 10:29:11 -0400 Subject: [PATCH 0223/2608] perf tools: Cleanup trace-event-info 'tdata' leak [ Upstream commit faedbf3fd19f2511a39397f76359e4cc6ee93072 ] Free tracing_data structure in tracing_data_get() error paths. Fixes the following coverity complaint: Error: RESOURCE_LEAK (CWE-772): leaked_storage: Variable "tdata" going out of scope leaks the storage Signed-off-by: Sanskriti Sharma Reviewed-by: Jiri Olsa Cc: Joe Lawrence Link: http://lkml.kernel.org/r/1538490554-8161-3-git-send-email-sansharm@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/trace-event-info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 8f3b7ef221f2..71a5b4863707 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -533,12 +533,14 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, "/tmp/perf-XXXXXX"); if (!mkstemp(tdata->temp_file)) { pr_debug("Can't make temp file"); + free(tdata); return NULL; } temp_fd = open(tdata->temp_file, O_RDWR); if (temp_fd < 0) { pr_debug("Can't read '%s'", tdata->temp_file); + free(tdata); return NULL; } From 1ab8d2dbc1a07743d0e48f63ec0d0179e6b3c900 Mon Sep 17 00:00:00 2001 From: Sanskriti Sharma Date: Tue, 2 Oct 2018 10:29:10 -0400 Subject: [PATCH 0224/2608] perf strbuf: Match va_{add,copy} with va_end [ Upstream commit ce49d8436cffa9b7a6a5f110879d53e89dbc6746 ] Ensure that all code paths in strbuf_addv() call va_end() on the ap_saved copy that was made. Fixes the following coverity complaint: Error: VARARGS (CWE-237): [#def683] tools/perf/util/strbuf.c:106: missing_va_end: va_end was not called for "ap_saved". Signed-off-by: Sanskriti Sharma Reviewed-by: Jiri Olsa Cc: Joe Lawrence Link: http://lkml.kernel.org/r/1538490554-8161-2-git-send-email-sansharm@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/strbuf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 3d1cf5bf7f18..9005fbe0780e 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -98,19 +98,25 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) va_copy(ap_saved, ap); len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - if (len < 0) + if (len < 0) { + va_end(ap_saved); return len; + } if (len > strbuf_avail(sb)) { ret = strbuf_grow(sb, len); - if (ret) + if (ret) { + va_end(ap_saved); return ret; + } len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); va_end(ap_saved); if (len > strbuf_avail(sb)) { pr_debug("this should not happen, your vsnprintf is broken"); + va_end(ap_saved); return -EINVAL; } } + va_end(ap_saved); return strbuf_setlen(sb, sb->len + len); } From b5d5f109c6d28c34c4f5469199073d4836567a17 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 8 Oct 2018 11:06:19 -0400 Subject: [PATCH 0225/2608] cpupower: Fix coredump on VMWare [ Upstream commit f69ffc5d3db8f1f03fd6d1df5930f9a1fbd787b6 ] cpupower crashes on VMWare guests. The guests have the AMD PStateDef MSR (0xC0010064 + state number) set to zero. As a result fid and did are zero and the crash occurs because of a divide by zero (cof = fid/did). This can be prevented by checking the enable bit in the PStateDef MSR before calculating cof. By doing this the value of pstate[i] remains zero and the value can be tested before displaying the active Pstates. Check the enable bit in the PstateDef register for all supported families and only print out enabled Pstates. Signed-off-by: Prarit Bhargava Cc: Shuah Khan Cc: Stafford Horne Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/utils/cpufreq-info.c | 2 ++ tools/power/cpupower/utils/helpers/amd.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 3e701f0e9c14..5853faa9daf3 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -202,6 +202,8 @@ static int get_boost_mode(unsigned int cpu) printf(_(" Boost States: %d\n"), b_states); printf(_(" Total States: %d\n"), pstate_no); for (i = 0; i < pstate_no; i++) { + if (!pstates[i]) + continue; if (i < b_states) printf(_(" Pstate-Pb%d: %luMHz (boost state)" "\n"), i, pstates[i]); diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index bb41cdd0df6b..58d23997424d 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -119,6 +119,11 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family, } if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val)) return -1; + if ((cpu_family == 0x17) && (!pstate.fam17h_bits.en)) + continue; + else if (!pstate.bits.en) + continue; + pstates[i] = get_cof(cpu_family, pstate); } *no = i; From f349beaa4c8f3d928a720e58fceefce4c02f0740 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sun, 23 Sep 2018 14:39:24 -0600 Subject: [PATCH 0226/2608] mmc: sdhci-pci-o2micro: Add quirk for O2 Micro dev 0x8620 rev 0x01 [ Upstream commit 5169894982bb67486d93cc1e10151712bb86bcb6 ] This device reports SDHCI_CLOCK_INT_STABLE even though it's not ready to take SDHCI_CLOCK_CARD_EN. The symptom is that reading SDHCI_CLOCK_CONTROL after enabling the clock shows absence of the bit from the register (e.g. expecting 0x0000fa07 = 0x0000fa03 | SDHCI_CLOCK_CARD_EN but only observed the first operand). mmc1: Timeout waiting for hardware cmd interrupt. mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00000603 mmc1: sdhci: Blk size: 0x00000000 | Blk cnt: 0x00000000 mmc1: sdhci: Argument: 0x00000000 | Trn mode: 0x00000000 mmc1: sdhci: Present: 0x01ff0001 | Host ctl: 0x00000001 mmc1: sdhci: Power: 0x0000000f | Blk gap: 0x00000000 mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x0000fa03 mmc1: sdhci: Timeout: 0x00000000 | Int stat: 0x00000000 mmc1: sdhci: Int enab: 0x00ff0083 | Sig enab: 0x00ff0083 mmc1: sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 mmc1: sdhci: Caps: 0x25fcc8bf | Caps_1: 0x00002077 mmc1: sdhci: Cmd: 0x00000000 | Max curr: 0x005800c8 mmc1: sdhci: Resp[0]: 0x00000000 | Resp[1]: 0x00000000 mmc1: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000000 mmc1: sdhci: Host ctl2: 0x00000008 mmc1: sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x00000000 mmc1: sdhci: ============================================ The problem happens during wakeup from S3. Adding a delay quirk after power up reliably fixes the problem. Signed-off-by: Yu Zhao Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-o2micro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 14273ca00641..44a809a20d3a 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -334,6 +334,9 @@ int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; case PCI_DEVICE_ID_O2_SEABIRD0: + if (chip->pdev->revision == 0x01) + chip->quirks |= SDHCI_QUIRK_DELAY_AFTER_POWER; + /* fall through */ case PCI_DEVICE_ID_O2_SEABIRD1: /* UnLock WP */ ret = pci_read_config_byte(chip->pdev, From 709c3305511b234c57710461a67d96fa202b6d03 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 6 Jun 2018 17:20:58 +0300 Subject: [PATCH 0227/2608] iwlwifi: pcie: avoid empty free RB queue [ Upstream commit 868a1e863f95183f00809363fefba6d4f5bcd116 ] If all free RB queues are empty, the driver will never restock the free RB queue. That's because the restocking happens in the Rx flow, and if the free queue is empty there will be no Rx. Although there's a background worker (a.k.a. allocator) allocating memory for RBs so that the Rx handler can restock them, the worker may run only after the free queue has become empty (and then it is too late for restocking as explained above). There is a solution for that called 'emergency': If the number of used RB's reaches half the amount of all RB's, the Rx handler will not wait for the allocator but immediately allocate memory for the used RB's and restock the free queue. But, since the used RB's is per queue, it may happen that the used RB's are spread between the queues such that the emergency check will fail for each of the queues (and still run out of RBs, causing the above symptom). To fix it, move to emergency mode if the sum of *all* used RBs (for all Rx queues) reaches half the amount of all RB's Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 30 +++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index ca99c3cf41c2..5a15362ef671 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1049,6 +1049,14 @@ void iwl_pcie_rx_free(struct iwl_trans *trans) kfree(trans_pcie->rxq); } +static void iwl_pcie_rx_move_to_allocator(struct iwl_rxq *rxq, + struct iwl_rb_allocator *rba) +{ + spin_lock(&rba->lock); + list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty); + spin_unlock(&rba->lock); +} + /* * iwl_pcie_rx_reuse_rbd - Recycle used RBDs * @@ -1080,9 +1088,7 @@ static void iwl_pcie_rx_reuse_rbd(struct iwl_trans *trans, if ((rxq->used_count % RX_CLAIM_REQ_ALLOC) == RX_POST_REQ_ALLOC) { /* Move the 2 RBDs to the allocator ownership. Allocator has another 6 from pool for the request completion*/ - spin_lock(&rba->lock); - list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty); - spin_unlock(&rba->lock); + iwl_pcie_rx_move_to_allocator(rxq, rba); atomic_inc(&rba->req_pending); queue_work(rba->alloc_wq, &rba->rx_alloc); @@ -1260,10 +1266,18 @@ restart: IWL_DEBUG_RX(trans, "Q %d: HW = SW = %d\n", rxq->id, r); while (i != r) { + struct iwl_rb_allocator *rba = &trans_pcie->rba; struct iwl_rx_mem_buffer *rxb; + /* number of RBDs still waiting for page allocation */ + u32 rb_pending_alloc = + atomic_read(&trans_pcie->rba.req_pending) * + RX_CLAIM_REQ_ALLOC; - if (unlikely(rxq->used_count == rxq->queue_size / 2)) + if (unlikely(rb_pending_alloc >= rxq->queue_size / 2 && + !emergency)) { + iwl_pcie_rx_move_to_allocator(rxq, rba); emergency = true; + } if (trans->cfg->mq_rx_supported) { /* @@ -1306,17 +1320,13 @@ restart: iwl_pcie_rx_allocator_get(trans, rxq); if (rxq->used_count % RX_CLAIM_REQ_ALLOC == 0 && !emergency) { - struct iwl_rb_allocator *rba = &trans_pcie->rba; - /* Add the remaining empty RBDs for allocator use */ - spin_lock(&rba->lock); - list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty); - spin_unlock(&rba->lock); + iwl_pcie_rx_move_to_allocator(rxq, rba); } else if (emergency) { count++; if (count == 8) { count = 0; - if (rxq->used_count < rxq->queue_size / 3) + if (rb_pending_alloc < rxq->queue_size / 3) emergency = false; rxq->read = i; From 371075886a394e8c8e9cc5177e4cadb0f9e2ae17 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 13 Jun 2018 11:49:20 +0300 Subject: [PATCH 0228/2608] iwlwifi: mvm: clear HW_RESTART_REQUESTED when stopping the interface [ Upstream commit 155f7e0441cd121b1e673d465a35e99f4b9b2f0b ] Fix a bug that happens in the following scenario: 1) suspend without WoWLAN 2) mac80211 calls drv_stop because of the suspend 3) __iwl_mvm_mac_stop deallocates the aux station 4) during drv_stop the firmware crashes 5) iwlmvm: * sets IWL_MVM_STATUS_HW_RESTART_REQUESTED * asks mac80211 to kick the restart flow 6) mac80211 puts the restart worker into a freezable queue which means that the worker will not run for now since the workqueue is already frozen 7) ... 8) resume 9) mac80211 runs ieee80211_reconfig as part of the resume 10) mac80211 detects that a restart flow has been requested and that we are now resuming from suspend and cancels the restart worker 11) mac80211 calls drv_start() 12) __iwl_mvm_mac_start checks that IWL_MVM_STATUS_HW_RESTART_REQUESTED clears it, sets IWL_MVM_STATUS_IN_HW_RESTART and calls iwl_mvm_restart_cleanup() 13) iwl_fw_error_dump gets called and accesses the device to get debug data 14) iwl_mvm_up adds the aux station 15) iwl_mvm_add_aux_sta() allocates an internal station for the aux station 16) iwl_mvm_allocate_int_sta() tests IWL_MVM_STATUS_IN_HW_RESTART and doesn't really allocate a station ID for the aux station 17) a new queue is added for the aux station Note that steps from 5 to 9 aren't really part of the problem but were described for the sake of completeness. Once the iwl_mvm_mac_stop() is called, the device is not accessible, meaning that step 12) can't succeed and we'll see the following: drivers/net/wireless/intel/iwlwifi/pcie/trans.c:2122 iwl_trans_pcie_grab_nic_access+0xc0/0x1d6 [iwlwifi]() Timeout waiting for hardware access (CSR_GP_CNTRL 0x080403d8) Call Trace: [] iwl_trans_pcie_grab_nic_access+0xc0/0x1d6 [iwlwifi] [] iwl_trans_pcie_dump_regs+0x3fd/0x3fd [iwlwifi] [] iwl_fw_error_dump+0x4f5/0xe8b [iwlwifi] [] __iwl_mvm_mac_start+0x5a/0x21a [iwlmvm] [] iwl_mvm_mac_start+0xd4/0x103 [iwlmvm] [] drv_start+0xa1/0xc5 [iwl7000_mac80211] [] ieee80211_reconfig+0x145/0xf50 [mac80211] [] ieee80211_resume+0x62/0x66 [mac80211] [] wiphy_resume+0xa9/0xc6 [cfg80211] The station id of the aux station is set to 0xff in step 3 and because we don't really allocate a new station id for the auxliary station (as explained in 16), we end up sending a command to the firmware asking to connect the queue to station id 0xff. This makes the firmware crash with the following information: 0x00002093 | ADVANCED_SYSASSERT 0x000002F0 | trm_hw_status0 0x00000000 | trm_hw_status1 0x00000B38 | branchlink2 0x0001978C | interruptlink1 0x00000000 | interruptlink2 0xFF080501 | data1 0xDEADBEEF | data2 0xDEADBEEF | data3 Firmware error during reconfiguration - reprobe! FW error in SYNC CMD SCD_QUEUE_CFG Fix this by clearing IWL_MVM_STATUS_HW_RESTART_REQUESTED in iwl_mvm_mac_stop(). We won't be able to collect debug data anyway and when we will brought up again, we will have a clean state from the firmware perspective. Since we won't have IWL_MVM_STATUS_IN_HW_RESTART set in step 12) we won't get to the 2093 ASSERT either. Fixes: bf8b286f86fc ("iwlwifi: mvm: defer setting IWL_MVM_STATUS_IN_HW_RESTART") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index db1fab9aa1c6..80a653950e86 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1225,12 +1225,15 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) iwl_mvm_del_aux_sta(mvm); /* - * Clear IN_HW_RESTART flag when stopping the hw (as restart_complete() - * won't be called in this case). + * Clear IN_HW_RESTART and HW_RESTART_REQUESTED flag when stopping the + * hw (as restart_complete() won't be called in this case) and mac80211 + * won't execute the restart. * But make sure to cleanup interfaces that have gone down before/during * HW restart was requested. */ - if (test_and_clear_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) + if (test_and_clear_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) || + test_and_clear_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, + &mvm->status)) ieee80211_iterate_interfaces(mvm->hw, 0, iwl_mvm_cleanup_iterator, mvm); From 0d7a51786faf11362a9d9b3841f9d9ffe5b65c24 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 4 Oct 2018 18:08:08 +0200 Subject: [PATCH 0229/2608] x86/olpc: Indicate that legacy PC XO-1 platform should not register RTC [ Upstream commit d92116b800fb79a72ad26121f5011f6aa3ad94c2 ] On OLPC XO-1, the RTC is discovered via device tree from the arch initcall. Don't let the PC platform register another one from its device initcall, it's not going to work: sysfs: cannot create duplicate filename '/devices/platform/rtc_cmos' CPU: 0 PID: 1 Comm: swapper Not tainted 4.19.0-rc6 #12 Hardware name: OLPC XO/XO, BIOS OLPC Ver 1.00.01 06/11/2014 Call Trace: dump_stack+0x16/0x18 sysfs_warn_dup+0x46/0x58 sysfs_create_dir_ns+0x76/0x9b kobject_add_internal+0xed/0x209 ? __schedule+0x3fa/0x447 kobject_add+0x5b/0x66 device_add+0x298/0x535 ? insert_resource_conflict+0x2a/0x3e platform_device_add+0x14d/0x192 ? io_delay_init+0x19/0x19 platform_device_register+0x1c/0x1f add_rtc_cmos+0x16/0x31 do_one_initcall+0x78/0x14a ? do_early_param+0x75/0x75 kernel_init_freeable+0x152/0x1e0 ? rest_init+0xa2/0xa2 kernel_init+0x8/0xd5 ret_from_fork+0x2e/0x38 kobject_add_internal failed for rtc_cmos with -EEXIST, don't try to register things with the same name in the same directory. platform rtc_cmos: registered platform RTC device (no PNP device found) Signed-off-by: Lubomir Rintel Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner CC: "H. Peter Anvin" CC: Ingo Molnar CC: x86-ml Link: http://lkml.kernel.org/r/20181004160808.307738-1-lkundrak@v3.sk Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/olpc/olpc-xo1-rtc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c index a2b4efddd61a..8e7ddd7e313a 100644 --- a/arch/x86/platform/olpc/olpc-xo1-rtc.c +++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c @@ -16,6 +16,7 @@ #include #include +#include static void rtc_wake_on(struct device *dev) { @@ -75,6 +76,8 @@ static int __init xo1_rtc_init(void) if (r) return r; + x86_platform.legacy.rtc = 0; + device_init_wakeup(&xo1_rtc_device.dev, 1); return 0; } From 85e20b47dec298b569de62cce4b1762f647b28e7 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 24 Aug 2018 10:51:26 +0800 Subject: [PATCH 0230/2608] ACPI / processor: Fix the return value of acpi_processor_ids_walk() [ Upstream commit d0381bf4f80c571dde1244fe5b85dc35e8b3f546 ] ACPI driver should make sure all the processor IDs in their ACPI Namespace are unique. the driver performs a depth-first walk of the namespace tree and calls the acpi_processor_ids_walk() to check the duplicate IDs. But, the acpi_processor_ids_walk() mistakes the return value. If a processor is checked, it returns true which causes the walk break immediately, and other processors will never be checked. Repace the value with AE_OK which is the standard acpi_status value. And don't abort the namespace walk even on error. Fixes: 8c8cb30f49b8 (acpi/processor: Implement DEVICE operator for processor enumeration) Signed-off-by: Dou Liyang Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_processor.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 86c10599d9f8..ccf07674a2a0 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -642,7 +642,7 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle, status = acpi_get_type(handle, &acpi_type); if (ACPI_FAILURE(status)) - return false; + return status; switch (acpi_type) { case ACPI_TYPE_PROCESSOR: @@ -662,11 +662,12 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle, } processor_validated_ids_update(uid); - return true; + return AE_OK; err: + /* Exit on error, but don't abort the namespace walk */ acpi_handle_info(handle, "Invalid processor object\n"); - return false; + return AE_OK; } From bd8dfdcc2249b3bcfd33bdb547e392c4aca134f7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 3 Oct 2018 15:35:21 +0530 Subject: [PATCH 0231/2608] cpufreq: dt: Try freeing static OPPs only if we have added them [ Upstream commit 51c99dd2c06b234575661fa1e0a1dea6c3ef566f ] We can not call dev_pm_opp_of_cpumask_remove_table() freely anymore since the latest OPP core updates as that uses reference counting to free resources. There are cases where no static OPPs are added (using DT) for a platform and trying to remove the OPP table may end up decrementing refcount which is already zero and hence generating warnings. Lets track if we were able to add static OPPs or not and then only remove the table based on that. Some reshuffling of code is also done to do that. Reported-by: Niklas Cassel Tested-by: Niklas Cassel Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq-dt.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index d83ab94d041a..ca6ee9f389b6 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -32,6 +32,7 @@ struct private_data { struct device *cpu_dev; struct thermal_cooling_device *cdev; const char *reg_name; + bool have_static_opps; }; static struct freq_attr *cpufreq_dt_attr[] = { @@ -196,6 +197,15 @@ static int cpufreq_init(struct cpufreq_policy *policy) } } + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto out_put_regulator; + } + + priv->reg_name = name; + priv->opp_table = opp_table; + /* * Initialize OPP tables for all policy->cpus. They will be shared by * all CPUs which have marked their CPUs shared with OPP bindings. @@ -206,7 +216,8 @@ static int cpufreq_init(struct cpufreq_policy *policy) * * OPPs might be populated at runtime, don't check for error here */ - dev_pm_opp_of_cpumask_add_table(policy->cpus); + if (!dev_pm_opp_of_cpumask_add_table(policy->cpus)) + priv->have_static_opps = true; /* * But we need OPP table to function so if it is not there let's @@ -232,19 +243,10 @@ static int cpufreq_init(struct cpufreq_policy *policy) __func__, ret); } - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - ret = -ENOMEM; - goto out_free_opp; - } - - priv->reg_name = name; - priv->opp_table = opp_table; - ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); - goto out_free_priv; + goto out_free_opp; } priv->cpu_dev = cpu_dev; @@ -280,10 +282,11 @@ static int cpufreq_init(struct cpufreq_policy *policy) out_free_cpufreq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); -out_free_priv: - kfree(priv); out_free_opp: - dev_pm_opp_of_cpumask_remove_table(policy->cpus); + if (priv->have_static_opps) + dev_pm_opp_of_cpumask_remove_table(policy->cpus); + kfree(priv); +out_put_regulator: if (name) dev_pm_opp_put_regulators(opp_table); out_put_clk: @@ -298,7 +301,8 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + if (priv->have_static_opps) + dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); if (priv->reg_name) dev_pm_opp_put_regulators(priv->opp_table); From 2d2b73cf0bfcac058789e0edd4fbe83d8b26bb14 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 18 Sep 2018 08:55:55 -0500 Subject: [PATCH 0232/2608] mtd: rawnand: atmel: Fix potential NULL pointer dereference [ Upstream commit fbed20280d912449cfb40c382cb55e3d11502587 ] There is a potential execution path in which function of_find_compatible_node() returns NULL. In such a case, we end up having a NULL pointer dereference when accessing pointer *nfc_np* in function of_clk_get(). So, we better don't take any chances and fix this by null checking pointer *nfc_np* before calling of_clk_get(). Addresses-Coverity-ID: 1473052 ("Dereference null return value") Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Boris Brezillon Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/atmel/nand-controller.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 148744418e82..32a2f947a454 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -2079,6 +2079,10 @@ atmel_hsmc_nand_controller_legacy_init(struct atmel_hsmc_nand_controller *nc) nand_np = dev->of_node; nfc_np = of_find_compatible_node(dev->of_node, NULL, "atmel,sama5d3-nfc"); + if (!nfc_np) { + dev_err(dev, "Could not find device node for sama5d3-nfc\n"); + return -ENODEV; + } nc->clk = of_clk_get(nfc_np, 0); if (IS_ERR(nc->clk)) { From b66d0bb19f24702ca3b9ac8dc6f570d5be90ec7d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Sep 2018 15:34:42 +0100 Subject: [PATCH 0233/2608] signal: Introduce COMPAT_SIGMINSTKSZ for use in compat_sys_sigaltstack [ Upstream commit 22839869f21ab3850fbbac9b425ccc4c0023926f ] The sigaltstack(2) system call fails with -ENOMEM if the new alternative signal stack is found to be smaller than SIGMINSTKSZ. On architectures such as arm64, where the native value for SIGMINSTKSZ is larger than the compat value, this can result in an unexpected error being reported to a compat task. See, for example: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=904385 This patch fixes the problem by extending do_sigaltstack to take the minimum signal stack size as an additional parameter, allowing the native and compat system call entry code to pass in their respective values. COMPAT_SIGMINSTKSZ is just defined as SIGMINSTKSZ if it has not been defined by the architecture. Cc: Arnd Bergmann Cc: Dominik Brodowski Cc: "Eric W. Biederman" Cc: Andrew Morton Cc: Al Viro Cc: Oleg Nesterov Reported-by: Steve McIntyre Tested-by: Steve McIntyre <93sam@debian.org> Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/compat.h | 3 +++ kernel/signal.c | 14 +++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 3e838a828459..23909d12f729 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -68,6 +68,9 @@ typedef struct compat_sigaltstack { compat_size_t ss_size; } compat_stack_t; #endif +#ifndef COMPAT_MINSIGSTKSZ +#define COMPAT_MINSIGSTKSZ MINSIGSTKSZ +#endif #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) diff --git a/kernel/signal.c b/kernel/signal.c index 4439ba9dc5d9..b74acbec9876 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3215,7 +3215,8 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) } static int -do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) +do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, + size_t min_ss_size) { struct task_struct *t = current; @@ -3245,7 +3246,7 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) ss_size = 0; ss_sp = NULL; } else { - if (unlikely(ss_size < MINSIGSTKSZ)) + if (unlikely(ss_size < min_ss_size)) return -ENOMEM; } @@ -3263,7 +3264,8 @@ SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) if (uss && copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, - current_user_stack_pointer()); + current_user_stack_pointer(), + MINSIGSTKSZ); if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) err = -EFAULT; return err; @@ -3274,7 +3276,8 @@ int restore_altstack(const stack_t __user *uss) stack_t new; if (copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; - (void)do_sigaltstack(&new, NULL, current_user_stack_pointer()); + (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), + MINSIGSTKSZ); /* squash all but EFAULT for now */ return 0; } @@ -3309,7 +3312,8 @@ COMPAT_SYSCALL_DEFINE2(sigaltstack, uss.ss_size = uss32.ss_size; } ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, - compat_user_stack_pointer()); + compat_user_stack_pointer(), + COMPAT_MINSIGSTKSZ); if (ret >= 0 && uoss_ptr) { compat_stack_t old; memset(&old, 0, sizeof(old)); From 71ef2fef4a394fb40bf0087f2ddaa5d035355d30 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Tue, 4 Sep 2018 21:50:57 +0400 Subject: [PATCH 0234/2608] Bluetooth: btbcm: Add entry for BCM4335C0 UART bluetooth [ Upstream commit a357ea098c9605f60d92a66a9073f56ce25726da ] This patch adds the device ID for the AMPAK AP6335 combo module used in the 1st generation WeTek Hub Android/LibreELEC HTPC box. The WiFI chip identifies itself as BCM4339, while Bluetooth identifies itself as BCM4335 (rev C0): ``` [ 4.864248] Bluetooth: hci0: BCM: chip id 86 [ 4.866388] Bluetooth: hci0: BCM: features 0x2f [ 4.889317] Bluetooth: hci0: BCM4335C0 [ 4.889332] Bluetooth: hci0: BCM4335C0 (003.001.009) build 0000 [ 9.778383] Bluetooth: hci0: BCM4335C0 (003.001.009) build 0268 ``` Output from hciconfig: ``` hci0: Type: Primary Bus: UART BD Address: 43:39:00:00:1F:AC ACL MTU: 1021:8 SCO MTU: 64:1 UP RUNNING RX bytes:7567 acl:234 sco:0 events:386 errors:0 TX bytes:53844 acl:77 sco:0 commands:304 errors:0 Features: 0xbf 0xfe 0xcf 0xfe 0xdb 0xff 0x7b 0x87 Packet type: DM1 DM3 DM5 DH1 DH3 DH5 HV1 HV2 HV3 Link policy: RSWITCH SNIFF Link mode: SLAVE ACCEPT Name: 'HUB' Class: 0x0c0000 Service Classes: Rendering, Capturing Device Class: Miscellaneous, HCI Version: 4.0 (0x6) Revision: 0x10c LMP Version: 4.0 (0x6) Subversion: 0x6109 Manufacturer: Broadcom Corporation (15) ``` Signed-off-by: Christian Hewitt Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btbcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index cc4bdefa6648..67315cb28826 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -325,6 +325,7 @@ static const struct { { 0x4103, "BCM4330B1" }, /* 002.001.003 */ { 0x410e, "BCM43341B0" }, /* 002.001.014 */ { 0x4406, "BCM4324B3" }, /* 002.004.006 */ + { 0x6109, "BCM4335C0" }, /* 003.001.009 */ { 0x610c, "BCM4354" }, /* 003.001.012 */ { 0x2209, "BCM43430A1" }, /* 001.002.009 */ { } From fcc506cab03fee33a12ab0558bf6c53b897f9339 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 16 Sep 2018 16:22:47 +0100 Subject: [PATCH 0235/2608] x86: boot: Fix EFI stub alignment [ Upstream commit 9c1442a9d039a1a3302fa93e9a11001c5f23b624 ] We currently align the end of the compressed image to a multiple of 16. However, the PE-COFF header included in the EFI stub says that the file alignment is 32 bytes, and when adding an EFI signature to the file it must first be padded to this alignment. sbsigntool commands warn about this: warning: file-aligned section .text extends beyond end of file warning: checksum areas are greater than image size. Invalid section table? Worse, pesign -at least when creating a detached signature- uses the hash of the unpadded file, resulting in an invalid signature if padding is required. Avoid both these problems by increasing alignment to 32 bytes when CONFIG_EFI_STUB is enabled. Signed-off-by: Ben Hutchings Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/tools/build.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index d4e6cd4577e5..bf0e82400358 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -391,6 +391,13 @@ int main(int argc, char ** argv) die("Unable to mmap '%s': %m", argv[2]); /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ sys_size = (sz + 15 + 4) / 16; +#ifdef CONFIG_EFI_STUB + /* + * COFF requires minimum 32-byte alignment of sections, and + * adding a signature is problematic without that alignment. + */ + sys_size = (sys_size + 1) & ~1; +#endif /* Patch the setup code with the appropriate size parameters */ buf[0x1f1] = setup_sectors-1; From 5c27d0536adc684122587ce39467d450ecdf23b0 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 Sep 2018 01:58:18 +0000 Subject: [PATCH 0236/2608] pinctrl: qcom: spmi-mpp: Fix err handling of pmic_mpp_set_mux [ Upstream commit 69f8455f6cc78fa6cdf80d0105d7a748106271dc ] 'ret' should be returned while pmic_mpp_write_mode_ctl fails. Fixes: 0e948042c420 ("pinctrl: qcom: spmi-mpp: Implement support for sink mode") Signed-off-by: YueHaibing Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 6556dbeae65e..418bc40df7ca 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -319,6 +319,8 @@ static int pmic_mpp_set_mux(struct pinctrl_dev *pctldev, unsigned function, pad->function = function; ret = pmic_mpp_write_mode_ctl(state, pad); + if (ret < 0) + return ret; val = pad->is_enabled << PMIC_MPP_REG_MASTER_EN_SHIFT; From 4484de1283a7808c9a7ba55dc3841a44771a5db0 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 5 Sep 2018 09:48:58 +0200 Subject: [PATCH 0237/2608] brcmfmac: fix for proper support of 160MHz bandwidth [ Upstream commit 330994e8e8ec5d0b269a5265e6032b37e29aa336 ] Decoding of firmware channel information was not complete for 160MHz support. This resulted in the following warning: WARNING: CPU: 2 PID: 2222 at .../broadcom/brcm80211/brcmutil/d11.c:196 brcmu_d11ac_decchspec+0x2e/0x100 [brcmutil] Modules linked in: brcmfmac(O) brcmutil(O) sha256_generic cfg80211 ... CPU: 2 PID: 2222 Comm: kworker/2:0 Tainted: G O 4.17.0-wt-testing-x64-00002-gf1bed50 #1 Hardware name: Dell Inc. Latitude E6410/07XJP9, BIOS A07 02/15/2011 Workqueue: events request_firmware_work_func RIP: 0010:brcmu_d11ac_decchspec+0x2e/0x100 [brcmutil] RSP: 0018:ffffc90000047bd0 EFLAGS: 00010206 RAX: 000000000000e832 RBX: ffff8801146fe910 RCX: ffff8801146fd3c0 RDX: 0000000000002800 RSI: 0000000000000070 RDI: ffffc90000047c30 RBP: ffffc90000047bd0 R08: 0000000000000000 R09: ffffffffa0798c80 R10: ffff88012bca55e0 R11: ffff880110a4ea00 R12: ffff8801146f8000 R13: ffffc90000047c30 R14: ffff8801146fe930 R15: ffff8801138e02e0 FS: 0000000000000000(0000) GS:ffff88012bc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f18ce8b8070 CR3: 000000000200a003 CR4: 00000000000206e0 Call Trace: brcmf_setup_wiphybands+0x212/0x780 [brcmfmac] brcmf_cfg80211_attach+0xae2/0x11a0 [brcmfmac] brcmf_attach+0x1fc/0x4b0 [brcmfmac] ? __kmalloc+0x13c/0x1c0 brcmf_pcie_setup+0x99b/0xe00 [brcmfmac] brcmf_fw_request_done+0x16a/0x1f0 [brcmfmac] request_firmware_work_func+0x36/0x60 process_one_work+0x146/0x350 worker_thread+0x4a/0x3b0 kthread+0x102/0x140 ? process_one_work+0x350/0x350 ? kthread_bind+0x20/0x20 ret_from_fork+0x35/0x40 Code: 66 90 0f b7 07 55 48 89 e5 89 c2 88 47 02 88 47 03 66 81 e2 00 38 66 81 fa 00 18 74 6e 66 81 fa 00 20 74 39 66 81 fa 00 10 74 14 <0f> 0b 66 25 00 c0 74 20 66 3d 00 c0 75 20 c6 47 04 01 5d c3 66 ---[ end trace 550c46682415b26d ]--- brcmfmac: brcmf_construct_chaninfo: Ignoring unexpected firmware channel 50 This patch adds the missing stuff to properly handle this. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../broadcom/brcm80211/brcmutil/d11.c | 34 ++++++++++++++++++- .../broadcom/brcm80211/include/brcmu_wifi.h | 2 ++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c index d8b79cb72b58..e7584b842dce 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c @@ -77,6 +77,8 @@ static u16 d11ac_bw(enum brcmu_chan_bw bw) return BRCMU_CHSPEC_D11AC_BW_40; case BRCMU_CHAN_BW_80: return BRCMU_CHSPEC_D11AC_BW_80; + case BRCMU_CHAN_BW_160: + return BRCMU_CHSPEC_D11AC_BW_160; default: WARN_ON(1); } @@ -190,8 +192,38 @@ static void brcmu_d11ac_decchspec(struct brcmu_chan *ch) break; } break; - case BRCMU_CHSPEC_D11AC_BW_8080: case BRCMU_CHSPEC_D11AC_BW_160: + switch (ch->sb) { + case BRCMU_CHAN_SB_LLL: + ch->control_ch_num -= CH_70MHZ_APART; + break; + case BRCMU_CHAN_SB_LLU: + ch->control_ch_num -= CH_50MHZ_APART; + break; + case BRCMU_CHAN_SB_LUL: + ch->control_ch_num -= CH_30MHZ_APART; + break; + case BRCMU_CHAN_SB_LUU: + ch->control_ch_num -= CH_10MHZ_APART; + break; + case BRCMU_CHAN_SB_ULL: + ch->control_ch_num += CH_10MHZ_APART; + break; + case BRCMU_CHAN_SB_ULU: + ch->control_ch_num += CH_30MHZ_APART; + break; + case BRCMU_CHAN_SB_UUL: + ch->control_ch_num += CH_50MHZ_APART; + break; + case BRCMU_CHAN_SB_UUU: + ch->control_ch_num += CH_70MHZ_APART; + break; + default: + WARN_ON_ONCE(1); + break; + } + break; + case BRCMU_CHSPEC_D11AC_BW_8080: default: WARN_ON_ONCE(1); break; diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcmu_wifi.h b/drivers/net/wireless/broadcom/brcm80211/include/brcmu_wifi.h index 7b9a77981df1..75b2a0438cfa 100644 --- a/drivers/net/wireless/broadcom/brcm80211/include/brcmu_wifi.h +++ b/drivers/net/wireless/broadcom/brcm80211/include/brcmu_wifi.h @@ -29,6 +29,8 @@ #define CH_UPPER_SB 0x01 #define CH_LOWER_SB 0x02 #define CH_EWA_VALID 0x04 +#define CH_70MHZ_APART 14 +#define CH_50MHZ_APART 10 #define CH_30MHZ_APART 6 #define CH_20MHZ_APART 4 #define CH_10MHZ_APART 2 From 4c2f34ab8315299f8a4e43cb70bf981ef296d7bf Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 19 Sep 2018 11:39:31 +0200 Subject: [PATCH 0238/2608] net: phy: phylink: ensure the carrier is off when starting phylink [ Upstream commit aeeb2e8fdefdd5d257a1446351c70cb3df540199 ] Phylink made an assumption about the carrier state being down when calling phylink_start(). If this assumption isn't satisfied, the internal phylink state could misbehave and a net device could end up not being functional. This patch fixes this by explicitly calling netif_carrier_off() in phylink_start(). Signed-off-by: Antoine Tenart Acked-by: Russell King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phylink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 79f28b9186c6..70ce7da26d1f 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -747,6 +747,9 @@ void phylink_start(struct phylink *pl) phylink_an_mode_str(pl->link_an_mode), phy_modes(pl->link_config.interface)); + /* Always set the carrier off */ + netif_carrier_off(pl->netdev); + /* Apply the link configuration to the MAC when starting. This allows * a fixed-link to start with the correct parameters, and also * ensures that we set the appropriate advertisment for Serdes links. From d4d73f1fd8ec1f7c0581f177154a36b5acebb4f1 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 14 Sep 2018 16:23:07 +0200 Subject: [PATCH 0239/2608] block, bfq: correctly charge and reset entity service in all cases [ Upstream commit cbeb869a3d1110450186b738199963c5e68c2a71 ] BFQ schedules entities (which represent either per-process queues or groups of queues) as a function of their timestamps. In particular, as a function of their (virtual) finish times. The finish time of an entity is computed as a function of the budget assigned to the entity, assuming, tentatively, that the entity, once in service, will receive an amount of service equal to its budget. Then, when the entity is expired because it finishes to be served, this finish time is updated as a function of the actual service received by the entity. This allows the entity to be correctly charged with only the service received, and then to be correctly re-scheduled. Yet an entity may receive service also while not being the entity in service (in the scheduling environment of its parent entity), for several reasons. If the entity remains with no backlog while receiving this 'unofficial' service, then it is expired. Also on such an expiration, the finish time of the entity should be updated to account for only the service actually received by the entity. Unfortunately, such an update is not performed for an entity expiring without being the entity in service. In a similar vein, the service counter of the entity in service is reset when the entity is expired, to be ready to be used for next service cycle. This reset too should be performed also in case an entity is expired because it remains empty after receiving service while not being the entity in service. But in this case the reset is not performed. This commit performs the above update of the finish time and reset of the service received, also for an entity expiring while not being the entity in service. Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/bfq-wf2q.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 414ba686a847..c1727604ad14 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1172,10 +1172,17 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) st = bfq_entity_service_tree(entity); is_in_service = entity == sd->in_service_entity; - if (is_in_service) { - bfq_calc_finish(entity, entity->service); + bfq_calc_finish(entity, entity->service); + + if (is_in_service) sd->in_service_entity = NULL; - } + else + /* + * Non in-service entity: nobody will take care of + * resetting its service counter on expiration. Do it + * now. + */ + entity->service = 0; if (entity->tree == &st->active) bfq_active_extract(st, entity); From 0e0b860fffa41ecca0004da3bed908b7c6fbfcfb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 11 Sep 2018 19:20:40 +0900 Subject: [PATCH 0240/2608] kprobes: Return error if we fail to reuse kprobe instead of BUG_ON() [ Upstream commit 819319fc93461c07b9cdb3064f154bd8cfd48172 ] Make reuse_unused_kprobe() to return error code if it fails to reuse unused kprobe for optprobe instead of calling BUG_ON(). Signed-off-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Naveen N . Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/153666124040.21306.14150398706331307654.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5c90765d37e7..5cbad4fb9107 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -700,9 +700,10 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) } /* Cancel unoptimizing for reusing */ -static void reuse_unused_kprobe(struct kprobe *ap) +static int reuse_unused_kprobe(struct kprobe *ap) { struct optimized_kprobe *op; + int ret; BUG_ON(!kprobe_unused(ap)); /* @@ -716,8 +717,12 @@ static void reuse_unused_kprobe(struct kprobe *ap) /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; /* Optimize it again (remove from op->list) */ - BUG_ON(!kprobe_optready(ap)); + ret = kprobe_optready(ap); + if (ret) + return ret; + optimize_kprobe(ap); + return 0; } /* Remove optimized instructions */ @@ -942,11 +947,16 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt) #define kprobe_disarmed(p) kprobe_disabled(p) #define wait_for_kprobe_optimizer() do {} while (0) -/* There should be no unused kprobes can be reused without optimization */ -static void reuse_unused_kprobe(struct kprobe *ap) +static int reuse_unused_kprobe(struct kprobe *ap) { + /* + * If the optimized kprobe is NOT supported, the aggr kprobe is + * released at the same time that the last aggregated kprobe is + * unregistered. + * Thus there should be no chance to reuse unused kprobe. + */ printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); - BUG_ON(kprobe_unused(ap)); + return -EINVAL; } static void free_aggr_kprobe(struct kprobe *p) @@ -1320,9 +1330,12 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) goto out; } init_aggr_kprobe(ap, orig_p); - } else if (kprobe_unused(ap)) + } else if (kprobe_unused(ap)) { /* This probe is going to die. Rescue it */ - reuse_unused_kprobe(ap); + ret = reuse_unused_kprobe(ap); + if (ret) + goto out; + } if (kprobe_gone(ap)) { /* From 2a9dc99f9b03cb6d76b8f4d2f447a349371d10ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 27 Aug 2018 09:45:44 +0200 Subject: [PATCH 0241/2608] ACPI / LPSS: Add alternative ACPI HIDs for Cherry Trail DMA controllers [ Upstream commit 240714061c58e6b1abfb3322398a7634151c06cb ] Bay and Cherry Trail DSTDs represent a different set of devices depending on which OS the device think it is booting. One set of decices for Windows and another set of devices for Android which targets the Android-x86 Linux kernel fork (which e.g. used to have its own display driver instead of using the i915 driver). Which set of devices we are actually going to get is out of our control, this is controlled by the ACPI OSID variable, which gets either set through an EFI setup option, or sometimes is autodetected. So we need to support both. This commit adds support for the 80862286 and 808622C0 ACPI HIDs which we get for the first resp. second DMA controller on Cherry Trail devices when OSID is set to Android. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_lpss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 75c3cb377b98..a56d3f352765 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -326,9 +326,11 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = { { "INT33FC", }, /* Braswell LPSS devices */ + { "80862286", LPSS_ADDR(lpss_dma_desc) }, { "80862288", LPSS_ADDR(bsw_pwm_dev_desc) }, { "8086228A", LPSS_ADDR(bsw_uart_dev_desc) }, { "8086228E", LPSS_ADDR(bsw_spi_dev_desc) }, + { "808622C0", LPSS_ADDR(lpss_dma_desc) }, { "808622C1", LPSS_ADDR(bsw_i2c_dev_desc) }, /* Broadwell LPSS devices */ From 6b196dd5bcd92ea5217278c2d1a4bc64ddbb3036 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 30 Aug 2018 17:58:52 -0700 Subject: [PATCH 0242/2608] pinctrl: qcom: spmi-mpp: Fix drive strength setting [ Upstream commit 89c68b102f13f123aaef22b292526d6b92501334 ] It looks like we parse the drive strength setting here, but never actually write it into the hardware to update it. Parse the setting and then write it at the end of the pinconf setting function so that it actually sticks in the hardware. Fixes: 0e948042c420 ("pinctrl: qcom: spmi-mpp: Implement support for sink mode") Cc: Doug Anderson Signed-off-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 418bc40df7ca..7577f133d326 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -457,7 +457,7 @@ static int pmic_mpp_config_set(struct pinctrl_dev *pctldev, unsigned int pin, pad->dtest = arg; break; case PIN_CONFIG_DRIVE_STRENGTH: - arg = pad->drive_strength; + pad->drive_strength = arg; break; case PMIC_MPP_CONF_AMUX_ROUTE: if (arg >= PMIC_MPP_AMUX_ROUTE_ABUS4) @@ -504,6 +504,10 @@ static int pmic_mpp_config_set(struct pinctrl_dev *pctldev, unsigned int pin, if (ret < 0) return ret; + ret = pmic_mpp_write(state, pad, PMIC_MPP_REG_SINK_CTL, pad->drive_strength); + if (ret < 0) + return ret; + val = pad->is_enabled << PMIC_MPP_REG_MASTER_EN_SHIFT; return pmic_mpp_write(state, pad, PMIC_MPP_REG_EN_CTL, val); From 49d44c0a0b2d03fdb60fd217d6234e1987cb3040 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 30 Aug 2018 08:23:39 -0700 Subject: [PATCH 0243/2608] pinctrl: spmi-mpp: Fix pmic_mpp_config_get() to be compliant [ Upstream commit 0d5b476f8f57fcb06c45fe27681ac47254f63fd2 ] If you look at "pinconf-groups" in debugfs for ssbi-mpp you'll notice it looks like nonsense. The problem is fairly well described in commit 1cf86bc21257 ("pinctrl: qcom: spmi-gpio: Fix pmic_gpio_config_get() to be compliant") and commit 05e0c828955c ("pinctrl: msm: Fix msm_config_group_get() to be compliant"), but it was pointed out that ssbi-mpp has the same problem. Let's fix it there too. NOTE: in case it's helpful to someone reading this, the way to tell whether to do the -EINVAL or not is to look at the PCONFDUMP for a given attribute. If the last element (has_arg) is false then you need to do the -EINVAL trick. ALSO NOTE: it seems unlikely that the values returned when we try to get PIN_CONFIG_BIAS_PULL_UP will actually be printed since "has_arg" is false for that one, but I guess it's still fine to return different values so I kept doing that. It seems like another driver (ssbi-gpio) uses a custom attribute (PM8XXX_QCOM_PULL_UP_STRENGTH) for something similar so maybe a future change should do that here too. Fixes: cfb24f6ebd38 ("pinctrl: Qualcomm SPMI PMIC MPP pin controller driver") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 7577f133d326..ac251c62bc66 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -345,13 +345,12 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, switch (param) { case PIN_CONFIG_BIAS_DISABLE: - arg = pad->pullup == PMIC_MPP_PULL_UP_OPEN; + if (pad->pullup != PMIC_MPP_PULL_UP_OPEN) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_UP: switch (pad->pullup) { - case PMIC_MPP_PULL_UP_OPEN: - arg = 0; - break; case PMIC_MPP_PULL_UP_0P6KOHM: arg = 600; break; @@ -366,13 +365,17 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, } break; case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: - arg = !pad->is_enabled; + if (pad->is_enabled) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_POWER_SOURCE: arg = pad->power_source; break; case PIN_CONFIG_INPUT_ENABLE: - arg = pad->input_enabled; + if (!pad->input_enabled) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_OUTPUT: arg = pad->out_value; @@ -384,7 +387,9 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, arg = pad->amux_input; break; case PMIC_MPP_CONF_PAIRED: - arg = pad->paired; + if (!pad->paired) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_DRIVE_STRENGTH: arg = pad->drive_strength; From b7111ec0bdf1e3a833965ab6df7258066f95c7ec Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 30 Aug 2018 08:23:38 -0700 Subject: [PATCH 0244/2608] pinctrl: ssbi-gpio: Fix pm8xxx_pin_config_get() to be compliant [ Upstream commit b432414b996d32a1bd9afe2bd595bd5729c1477f ] If you look at "pinconf-groups" in debugfs for ssbi-gpio you'll notice it looks like nonsense. The problem is fairly well described in commit 1cf86bc21257 ("pinctrl: qcom: spmi-gpio: Fix pmic_gpio_config_get() to be compliant") and commit 05e0c828955c ("pinctrl: msm: Fix msm_config_group_get() to be compliant"), but it was pointed out that ssbi-gpio has the same problem. Let's fix it there too. Fixes: b4c45fe974bc ("pinctrl: qcom: ssbi: Family A gpio & mpp drivers") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 28 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index f53e32a9d8fc..0e153bae322e 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -260,22 +260,32 @@ static int pm8xxx_pin_config_get(struct pinctrl_dev *pctldev, switch (param) { case PIN_CONFIG_BIAS_DISABLE: - arg = pin->bias == PM8XXX_GPIO_BIAS_NP; + if (pin->bias != PM8XXX_GPIO_BIAS_NP) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_DOWN: - arg = pin->bias == PM8XXX_GPIO_BIAS_PD; + if (pin->bias != PM8XXX_GPIO_BIAS_PD) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_UP: - arg = pin->bias <= PM8XXX_GPIO_BIAS_PU_1P5_30; + if (pin->bias > PM8XXX_GPIO_BIAS_PU_1P5_30) + return -EINVAL; + arg = 1; break; case PM8XXX_QCOM_PULL_UP_STRENGTH: arg = pin->pull_up_strength; break; case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: - arg = pin->disable; + if (!pin->disable) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_INPUT_ENABLE: - arg = pin->mode == PM8XXX_GPIO_MODE_INPUT; + if (pin->mode != PM8XXX_GPIO_MODE_INPUT) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_OUTPUT: if (pin->mode & PM8XXX_GPIO_MODE_OUTPUT) @@ -290,10 +300,14 @@ static int pm8xxx_pin_config_get(struct pinctrl_dev *pctldev, arg = pin->output_strength; break; case PIN_CONFIG_DRIVE_PUSH_PULL: - arg = !pin->open_drain; + if (pin->open_drain) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_DRIVE_OPEN_DRAIN: - arg = pin->open_drain; + if (!pin->open_drain) + return -EINVAL; + arg = 1; break; default: return -EINVAL; From 189f98295600fc5bfc33cf0aa05392cf7f074650 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 2 Sep 2018 18:13:14 +0200 Subject: [PATCH 0245/2608] net: dsa: mv88e6xxx: Fix writing to a PHY page. [ Upstream commit c309b158090d788e96ee597444965cb79b040484 ] After changing to the needed page, actually write the value to the register! Fixes: 09cb7dfd3f14 ("net: dsa: mv88e6xxx: describe PHY page and SerDes") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/phy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c index 436668bd50dc..e53ce9610fee 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.c +++ b/drivers/net/dsa/mv88e6xxx/phy.c @@ -110,6 +110,9 @@ int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy, err = mv88e6xxx_phy_page_get(chip, phy, page); if (!err) { err = mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_PAGE, page); + if (!err) + err = mv88e6xxx_phy_write(chip, phy, reg, val); + mv88e6xxx_phy_page_put(chip, phy); } From 8692ff190ca02538538a2d020294fb2feccfc507 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 1 May 2018 14:54:22 +0300 Subject: [PATCH 0246/2608] iwlwifi: mvm: fix BAR seq ctrl reporting [ Upstream commit 941ab4eb66c10bc5c7234e83a7a858b2806ed151 ] There is a bug in FW where the sequence control may be incorrect, and the driver overrides it with the value of the ieee80211 header. However, in BAR there is no sequence control in the header, which result with arbitrary sequence. This access to an unknown location is bad and it makes the logs very confusing - so fix it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 6c014c273922..62a6e293cf12 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1345,6 +1345,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, while (!skb_queue_empty(&skbs)) { struct sk_buff *skb = __skb_dequeue(&skbs); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; bool flushed = false; skb_freed++; @@ -1389,11 +1390,11 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, info->flags |= IEEE80211_TX_STAT_AMPDU_NO_BACK; info->flags &= ~IEEE80211_TX_CTL_AMPDU; - /* W/A FW bug: seq_ctl is wrong when the status isn't success */ - if (status != TX_STATUS_SUCCESS) { - struct ieee80211_hdr *hdr = (void *)skb->data; + /* W/A FW bug: seq_ctl is wrong upon failure / BAR frame */ + if (ieee80211_is_back_req(hdr->frame_control)) + seq_ctl = 0; + else if (status != TX_STATUS_SUCCESS) seq_ctl = le16_to_cpu(hdr->seq_ctrl); - } if (unlikely(!seq_ctl)) { struct ieee80211_hdr *hdr = (void *)skb->data; From ca18ed4416bb83d91e6759ae6d9cfd84e2b11442 Mon Sep 17 00:00:00 2001 From: Sebastian Basierski Date: Thu, 9 Aug 2018 11:45:40 +0200 Subject: [PATCH 0247/2608] ixgbevf: VF2VF TCP RSS [ Upstream commit 7fb94bd58dd6650a0158e68d414e185077d8b57a ] While VF2VF with RSS communication, RSS Type were wrongly recognized and RSS hash was not calculated as it should be. Packets was distributed on various queues by accident. This commit fixes that behaviour and causes proper RSS Type recognition. Signed-off-by: Sebastian Basierski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 90be4385bf36..e238f6e85ab6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3427,6 +3427,10 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, skb_checksum_help(skb); goto no_csum; } + + if (first->protocol == htons(ETH_P_IP)) + type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; + /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; vlan_macip_lens = skb_checksum_start_offset(skb) - From ef5d27e1ed9c97c0670d0aa1f433f68e29de5bfb Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 22 Aug 2018 09:39:52 +0200 Subject: [PATCH 0248/2608] ath10k: schedule hardware restart if WMI command times out [ Upstream commit a9911937e7d332761e8c4fcbc7ba0426bdc3956f ] When running in AP mode, ath10k sometimes suffers from TX credit starvation. The issue is hard to reproduce and shows up once in a few days, but has been repeatedly seen with QCA9882 and a large range of firmwares, including 10.2.4.70.67. Once the module is in this state, TX credits are never replenished, which results in "SWBA overrun" errors, as no beacons can be sent. Even worse, WMI commands run in a timeout while holding the conf mutex for three seconds each, making any further operations slow and the whole system unresponsive. The firmware/driver never recovers from that state automatically, and triggering TX flush or warm restarts won't work over WMI. So issue a hardware restart if a WMI command times out due to missing TX credits. This implies a connectivity outage of about 1.4s in AP mode, but brings back the interface and the whole system to a usable state. WMI command timeouts have not been seen in absent of this specific issue, so taking such drastic actions seems legitimate. Signed-off-by: Martin Willi Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/wmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 2ab5311659ea..8cb47858eb00 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1852,6 +1852,12 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) if (ret) dev_kfree_skb_any(skb); + if (ret == -EAGAIN) { + ath10k_warn(ar, "wmi command %d timeout, restarting hardware\n", + cmd_id); + queue_work(ar->workqueue, &ar->restart_work); + } + return ret; } From 0a76f5c5b1014e7d59f87cb054f332ff8b1deea9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 12 Oct 2018 09:20:17 +0200 Subject: [PATCH 0249/2608] thermal: da9062/61: Prevent hardware access during system suspend [ Upstream commit 760eea43f8c6d48684f1f34b8a02fddc1456e849 ] The workqueue used for monitoring the hardware may run while the device is already suspended. Fix this by using the freezable system workqueue instead, cfr. commit 51e20d0e3a60cf46 ("thermal: Prevent polling from happening during system suspend"). Fixes: 608567aac3206ae8 ("thermal: da9062/61: Thermal junction temperature monitoring driver") Signed-off-by: Geert Uytterhoeven Acked-by: Steve Twiss Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/da9062-thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/da9062-thermal.c b/drivers/thermal/da9062-thermal.c index dd8dd947b7f0..01b0cb994457 100644 --- a/drivers/thermal/da9062-thermal.c +++ b/drivers/thermal/da9062-thermal.c @@ -106,7 +106,7 @@ static void da9062_thermal_poll_on(struct work_struct *work) THERMAL_EVENT_UNSPECIFIED); delay = msecs_to_jiffies(thermal->zone->passive_delay); - schedule_delayed_work(&thermal->work, delay); + queue_delayed_work(system_freezable_wq, &thermal->work, delay); return; } @@ -125,7 +125,7 @@ static irqreturn_t da9062_thermal_irq_handler(int irq, void *data) struct da9062_thermal *thermal = data; disable_irq_nosync(thermal->irq); - schedule_delayed_work(&thermal->work, 0); + queue_delayed_work(system_freezable_wq, &thermal->work, 0); return IRQ_HANDLED; } From 09d43edcdaef8961f01ae499b836e3cb39e47f83 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 18 Oct 2018 10:56:17 +0200 Subject: [PATCH 0250/2608] cgroup, netclassid: add a preemption point to write_classid [ Upstream commit a90e90b7d55e789c71d85b946ffb5c1ab2f137ca ] We have seen a customer complaining about soft lockups on !PREEMPT kernel config with 4.4 based kernel [1072141.435366] NMI watchdog: BUG: soft lockup - CPU#21 stuck for 22s! [systemd:1] [1072141.444090] Modules linked in: mpt3sas raid_class binfmt_misc af_packet 8021q garp mrp stp llc xfs libcrc32c bonding iscsi_ibft iscsi_boot_sysfs msr ext4 crc16 jbd2 mbcache cdc_ether usbnet mii joydev hid_generic usbhid intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_ssif mgag200 i2c_algo_bit ttm ipmi_devintf drbg ixgbe drm_kms_helper vxlan ansi_cprng ip6_udp_tunnel drm aesni_intel udp_tunnel aes_x86_64 iTCO_wdt syscopyarea ptp xhci_pci lrw iTCO_vendor_support pps_core gf128mul ehci_pci glue_helper sysfillrect mdio pcspkr sb_edac ablk_helper cryptd ehci_hcd sysimgblt xhci_hcd fb_sys_fops edac_core mei_me lpc_ich ses usbcore enclosure dca mfd_core ipmi_si mei i2c_i801 scsi_transport_sas usb_common ipmi_msghandler shpchp fjes wmi processor button acpi_pad btrfs xor raid6_pq sd_mod crc32c_intel megaraid_sas sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod md_mod autofs4 [1072141.444146] Supported: Yes [1072141.444149] CPU: 21 PID: 1 Comm: systemd Not tainted 4.4.121-92.80-default #1 [1072141.444150] Hardware name: LENOVO Lenovo System x3650 M5 -[5462P4U]- -[5462P4U]-/01GR451, BIOS -[TCE136H-2.70]- 06/13/2018 [1072141.444151] task: ffff880191bd0040 ti: ffff880191bd4000 task.ti: ffff880191bd4000 [1072141.444153] RIP: 0010:[] [] update_classid_sock+0x29/0x40 [1072141.444157] RSP: 0018:ffff880191bd7d58 EFLAGS: 00000286 [1072141.444158] RAX: ffff883b177cb7c0 RBX: 0000000000000000 RCX: 0000000000000000 [1072141.444159] RDX: 00000000000009c7 RSI: ffff880191bd7d5c RDI: ffff8822e29bb200 [1072141.444160] RBP: ffff883a72230980 R08: 0000000000000101 R09: 0000000000000000 [1072141.444161] R10: 0000000000000008 R11: f000000000000000 R12: ffffffff815229d0 [1072141.444162] R13: 0000000000000000 R14: ffff881fd0a47ac0 R15: ffff880191bd7f28 [1072141.444163] FS: 00007f3e2f1eb8c0(0000) GS:ffff882000340000(0000) knlGS:0000000000000000 [1072141.444164] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1072141.444165] CR2: 00007f3e2f200000 CR3: 0000001ffea4e000 CR4: 00000000001606f0 [1072141.444166] Stack: [1072141.444166] ffffffa800000246 00000000000009c7 ffffffff8121d583 ffff8818312a05c0 [1072141.444168] ffff8818312a1100 ffff880197c3b280 ffff881861422858 ffffffffffffffea [1072141.444170] ffffffff81522b1c ffffffff81d0ca20 ffff8817fa17b950 ffff883fdd8121e0 [1072141.444171] Call Trace: [1072141.444179] [] iterate_fd+0x53/0x80 [1072141.444182] [] write_classid+0x4c/0x80 [1072141.444187] [] cgroup_file_write+0x9b/0x100 [1072141.444193] [] kernfs_fop_write+0x11b/0x150 [1072141.444198] [] __vfs_write+0x26/0x100 [1072141.444201] [] vfs_write+0x9d/0x190 [1072141.444203] [] SyS_write+0x42/0xa0 [1072141.444207] [] entry_SYSCALL_64_fastpath+0x1e/0xca [1072141.445490] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x1e/0xca If a cgroup has many tasks with many open file descriptors then we would end up in a large loop without any rescheduling point throught the operation. Add cond_resched once per task. Signed-off-by: Michal Hocko Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/core/netclassid_cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 5e4f04004a49..7bf833598615 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -106,6 +106,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, iterate_fd(p->files, 0, update_classid_sock, (void *)(unsigned long)cs->classid); task_unlock(p); + cond_resched(); } css_task_iter_end(&it); From 94dc21c6a549aa67c8403142496c1531b91ded33 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 16 Oct 2018 16:31:25 +1100 Subject: [PATCH 0251/2608] scsi: esp_scsi: Track residual for PIO transfers [ Upstream commit fd47d919d0c336e7c22862b51ee94927ffea227a ] If a target disconnects during a PIO data transfer the command may fail when the target reconnects: scsi host1: DMA length is zero! scsi host1: cur adr[04380000] len[00000000] The scsi bus is then reset. This happens because the residual reached zero before the transfer was completed. The usual residual calculation relies on the Transfer Count registers. That works for DMA transfers but not for PIO transfers. Fix the problem by storing the PIO transfer residual and using that to correctly calculate bytes_sent. Fixes: 6fe07aaffbf0 ("[SCSI] m68k: new mac_esp scsi driver") Tested-by: Stan Johnson Signed-off-by: Finn Thain Tested-by: Michael Schmitz Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/esp_scsi.c | 1 + drivers/scsi/esp_scsi.h | 2 ++ drivers/scsi/mac_esp.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index c3fc34b9964d..9e5d3f7d29ae 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -1338,6 +1338,7 @@ static int esp_data_bytes_sent(struct esp *esp, struct esp_cmd_entry *ent, bytes_sent = esp->data_dma_len; bytes_sent -= ecount; + bytes_sent -= esp->send_cmd_residual; /* * The am53c974 has a DMA 'pecularity'. The doc states: diff --git a/drivers/scsi/esp_scsi.h b/drivers/scsi/esp_scsi.h index 8163dca2071b..a77772777a30 100644 --- a/drivers/scsi/esp_scsi.h +++ b/drivers/scsi/esp_scsi.h @@ -540,6 +540,8 @@ struct esp { void *dma; int dmarev; + + u32 send_cmd_residual; }; /* A front-end driver for the ESP chip should do the following in diff --git a/drivers/scsi/mac_esp.c b/drivers/scsi/mac_esp.c index eb551f3cc471..71879f2207e0 100644 --- a/drivers/scsi/mac_esp.c +++ b/drivers/scsi/mac_esp.c @@ -427,6 +427,8 @@ static void mac_esp_send_pio_cmd(struct esp *esp, u32 addr, u32 esp_count, scsi_esp_cmd(esp, ESP_CMD_TI); } } + + esp->send_cmd_residual = esp_count; } static int mac_esp_irq_pending(struct esp *esp) From be5658a1dce1664c9b1b414a2ce1a14eceea590a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Sep 2018 10:19:24 +0100 Subject: [PATCH 0252/2608] UAPI: ndctl: Fix g++-unsupported initialisation in headers [ Upstream commit 9607871f37dc3e717639694b8d0dc738f2a68efc ] The following code in the linux/ndctl header file: static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { static const char * const names[] = { [ND_CMD_ARS_CAP] = "ars_cap", [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", [ND_CMD_CLEAR_ERROR] = "clear_error", [ND_CMD_CALL] = "cmd_call", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) return names[cmd]; return "unknown"; } is broken in a number of ways: (1) ARRAY_SIZE() is not generally defined. (2) g++ does not support "non-trivial" array initialisers fully yet. (3) Every file that calls this function will acquire a copy of names[]. The same goes for nvdimm_cmd_name(). Fix all three by converting to a switch statement where each case returns a string. That way if cmd is a constant, the compiler can trivially reduce it and, if not, the compiler can use a shared lookup table if it thinks that is more efficient. A better way would be to remove these functions and their arrays from the header entirely. Signed-off-by: David Howells Signed-off-by: Dan Williams Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/ndctl.h | 48 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 3f03567631cb..145f242c7c90 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -176,37 +176,31 @@ enum { static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { - static const char * const names[] = { - [ND_CMD_ARS_CAP] = "ars_cap", - [ND_CMD_ARS_START] = "ars_start", - [ND_CMD_ARS_STATUS] = "ars_status", - [ND_CMD_CLEAR_ERROR] = "clear_error", - [ND_CMD_CALL] = "cmd_call", - }; - - if (cmd < ARRAY_SIZE(names) && names[cmd]) - return names[cmd]; - return "unknown"; + switch (cmd) { + case ND_CMD_ARS_CAP: return "ars_cap"; + case ND_CMD_ARS_START: return "ars_start"; + case ND_CMD_ARS_STATUS: return "ars_status"; + case ND_CMD_CLEAR_ERROR: return "clear_error"; + case ND_CMD_CALL: return "cmd_call"; + default: return "unknown"; + } } static inline const char *nvdimm_cmd_name(unsigned cmd) { - static const char * const names[] = { - [ND_CMD_SMART] = "smart", - [ND_CMD_SMART_THRESHOLD] = "smart_thresh", - [ND_CMD_DIMM_FLAGS] = "flags", - [ND_CMD_GET_CONFIG_SIZE] = "get_size", - [ND_CMD_GET_CONFIG_DATA] = "get_data", - [ND_CMD_SET_CONFIG_DATA] = "set_data", - [ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size", - [ND_CMD_VENDOR_EFFECT_LOG] = "effect_log", - [ND_CMD_VENDOR] = "vendor", - [ND_CMD_CALL] = "cmd_call", - }; - - if (cmd < ARRAY_SIZE(names) && names[cmd]) - return names[cmd]; - return "unknown"; + switch (cmd) { + case ND_CMD_SMART: return "smart"; + case ND_CMD_SMART_THRESHOLD: return "smart_thresh"; + case ND_CMD_DIMM_FLAGS: return "flags"; + case ND_CMD_GET_CONFIG_SIZE: return "get_size"; + case ND_CMD_GET_CONFIG_DATA: return "get_data"; + case ND_CMD_SET_CONFIG_DATA: return "set_data"; + case ND_CMD_VENDOR_EFFECT_LOG_SIZE: return "effect_size"; + case ND_CMD_VENDOR_EFFECT_LOG: return "effect_log"; + case ND_CMD_VENDOR: return "vendor"; + case ND_CMD_CALL: return "cmd_call"; + default: return "unknown"; + } } #define ND_IOCTL 'N' From 7d6c6f839e989f0488e0c4a5e2b789bc3cbd0078 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 21 Sep 2018 10:36:17 -0700 Subject: [PATCH 0253/2608] KVM: nVMX: Clear reserved bits of #DB exit qualification [ Upstream commit cfb634fe3052aefc4e1360fa322018c9a0b49755 ] According to volume 3 of the SDM, bits 63:15 and 12:4 of the exit qualification field for debug exceptions are reserved (cleared to 0). However, the SDM is incorrect about bit 16 (corresponding to DR6.RTM). This bit should be set if a debug exception (#DB) or a breakpoint exception (#BP) occurred inside an RTM region while advanced debugging of RTM transactional regions was enabled. Note that this is the opposite of DR6.RTM, which "indicates (when clear) that a debug exception (#DB) or breakpoint exception (#BP) occurred inside an RTM region while advanced debugging of RTM transactional regions was enabled." There is still an issue with stale DR6 bits potentially being misreported for the current debug exception. DR6 should not have been modified before vectoring the #DB exception, and the "new DR6 bits" should be available somewhere, but it was and they aren't. Fixes: b96fb439774e1 ("KVM: nVMX: fixes to nested virt interrupt injection") Signed-off-by: Jim Mattson Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4015b88383ce..367cdd263a5c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -174,6 +174,7 @@ enum { #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) +#define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) #define DR6_FIXED_1 0xfffe0ff0 #define DR6_INIT 0xffff0ff0 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fd46d890296c..ec588cf4fe95 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2733,10 +2733,13 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit } } else { if (vmcs12->exception_bitmap & (1u << nr)) { - if (nr == DB_VECTOR) + if (nr == DB_VECTOR) { *exit_qual = vcpu->arch.dr6; - else + *exit_qual &= ~(DR6_FIXED_1 | DR6_BT); + *exit_qual ^= DR6_RTM; + } else { *exit_qual = 0; + } return 1; } } From 4877db2eab42c7f451f15a1d47a7832b76cd6b98 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 6 Oct 2018 13:34:21 -0500 Subject: [PATCH 0254/2608] scsi: megaraid_sas: fix a missing-check bug [ Upstream commit 47db7873136a9c57c45390a53b57019cf73c8259 ] In megasas_mgmt_compat_ioctl_fw(), to handle the structure compat_megasas_iocpacket 'cioc', a user-space structure megasas_iocpacket 'ioc' is allocated before megasas_mgmt_ioctl_fw() is invoked to handle the packet. Since the two data structures have different fields, the data is copied from 'cioc' to 'ioc' field by field. In the copy process, 'sense_ptr' is prepared if the field 'sense_len' is not null, because it will be used in megasas_mgmt_ioctl_fw(). To prepare 'sense_ptr', the user-space data 'ioc->sense_off' and 'cioc->sense_off' are copied and saved to kernel-space variables 'local_sense_off' and 'user_sense_off' respectively. Given that 'ioc->sense_off' is also copied from 'cioc->sense_off', 'local_sense_off' and 'user_sense_off' should have the same value. However, 'cioc' is in the user space and a malicious user can race to change the value of 'cioc->sense_off' after it is copied to 'ioc->sense_off' but before it is copied to 'user_sense_off'. By doing so, the attacker can inject different values into 'local_sense_off' and 'user_sense_off'. This can cause undefined behavior in the following execution, because the two variables are supposed to be same. This patch enforces a check on the two kernel variables 'local_sense_off' and 'user_sense_off' to make sure they are the same after the copy. In case they are not, an error code EINVAL will be returned. Signed-off-by: Wenwen Wang Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index d55c365be238..d0abee3e6ed9 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -7361,6 +7361,9 @@ static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg) get_user(user_sense_off, &cioc->sense_off)) return -EFAULT; + if (local_sense_off != user_sense_off) + return -EINVAL; + if (local_sense_len) { void __user **sense_ioc_ptr = (void __user **)((u8 *)((unsigned long)&ioc->frame.raw) + local_sense_off); From 670ebdf0c41d366ca024f4d1016248e166f8b046 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 7 Oct 2018 12:12:40 +0300 Subject: [PATCH 0255/2608] RDMA/core: Do not expose unsupported counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0f6ef65d1c6ec8deb5d0f11f86631ec4cfe8f22e ] If the provider driver (such as rdma_rxe) doesn't support pma counters, avoid exposing its directory similar to optional hw_counters directory. If core fails to read the PMA counter, return an error so that user can retry later if needed. Fixes: 35c4cbb17811 ("IB/core: Create get_perf_mad function in sysfs.c") Reported-by: Holger Hoffstätte Tested-by: Holger Hoffstätte Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/sysfs.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 0a1e96c25ca3..f75f99476ad0 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -489,7 +489,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 40 + offset / 8, sizeof(data)); if (ret < 0) - return sprintf(buf, "N/A (no PMA)\n"); + return ret; switch (width) { case 4: @@ -1012,10 +1012,12 @@ static int add_port(struct ib_device *device, int port_num, goto err_put; } - p->pma_table = get_counter_table(device, port_num); - ret = sysfs_create_group(&p->kobj, p->pma_table); - if (ret) - goto err_put_gid_attrs; + if (device->process_mad) { + p->pma_table = get_counter_table(device, port_num); + ret = sysfs_create_group(&p->kobj, p->pma_table); + if (ret) + goto err_put_gid_attrs; + } p->gid_group.name = "gids"; p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); @@ -1128,7 +1130,8 @@ err_free_gid: p->gid_group.attrs = NULL; err_remove_pma: - sysfs_remove_group(&p->kobj, p->pma_table); + if (p->pma_table) + sysfs_remove_group(&p->kobj, p->pma_table); err_put_gid_attrs: kobject_put(&p->gid_attr_group->kobj); @@ -1240,7 +1243,9 @@ static void free_port_list_attributes(struct ib_device *device) kfree(port->hw_stats); free_hsag(&port->kobj, port->hw_stats_ag); } - sysfs_remove_group(p, port->pma_table); + + if (port->pma_table) + sysfs_remove_group(p, port->pma_table); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); sysfs_remove_group(&port->gid_attr_group->kobj, From b020347269610f3ac05d27c3851e9aa8d1257141 Mon Sep 17 00:00:00 2001 From: Denis Drozdov Date: Thu, 11 Oct 2018 22:33:57 +0300 Subject: [PATCH 0256/2608] IB/ipoib: Clear IPCB before icmp_send [ Upstream commit 4d6e4d12da2c308f8f976d3955c45ee62539ac98 ] IPCB should be cleared before icmp_send, since it may contain data from previous layers and the data could be misinterpreted as ip header options, which later caused the ihl to be set to an invalid value and resulted in the following stack corruption: [ 1083.031512] ib0: packet len 57824 (> 2048) too long to send, dropping [ 1083.031843] ib0: packet len 37904 (> 2048) too long to send, dropping [ 1083.032004] ib0: packet len 4040 (> 2048) too long to send, dropping [ 1083.032253] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.032481] ib0: packet len 23960 (> 2048) too long to send, dropping [ 1083.033149] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.033439] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.033700] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.034124] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.034387] ================================================================== [ 1083.034602] BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0xf08/0x1310 [ 1083.034798] Write of size 4 at addr ffff880353457c5f by task kworker/u16:0/7 [ 1083.034990] [ 1083.035104] CPU: 7 PID: 7 Comm: kworker/u16:0 Tainted: G O 4.19.0-rc5+ #1 [ 1083.035316] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 [ 1083.035573] Workqueue: ipoib_wq ipoib_cm_skb_reap [ib_ipoib] [ 1083.035750] Call Trace: [ 1083.035888] dump_stack+0x9a/0xeb [ 1083.036031] print_address_description+0xe3/0x2e0 [ 1083.036213] kasan_report+0x18a/0x2e0 [ 1083.036356] ? __ip_options_echo+0xf08/0x1310 [ 1083.036522] __ip_options_echo+0xf08/0x1310 [ 1083.036688] icmp_send+0x7b9/0x1cd0 [ 1083.036843] ? icmp_route_lookup.constprop.9+0x1070/0x1070 [ 1083.037018] ? netif_schedule_queue+0x5/0x200 [ 1083.037180] ? debug_show_all_locks+0x310/0x310 [ 1083.037341] ? rcu_dynticks_curr_cpu_in_eqs+0x85/0x120 [ 1083.037519] ? debug_locks_off+0x11/0x80 [ 1083.037673] ? debug_check_no_obj_freed+0x207/0x4c6 [ 1083.037841] ? check_flags.part.27+0x450/0x450 [ 1083.037995] ? debug_check_no_obj_freed+0xc3/0x4c6 [ 1083.038169] ? debug_locks_off+0x11/0x80 [ 1083.038318] ? skb_dequeue+0x10e/0x1a0 [ 1083.038476] ? ipoib_cm_skb_reap+0x2b5/0x650 [ib_ipoib] [ 1083.038642] ? netif_schedule_queue+0xa8/0x200 [ 1083.038820] ? ipoib_cm_skb_reap+0x544/0x650 [ib_ipoib] [ 1083.038996] ipoib_cm_skb_reap+0x544/0x650 [ib_ipoib] [ 1083.039174] process_one_work+0x912/0x1830 [ 1083.039336] ? wq_pool_ids_show+0x310/0x310 [ 1083.039491] ? lock_acquire+0x145/0x3a0 [ 1083.042312] worker_thread+0x87/0xbb0 [ 1083.045099] ? process_one_work+0x1830/0x1830 [ 1083.047865] kthread+0x322/0x3e0 [ 1083.050624] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 1083.053354] ret_from_fork+0x3a/0x50 For instance __ip_options_echo is failing to proceed with invalid srr and optlen passed from another layer via IPCB [ 762.139568] IPv4: __ip_options_echo rr=0 ts=0 srr=43 cipso=0 [ 762.139720] IPv4: ip_options_build: IPCB 00000000f3cd969e opt 000000002ccb3533 [ 762.139838] IPv4: __ip_options_echo in srr: optlen 197 soffset 84 [ 762.139852] IPv4: ip_options_build srr=0 is_frag=0 rr_needaddr=0 ts_needaddr=0 ts_needtime=0 rr=0 ts=0 [ 762.140269] ================================================================== [ 762.140713] IPv4: __ip_options_echo rr=0 ts=0 srr=0 cipso=0 [ 762.141078] BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0x12ec/0x1680 [ 762.141087] Write of size 4 at addr ffff880353457c7f by task kworker/u16:0/7 Signed-off-by: Denis Drozdov Reviewed-by: Erez Shitrit Reviewed-by: Feras Daoud Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 9939f32d0154..0e85b3445c07 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1427,11 +1427,15 @@ static void ipoib_cm_skb_reap(struct work_struct *work) spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); - if (skb->protocol == htons(ETH_P_IP)) + if (skb->protocol == htons(ETH_P_IP)) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + } #if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (skb->protocol == htons(ETH_P_IPV6)) { + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } #endif dev_kfree_skb_any(skb); From 88018bc071bcfd70b151ff236b6029cc3a9ff7c6 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 8 Oct 2018 03:27:53 -0700 Subject: [PATCH 0257/2608] RDMA/bnxt_re: Fix recursive lock warning in debug kernel [ Upstream commit d455f29f6d76a5f94881ca1289aaa1e90617ff5d ] Fix possible recursive lock warning. Its a false warning as the locks are part of two differnt HW Queue data structure - cmdq and creq. Debug kernel is throwing the following warning and stack trace. [ 783.914967] ============================================ [ 783.914970] WARNING: possible recursive locking detected [ 783.914973] 4.19.0-rc2+ #33 Not tainted [ 783.914976] -------------------------------------------- [ 783.914979] swapper/2/0 is trying to acquire lock: [ 783.914982] 000000002aa3949d (&(&hwq->lock)->rlock){..-.}, at: bnxt_qplib_service_creq+0x232/0x350 [bnxt_re] [ 783.914999] but task is already holding lock: [ 783.915002] 00000000be73920d (&(&hwq->lock)->rlock){..-.}, at: bnxt_qplib_service_creq+0x2a/0x350 [bnxt_re] [ 783.915013] other info that might help us debug this: [ 783.915016] Possible unsafe locking scenario: [ 783.915019] CPU0 [ 783.915021] ---- [ 783.915034] lock(&(&hwq->lock)->rlock); [ 783.915035] lock(&(&hwq->lock)->rlock); [ 783.915037] *** DEADLOCK *** [ 783.915038] May be due to missing lock nesting notation [ 783.915039] 1 lock held by swapper/2/0: [ 783.915040] #0: 00000000be73920d (&(&hwq->lock)->rlock){..-.}, at: bnxt_qplib_service_creq+0x2a/0x350 [bnxt_re] [ 783.915044] stack backtrace: [ 783.915046] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.19.0-rc2+ #33 [ 783.915047] Hardware name: Dell Inc. PowerEdge R730/0599V5, BIOS 1.0.4 08/28/2014 [ 783.915048] Call Trace: [ 783.915049] [ 783.915054] dump_stack+0x90/0xe3 [ 783.915058] __lock_acquire+0x106c/0x1080 [ 783.915061] ? sched_clock+0x5/0x10 [ 783.915063] lock_acquire+0xbd/0x1a0 [ 783.915065] ? bnxt_qplib_service_creq+0x232/0x350 [bnxt_re] [ 783.915069] _raw_spin_lock_irqsave+0x4a/0x90 [ 783.915071] ? bnxt_qplib_service_creq+0x232/0x350 [bnxt_re] [ 783.915073] bnxt_qplib_service_creq+0x232/0x350 [bnxt_re] [ 783.915078] tasklet_action_common.isra.17+0x197/0x1b0 [ 783.915081] __do_softirq+0xcb/0x3a6 [ 783.915084] irq_exit+0xe9/0x100 [ 783.915085] do_IRQ+0x6a/0x120 [ 783.915087] common_interrupt+0xf/0xf [ 783.915088] Use nested notation for the spin_lock to avoid this warning. Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 8d91733009a4..ad74988837c9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -311,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, bnxt_qplib_release_cq_locks(qp, &flags); break; default: - /* Command Response */ - spin_lock_irqsave(&cmdq->lock, flags); + /* + * Command Response + * cmdq->lock needs to be acquired to synchronie + * the command send and completion reaping. This function + * is always called with creq->lock held. Using + * the nested variant of spin_lock. + * + */ + + spin_lock_irqsave_nested(&cmdq->lock, flags, + SINGLE_DEPTH_NESTING); cookie = le16_to_cpu(qp_event->cookie); mcookie = qp_event->cookie; blocked = cookie & RCFW_CMD_IS_BLOCKING; From 07335466dd5aa91f0e4b60b3254bd032f98d4fd8 Mon Sep 17 00:00:00 2001 From: "Tudor.Ambarus@microchip.com" Date: Mon, 15 Oct 2018 09:00:54 +0000 Subject: [PATCH 0258/2608] usb: host: ohci-at91: fix request of irq for optional gpio [ Upstream commit 325b9313ec3be56c8e2fe03f977fee19cec75820 ] atmel,oc-gpio is optional. Request its irq only when atmel,oc is set in device tree. devm_gpiod_get_index_optional returns NULL if -ENOENT. Check its return value for NULL before error, because it is more probable that atmel,oc is not set. This fixes the following errors on boards where atmel,oc is not set in device tree: [ 0.960000] at91_ohci 500000.ohci: failed to request gpio "overcurrent" IRQ [ 0.960000] at91_ohci 500000.ohci: failed to request gpio "overcurrent" IRQ [ 0.970000] at91_ohci 500000.ohci: failed to request gpio "overcurrent" IRQ Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 5302f988e7e6..e0ebd3d513c6 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -550,6 +550,8 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev) pdata->overcurrent_pin[i] = devm_gpiod_get_index_optional(&pdev->dev, "atmel,oc", i, GPIOD_IN); + if (!pdata->overcurrent_pin[i]) + continue; if (IS_ERR(pdata->overcurrent_pin[i])) { err = PTR_ERR(pdata->overcurrent_pin[i]); dev_err(&pdev->dev, "unable to claim gpio \"overcurrent\": %d\n", err); From 38ee346ac1fee2f3da3bfb6ae0e0c07fb85c9f77 Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Mon, 15 Oct 2018 16:08:52 +0800 Subject: [PATCH 0259/2608] PCI: mediatek: Fix mtk_pcie_find_port() endpoint/port matching logic [ Upstream commit 074d6f32689ce05a084b6fa3db38445745bf11cc ] The Mediatek's host controller has two slots, each with its own control registers. The host driver needs to identify what slot is connected to what port in order to access the device's configuration space. Current code retrieving slot connected to a given endpoint device. Assuming each slot is connected to one endpoint device as below: host bridge bus 0 --> __________|_______ | | | | slot 0 slot 1 bus 1 -->| bus 2 --> | | | EP 0 EP 1 During PCI enumeration, system software will scan all the PCI devices on every bus starting from devfn 0. Using PCI_SLOT(devfn) for matching an endpoint to its slot is erroneous in that the devfn does not contain the hierarchical bus numbering in it. In order to match an endpoint with its slot (and related port), the PCI tree must be walked up to the root bus (where the root ports are situated) and then the PCI_SLOT(devfn) matching logic can be correctly applied for matching. This patch fixes the mtk_pcie_find_port() slot matching logic by adding appropriate PCI tree walking code to retrieve the slot/port a given endpoint is connected to. Signed-off-by: Honghui Zhang [lorenzo.pieralisi@arm.com: rewrote the commit log] Signed-off-by: Lorenzo Pieralisi Acked-by: Ryder Lee Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-mediatek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/host/pcie-mediatek.c b/drivers/pci/host/pcie-mediatek.c index db93efdf1d63..c896bb9ef968 100644 --- a/drivers/pci/host/pcie-mediatek.c +++ b/drivers/pci/host/pcie-mediatek.c @@ -333,6 +333,17 @@ static struct mtk_pcie_port *mtk_pcie_find_port(struct pci_bus *bus, { struct mtk_pcie *pcie = bus->sysdata; struct mtk_pcie_port *port; + struct pci_dev *dev = NULL; + + /* + * Walk the bus hierarchy to get the devfn value + * of the port in the root bus. + */ + while (bus && bus->number) { + dev = bus->self; + bus = dev->bus; + devfn = dev->devfn; + } list_for_each_entry(port, &pcie->ports, list) if (port->slot == PCI_SLOT(devfn)) From 6057caad215b163aca43c7e0346e2dbc052cc10a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 30 Aug 2018 16:40:05 +0200 Subject: [PATCH 0260/2608] tpm: suppress transmit cmd error logs when TPM 1.2 is disabled/deactivated [ Upstream commit 0d6d0d62d9505a9816716aa484ebd0b04c795063 ] For TPM 1.2 chips the system setup utility allows to set the TPM device in one of the following states: * Active: Security chip is functional * Inactive: Security chip is visible, but is not functional * Disabled: Security chip is hidden and is not functional When choosing the "Inactive" state, the TPM 1.2 device is enumerated and registered, but sending TPM commands fail with either TPM_DEACTIVATED or TPM_DISABLED depending if the firmware deactivated or disabled the TPM. Since these TPM 1.2 error codes don't have special treatment, inactivating the TPM leads to a very noisy kernel log buffer that shows messages like the following: tpm_tis 00:05: 1.2 TPM (device-id 0x0, rev-id 78) tpm tpm0: A TPM error (6) occurred attempting to read a pcr value tpm tpm0: TPM is disabled/deactivated (0x6) tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting to read a pcr value ima: No TPM chip found, activating TPM-bypass! (rc=6) tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random Let's just suppress error log messages for the TPM_{DEACTIVATED,DISABLED} return codes, since this is expected when the TPM 1.2 is set to Inactive. In that case the kernel log is cleaner and less confusing for users, i.e: tpm_tis 00:05: 1.2 TPM (device-id 0x0, rev-id 78) tpm tpm0: TPM is disabled/deactivated (0x6) ima: No TPM chip found, activating TPM-bypass! (rc=6) Reported-by: Hans de Goede Signed-off-by: Javier Martinez Canillas Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-interface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 89d5915b1a3f..6e93df272c20 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -653,7 +653,8 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_space *space, return len; err = be32_to_cpu(header->return_code); - if (err != 0 && desc) + if (err != 0 && err != TPM_ERR_DISABLED && err != TPM_ERR_DEACTIVATED + && desc) dev_err(&chip->dev, "A TPM error (%d) occurred %s\n", err, desc); if (err) From 4f4850858f8584a45652ac83fe21feaf7b45c87c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 23 Sep 2018 21:10:44 +0000 Subject: [PATCH 0261/2608] Drivers: hv: vmbus: Use cpumask_var_t for on-stack cpu mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 25355252607ca288f329ee033f387764883393f6 ] A cpumask structure on the stack can cause a warning with CONFIG_NR_CPUS=8192 (e.g. Ubuntu 16.04 and 18.04 use this): drivers/hv//channel_mgmt.c: In function ‘init_vp_index’: drivers/hv//channel_mgmt.c:702:1: warning: the frame size of 1032 bytes is larger than 1024 bytes [-Wframe-larger-than=] Nowadays it looks most distros enable CONFIG_CPUMASK_OFFSTACK=y, and hence we can work around the warning by using cpumask_var_t. Signed-off-by: Dexuan Cui Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1700b4e7758d..752c52f7353d 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -599,16 +599,18 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) bool perf_chn = vmbus_devs[dev_type].perf_device; struct vmbus_channel *primary = channel->primary_channel; int next_node; - struct cpumask available_mask; + cpumask_var_t available_mask; struct cpumask *alloced_mask; if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { + (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) || + !alloc_cpumask_var(&available_mask, GFP_KERNEL)) { /* * Prior to win8, all channel interrupts are * delivered on cpu 0. * Also if the channel is not a performance critical * channel, bind it to cpu 0. + * In case alloc_cpumask_var() fails, bind it to cpu 0. */ channel->numa_node = 0; channel->target_cpu = 0; @@ -646,7 +648,7 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) cpumask_clear(alloced_mask); } - cpumask_xor(&available_mask, alloced_mask, + cpumask_xor(available_mask, alloced_mask, cpumask_of_node(primary->numa_node)); cur_cpu = -1; @@ -664,10 +666,10 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) } while (true) { - cur_cpu = cpumask_next(cur_cpu, &available_mask); + cur_cpu = cpumask_next(cur_cpu, available_mask); if (cur_cpu >= nr_cpu_ids) { cur_cpu = -1; - cpumask_copy(&available_mask, + cpumask_copy(available_mask, cpumask_of_node(primary->numa_node)); continue; } @@ -697,6 +699,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) channel->target_cpu = cur_cpu; channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu); + + free_cpumask_var(available_mask); } static void vmbus_wait_for_unload(void) From a9ecbed15e83db8b2ff245c3b6138225e6f9783b Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Fri, 21 Sep 2018 00:31:05 -0700 Subject: [PATCH 0262/2608] VMCI: Resource wildcard match fixed [ Upstream commit 11924ba5e671d6caef1516923e2bd8c72929a3fe ] When adding a VMCI resource, the check for an existing entry would ignore that the new entry could be a wildcard. This could result in multiple resource entries that would match a given handle. One disastrous outcome of this is that the refcounting used to ensure that delayed callbacks for VMCI datagrams have run before the datagram is destroyed can be wrong, since the refcount could be increased on the duplicate entry. This in turn leads to a use after free bug. This issue was discovered by Hangbin Liu using KASAN and syzkaller. Fixes: bc63dedb7d46 ("VMCI: resource object implementation") Reported-by: Hangbin Liu Reviewed-by: Adit Ranadive Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_driver.c | 2 +- drivers/misc/vmw_vmci/vmci_resource.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c index d7eaf1eb11e7..003bfba40758 100644 --- a/drivers/misc/vmw_vmci/vmci_driver.c +++ b/drivers/misc/vmw_vmci/vmci_driver.c @@ -113,5 +113,5 @@ module_exit(vmci_drv_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface."); -MODULE_VERSION("1.1.5.0-k"); +MODULE_VERSION("1.1.6.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c index 1ab6e8737a5f..da1ee2e1ba99 100644 --- a/drivers/misc/vmw_vmci/vmci_resource.c +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -57,7 +57,8 @@ static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle, if (r->type == type && rid == handle.resource && - (cid == handle.context || cid == VMCI_INVALID_ID)) { + (cid == handle.context || cid == VMCI_INVALID_ID || + handle.context == VMCI_INVALID_ID)) { resource = r; break; } From e105738583b02017b92ddfd2676f8bc408b88781 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 27 Sep 2018 16:54:13 -0500 Subject: [PATCH 0263/2608] PCI / ACPI: Enable wake automatically for power managed bridges [ Upstream commit 6299cf9ec3985cac70bede8a855b5087b81a6640 ] We enable power management automatically for bridges where pci_bridge_d3_possible() returns true. However, these bridges may have ACPI methods such as _DSW that need to be called before D3 entry. For example in Lenovo Thinkpad X1 Carbon 6th _DSW method is used to prepare D3cold for the PCIe root port hosting Thunderbolt chain. Because wake is not enabled _DSW method is never called and the port does not enter D3cold properly consuming more power than necessary. Users can work this around by writing "enabled" to "wakeup" sysfs file under the device in question but that is not something an ordinary user is expected to do. Since we already automatically enable power management for PCIe ports with ->bridge_d3 set extend that to enable wake for them as well, assuming the port has any ACPI wakeup related objects implemented in the namespace (adev->wakeup.flags.valid is true). This ensures the necessary ACPI methods get called at appropriate times and allows the root port in Thinkpad X1 Carbon 6th to go into D3cold. Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-acpi.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 4708eb9df71b..a3cedf8de863 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -738,19 +738,33 @@ static void pci_acpi_setup(struct device *dev) return; device_set_wakeup_capable(dev, true); + /* + * For bridges that can do D3 we enable wake automatically (as + * we do for the power management itself in that case). The + * reason is that the bridge may have additional methods such as + * _DSW that need to be called. + */ + if (pci_dev->bridge_d3) + device_wakeup_enable(dev); + acpi_pci_wakeup(pci_dev, false); } static void pci_acpi_cleanup(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); + struct pci_dev *pci_dev = to_pci_dev(dev); if (!adev) return; pci_acpi_remove_pm_notifier(adev); - if (adev->wakeup.flags.valid) + if (adev->wakeup.flags.valid) { + if (pci_dev->bridge_d3) + device_wakeup_disable(dev); + device_set_wakeup_capable(dev, false); + } } static bool pci_acpi_bus_match(struct device *dev) From 19216bf2820e8b8f98e76baf6964a86faed8027e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 10 Sep 2018 22:12:49 +0200 Subject: [PATCH 0264/2608] usb: gadget: udc: atmel: handle at91sam9rl PMC [ Upstream commit bb80e4fa57eb75ebd64ae9be4155da6d12c1a997 ] The at91sam9rl PMC is not quite the same as the at91sam9g45 one and now has its own compatible string. Add support for that. Fixes: 217bace8e548 ("ARM: dts: fix PMC compatible") Acked-by: Cristian Birsan Signed-off-by: Alexandre Belloni Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/atmel_usba_udc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index a884c022df7a..cb66f982c313 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -2071,6 +2071,8 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev, udc->errata = match->data; udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc"); + if (IS_ERR(udc->pmc)) + udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9rl-pmc"); if (IS_ERR(udc->pmc)) udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9x5-pmc"); if (udc->errata && IS_ERR(udc->pmc)) From e75ca3babf06fd59af2e807415f0016d7793d3c9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 2 Oct 2018 01:34:44 -0400 Subject: [PATCH 0265/2608] ext4: fix argument checking in EXT4_IOC_MOVE_EXT [ Upstream commit f18b2b83a727a3db208308057d2c7945f368e625 ] If the starting block number of either the source or destination file exceeds the EOF, EXT4_IOC_MOVE_EXT should return EINVAL. Also fixed the helper function mext_check_coverage() so that if the logical block is beyond EOF, make it return immediately, instead of looping until the block number wraps all the away around. This takes long enough that if there are multiple threads trying to do pound on an the same inode doing non-sensical things, it can end up triggering the kernel's soft lockup detector. Reported-by: syzbot+c61979f6f2cba5cb3c06@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/ext4/move_extent.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 9bb36909ec92..cd8d481e0c48 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -526,9 +526,13 @@ mext_check_arguments(struct inode *orig_inode, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - if (orig_eof < orig_start + *len - 1) + if (orig_eof <= orig_start) + *len = 0; + else if (orig_eof < orig_start + *len - 1) *len = orig_eof - orig_start; - if (donor_eof < donor_start + *len - 1) + if (donor_eof <= donor_start) + *len = 0; + else if (donor_eof < donor_start + *len - 1) *len = donor_eof - donor_start; if (!*len) { ext4_debug("ext4 move extent: len should not be 0 " From 442e2ba5724121abc355712715d027b8453e33c9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 1 Oct 2018 18:36:36 -0700 Subject: [PATCH 0266/2608] MD: fix invalid stored role for a disk [ Upstream commit d595567dc4f0c1d90685ec1e2e296e2cad2643ac ] If we change the number of array's device after device is removed from array, then add the device back to array, we can see that device is added as active role instead of spare which we expected. Please see the below link for details: https://marc.info/?l=linux-raid&m=153736982015076&w=2 This is caused by that we prefer to use device's previous role which is recorded by saved_raid_disk, but we should respect the new number of conf->raid_disks since it could be changed after device is removed. Reported-by: Gioh Kim Tested-by: Gioh Kim Acked-by: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 5599712d478e..4c1a6abed606 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1766,6 +1766,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } else set_bit(In_sync, &rdev->flags); rdev->raid_disk = role; + if (role >= mddev->raid_disks) { + rdev->saved_raid_disk = -1; + rdev->raid_disk = -1; + } break; } if (sb->devflags & WriteMostly1) From 34068ae1fa0a91f48ceea69d063ae6f6701d083d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:59 +0800 Subject: [PATCH 0267/2608] f2fs: fix to recover inode's i_flags during POR [ Upstream commit 19c73a691ccf6fb2f12d4e9cf9830023966cec88 ] Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +A /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr /mnt/f2fs/file -----------------N- /mnt/f2fs/file But actually, we expect the corrct result is: -------A---------N- /mnt/f2fs/file The reason is we didn't recover inode.i_flags field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9626758bc762..765fadf954af 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -210,6 +210,7 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); F2FS_I(inode)->i_advise = raw->i_advise; + F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); if (file_enc_name(inode)) name = ""; From 8cc76433d8c9dd666c19b1aaefd5c55a31115f89 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 24 Sep 2018 07:00:41 -0700 Subject: [PATCH 0268/2608] PCI/MSI: Warn and return error if driver enables MSI/MSI-X twice [ Upstream commit 4c1ef72e9b71a19fb405ebfcd37c0a5e16fa44ca ] It is a serious driver defect to enable MSI or MSI-X more than once. Doing so may panic the kernel as in the stack trace below: Call Trace: sysfs_add_one+0xa5/0xd0 create_dir+0x7c/0xe0 sysfs_create_subdir+0x1c/0x20 internal_create_group+0x6d/0x290 sysfs_create_groups+0x4a/0xa0 populate_msi_sysfs+0x1cd/0x210 pci_enable_msix+0x31c/0x3e0 igbuio_pci_open+0x72/0x300 [igb_uio] uio_open+0xcc/0x120 [uio] chrdev_open+0xa1/0x1e0 [...] do_sys_open+0xf3/0x1f0 SyS_open+0x1e/0x20 system_call_fastpath+0x16/0x1b ---[ end trace 11042e2848880209 ]--- Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffffa056b4fa We want to keep the WARN_ON() and stack trace so the driver can be fixed, but we can avoid the kernel panic by returning an error. We may still get warnings like this: Call Trace: pci_enable_msix+0x3c9/0x3e0 igbuio_pci_open+0x72/0x300 [igb_uio] uio_open+0xcc/0x120 [uio] chrdev_open+0xa1/0x1e0 [...] do_sys_open+0xf3/0x1f0 SyS_open+0x1e/0x20 system_call_fastpath+0x16/0x1b ------------[ cut here ]------------ WARNING: at fs/sysfs/dir.c:526 sysfs_add_one+0xa5/0xd0() sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:03.0/0000:01:00.1/msi_irqs' Signed-off-by: Tonghao Zhang [bhelgaas: changelog, fix patch whitespace, remove !!] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 496ed9130600..536e9a5cd2b1 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -958,7 +958,6 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, } } } - WARN_ON(!!dev->msix_enabled); /* Check whether driver already requested for MSI irq */ if (dev->msi_enabled) { @@ -1028,8 +1027,6 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (!pci_msi_supported(dev, minvec)) return -EINVAL; - WARN_ON(!!dev->msi_enabled); - /* Check whether driver already requested MSI-X irqs */ if (dev->msix_enabled) { dev_info(&dev->dev, @@ -1040,6 +1037,9 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (maxvec < minvec) return -ERANGE; + if (WARN_ON_ONCE(dev->msi_enabled)) + return -EINVAL; + nvec = pci_msi_vec_count(dev); if (nvec < 0) return nvec; @@ -1088,6 +1088,9 @@ static int __pci_enable_msix_range(struct pci_dev *dev, if (maxvec < minvec) return -ERANGE; + if (WARN_ON_ONCE(dev->msix_enabled)) + return -EINVAL; + for (;;) { if (affd) { nvec = irq_calc_affinity_vectors(minvec, nvec, affd); From 0d20b616732e103070f2ffcd061ec0812754808a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:46 -0600 Subject: [PATCH 0269/2608] coresight: etb10: Fix handling of perf mode [ Upstream commit 987d1e8dcd370d96029a3d76a0031b043c4a69ae ] If the ETB is already enabled in sysfs mode, the ETB reports success even if a perf mode is requested. Fix this by checking the requested mode. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etb10.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 56ecd7aff5eb..d14a9cb7959a 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -155,6 +155,10 @@ static int etb_enable(struct coresight_device *csdev, u32 mode) if (val == CS_MODE_PERF) return -EBUSY; + /* Don't let perf disturb sysFS sessions */ + if (val == CS_MODE_SYSFS && mode == CS_MODE_PERF) + return -EBUSY; + /* Nothing to do, the tracer is already enabled. */ if (val == CS_MODE_SYSFS) goto out; From 00de8eac10fdcc2d3fff63bf2cfa122a15c72aee Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Tue, 25 Sep 2018 14:00:24 +0530 Subject: [PATCH 0270/2608] PCI: dwc: pci-dra7xx: Enable errata i870 for both EP and RC mode [ Upstream commit 726d75a6d243bf6730da3216f3592503f6f0f588 ] Errata i870 is applicable in both EP and RC mode. Therefore rename function dra7xx_pcie_ep_unaligned_memaccess(), that implements errata workaround, to dra7xx_pcie_unaligned_memaccess() and call it for both RC and EP. Make sure driver probe does not fail in case the workaround is not applied for RC mode in order to maintain DT backward compatibility. Reported-by: Chris Welch Signed-off-by: Vignesh R [lorenzo.pieralisi@arm.com: reworded the log] Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pci-dra7xx.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pci/dwc/pci-dra7xx.c b/drivers/pci/dwc/pci-dra7xx.c index 362607f727ee..06eae132aff7 100644 --- a/drivers/pci/dwc/pci-dra7xx.c +++ b/drivers/pci/dwc/pci-dra7xx.c @@ -546,7 +546,7 @@ static const struct of_device_id of_dra7xx_pcie_match[] = { }; /* - * dra7xx_pcie_ep_unaligned_memaccess: workaround for AM572x/AM571x Errata i870 + * dra7xx_pcie_unaligned_memaccess: workaround for AM572x/AM571x Errata i870 * @dra7xx: the dra7xx device where the workaround should be applied * * Access to the PCIe slave port that are not 32-bit aligned will result @@ -556,7 +556,7 @@ static const struct of_device_id of_dra7xx_pcie_match[] = { * * To avoid this issue set PCIE_SS1_AXI2OCP_LEGACY_MODE_ENABLE to 1. */ -static int dra7xx_pcie_ep_unaligned_memaccess(struct device *dev) +static int dra7xx_pcie_unaligned_memaccess(struct device *dev) { int ret; struct device_node *np = dev->of_node; @@ -707,6 +707,11 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) case DW_PCIE_RC_TYPE: dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_DEVICE_TYPE, DEVICE_TYPE_RC); + + ret = dra7xx_pcie_unaligned_memaccess(dev); + if (ret) + dev_err(dev, "WA for Errata i870 not applied\n"); + ret = dra7xx_add_pcie_port(dra7xx, pdev); if (ret < 0) goto err_gpio; @@ -715,7 +720,7 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_DEVICE_TYPE, DEVICE_TYPE_EP); - ret = dra7xx_pcie_ep_unaligned_memaccess(dev); + ret = dra7xx_pcie_unaligned_memaccess(dev); if (ret) goto err_gpio; From 6a4e231f265052b0da714076143fe2f2425eee89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Wed, 12 Sep 2018 11:59:30 +0300 Subject: [PATCH 0271/2608] crypto: caam - fix implicit casts in endianness helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aae733a3f46f5ef338fbdde26e14cbb205a23de0 ] Fix the following sparse endianness warnings: drivers/crypto/caam/regs.h:95:1: sparse: incorrect type in return expression (different base types) @@ expected unsigned int @@ got restricted __le32unsigned int @@ drivers/crypto/caam/regs.h:95:1: expected unsigned int drivers/crypto/caam/regs.h:95:1: got restricted __le32 [usertype] drivers/crypto/caam/regs.h:95:1: sparse: incorrect type in return expression (different base types) @@ expected unsigned int @@ got restricted __be32unsigned int @@ drivers/crypto/caam/regs.h:95:1: expected unsigned int drivers/crypto/caam/regs.h:95:1: got restricted __be32 [usertype] drivers/crypto/caam/regs.h:92:1: sparse: cast to restricted __le32 drivers/crypto/caam/regs.h:92:1: sparse: cast to restricted __be32 Fixes: 261ea058f016 ("crypto: caam - handle core endianness != caam endianness") Reported-by: kbuild test robot Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/regs.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index fee363865d88..e5513cc59ec3 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -70,22 +70,22 @@ extern bool caam_little_end; extern bool caam_imx; -#define caam_to_cpu(len) \ -static inline u##len caam##len ## _to_cpu(u##len val) \ -{ \ - if (caam_little_end) \ - return le##len ## _to_cpu(val); \ - else \ - return be##len ## _to_cpu(val); \ +#define caam_to_cpu(len) \ +static inline u##len caam##len ## _to_cpu(u##len val) \ +{ \ + if (caam_little_end) \ + return le##len ## _to_cpu((__force __le##len)val); \ + else \ + return be##len ## _to_cpu((__force __be##len)val); \ } -#define cpu_to_caam(len) \ -static inline u##len cpu_to_caam##len(u##len val) \ -{ \ - if (caam_little_end) \ - return cpu_to_le##len(val); \ - else \ - return cpu_to_be##len(val); \ +#define cpu_to_caam(len) \ +static inline u##len cpu_to_caam##len(u##len val) \ +{ \ + if (caam_little_end) \ + return (__force u##len)cpu_to_le##len(val); \ + else \ + return (__force u##len)cpu_to_be##len(val); \ } caam_to_cpu(16) From 7987dfa6057678f5864309496c4c26662d93385a Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 4 Sep 2018 17:18:57 +0200 Subject: [PATCH 0272/2608] usb: chipidea: Prevent unbalanced IRQ disable [ Upstream commit 8b97d73c4d72a2abf58f8e49062a7ee1e5f1334e ] The ChipIdea IRQ is disabled before scheduling the otg work and re-enabled on otg work completion. However if the job is already scheduled we have to undo the effect of disable_irq int order to balance the IRQ disable-depth value. Fixes: be6b0c1bd0be ("usb: chipidea: using one inline function to cover queue work operations") Signed-off-by: Loic Poulain Signed-off-by: Peter Chen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/otg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/otg.h b/drivers/usb/chipidea/otg.h index 9ecb598e48f0..a5557c70034a 100644 --- a/drivers/usb/chipidea/otg.h +++ b/drivers/usb/chipidea/otg.h @@ -20,7 +20,8 @@ void ci_handle_vbus_change(struct ci_hdrc *ci); static inline void ci_otg_queue_work(struct ci_hdrc *ci) { disable_irq_nosync(ci->irq); - queue_work(ci->wq, &ci->work); + if (queue_work(ci->wq, &ci->work) == false) + enable_irq(ci->irq); } #endif /* __DRIVERS_USB_CHIPIDEA_OTG_H */ From 24e249524e9b19e1e8027904ed3dcbd5e34e9dea Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 14 Sep 2018 14:53:32 -0400 Subject: [PATCH 0273/2608] driver/dma/ioat: Call del_timer_sync() without holding prep_lock [ Upstream commit cfb03be6c7e8a1591285849c361d67b09f5149f7 ] The following lockdep splat was observed: [ 1222.241750] ====================================================== [ 1222.271301] WARNING: possible circular locking dependency detected [ 1222.301060] 4.16.0-10.el8+5.x86_64+debug #1 Not tainted [ 1222.326659] ------------------------------------------------------ [ 1222.356565] systemd-shutdow/1 is trying to acquire lock: [ 1222.382660] ((&ioat_chan->timer)){+.-.}, at: [<00000000f71e1a28>] del_timer_sync+0x5/0xf0 [ 1222.422928] [ 1222.422928] but task is already holding lock: [ 1222.451743] (&(&ioat_chan->prep_lock)->rlock){+.-.}, at: [<000000008ea98b12>] ioat_shutdown+0x86/0x100 [ioatdma] : [ 1223.524987] Chain exists of: [ 1223.524987] (&ioat_chan->timer) --> &(&ioat_chan->cleanup_lock)->rlock --> &(&ioat_chan->prep_lock)->rlock [ 1223.524987] [ 1223.594082] Possible unsafe locking scenario: [ 1223.594082] [ 1223.622630] CPU0 CPU1 [ 1223.645080] ---- ---- [ 1223.667404] lock(&(&ioat_chan->prep_lock)->rlock); [ 1223.691535] lock(&(&ioat_chan->cleanup_lock)->rlock); [ 1223.728657] lock(&(&ioat_chan->prep_lock)->rlock); [ 1223.765122] lock((&ioat_chan->timer)); [ 1223.784095] [ 1223.784095] *** DEADLOCK *** [ 1223.784095] [ 1223.813492] 4 locks held by systemd-shutdow/1: [ 1223.834677] #0: (reboot_mutex){+.+.}, at: [<0000000056d33456>] SYSC_reboot+0x10f/0x300 [ 1223.873310] #1: (&dev->mutex){....}, at: [<00000000258dfdd7>] device_shutdown+0x1c8/0x660 [ 1223.913604] #2: (&dev->mutex){....}, at: [<0000000068331147>] device_shutdown+0x1d6/0x660 [ 1223.954000] #3: (&(&ioat_chan->prep_lock)->rlock){+.-.}, at: [<000000008ea98b12>] ioat_shutdown+0x86/0x100 [ioatdma] In the ioat_shutdown() function: spin_lock_bh(&ioat_chan->prep_lock); set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); del_timer_sync(&ioat_chan->timer); spin_unlock_bh(&ioat_chan->prep_lock); According to the synchronization rule for the del_timer_sync() function, the caller must not hold locks which would prevent completion of the timer's handler. The timer structure has its own lock that manages its synchronization. Setting the IOAT_CHAN_DOWN bit should prevent other CPUs from trying to use that device anyway, there is probably no need to call del_timer_sync() while holding the prep_lock. So the del_timer_sync() call is now moved outside of the prep_lock critical section to prevent the circular lock dependency. Signed-off-by: Waiman Long Reviewed-by: Dave Jiang Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ioat/init.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 854deb0da07c..68680e4151ea 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1205,8 +1205,15 @@ static void ioat_shutdown(struct pci_dev *pdev) spin_lock_bh(&ioat_chan->prep_lock); set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); - del_timer_sync(&ioat_chan->timer); spin_unlock_bh(&ioat_chan->prep_lock); + /* + * Synchronization rule for del_timer_sync(): + * - The caller must not hold locks which would prevent + * completion of the timer's handler. + * So prep_lock cannot be held before calling it. + */ + del_timer_sync(&ioat_chan->timer); + /* this should quiesce then reset */ ioat_reset_hw(ioat_chan); } From adfbc0d10727027a17fb1784548245cda2fae30e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 16 Aug 2018 09:39:41 +0200 Subject: [PATCH 0274/2608] uio: ensure class is registered before devices [ Upstream commit ae61cf5b9913027c6953a79ed3894da4f47061bd ] When both uio and the uio drivers are built in the kernel, it is possible for a driver to register devices before the uio class is registered. This may result in a NULL pointer dereference later on in get_device_parent() when accessing the class glue_dirs spinlock. The trace looks like that: Unable to handle kernel NULL pointer dereference at virtual address 00000140 [...] [] _raw_spin_lock+0x14/0x48 [] device_add+0x154/0x6a0 [] device_create_groups_vargs+0x120/0x128 [] device_create+0x54/0x60 [] __uio_register_device+0x120/0x4a8 [] jaguar2_pci_probe+0x2d4/0x558 [] local_pci_probe+0x3c/0xb8 [] pci_device_probe+0x11c/0x180 [] driver_probe_device+0x22c/0x2d8 [] __driver_attach+0xbc/0xc0 [] bus_for_each_dev+0x4c/0x98 [] driver_attach+0x20/0x28 [] bus_add_driver+0x1b8/0x228 [] driver_register+0x60/0xf8 [] __pci_register_driver+0x40/0x48 Return EPROBE_DEFER in that case so the driver can register the device later. Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 41784798c789..0a730136646d 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -249,6 +249,8 @@ static struct class uio_class = { .dev_groups = uio_groups, }; +bool uio_class_registered; + /* * device functions */ @@ -780,6 +782,9 @@ static int init_uio_class(void) printk(KERN_ERR "class_register failed for uio\n"); goto err_class_register; } + + uio_class_registered = true; + return 0; err_class_register: @@ -790,6 +795,7 @@ exit: static void release_uio_class(void) { + uio_class_registered = false; class_unregister(&uio_class); uio_major_cleanup(); } @@ -809,6 +815,9 @@ int __uio_register_device(struct module *owner, struct uio_device *idev; int ret = 0; + if (!uio_class_registered) + return -EPROBE_DEFER; + if (!parent || !info || !info->name || !info->version) return -EINVAL; From 7f4b23ed2cdb5ba7a6edf002ea9cf0f5f684dca5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 Sep 2018 10:30:45 -0700 Subject: [PATCH 0275/2608] scsi: lpfc: Correct soft lockup when running mds diagnostics [ Upstream commit 0ef01a2d95fd62bb4f536e7ce4d5e8e74b97a244 ] When running an mds diagnostic that passes frames with the switch, soft lockups are detected. The driver is in a CQE processing loop and has sufficient amount of traffic that it never exits the ring processing routine, thus the "lockup". Cap the number of elements in the work processing routine to 64 elements. This ensures that the cpu will be given up and the handler reschedule to process additional items. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_sli.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index dc83498024dc..24b6e56f6e97 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -3585,6 +3585,7 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf; struct lpfc_cq_event *cq_event; unsigned long iflag; + int count = 0; spin_lock_irqsave(&phba->hbalock, iflag); phba->hba_flag &= ~HBA_SP_QUEUE_EVT; @@ -3606,16 +3607,22 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, if (irspiocbq) lpfc_sli_sp_handle_rspiocb(phba, pring, irspiocbq); + count++; break; case CQE_CODE_RECEIVE: case CQE_CODE_RECEIVE_V1: dmabuf = container_of(cq_event, struct hbq_dmabuf, cq_event); lpfc_sli4_handle_received_buffer(phba, dmabuf); + count++; break; default: break; } + + /* Limit the number of events to 64 to avoid soft lockups */ + if (count == 64) + break; } } From 5115854bb3a8bb7c04a191c3e8778faf04a2ec96 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 Sep 2018 10:30:44 -0700 Subject: [PATCH 0276/2608] scsi: lpfc: Correct race with abort on completion path [ Upstream commit ca7fb76e091f889cfda1287c07a9358f73832b39 ] On io completion, the driver is taking an adapter wide lock and nulling the scsi command back pointer. The nulling of the back pointer is to signify the io was completed and the scsi_done() routine was called. However, the routine makes no check to see if the abort routine had done the same thing and possibly nulled the pointer. Thus it may doubly-complete the io. Make the following mods: - Check to make sure forward progress (call scsi_done()) only happens if the command pointer was non-null. - As the taking of the lock, which is adapter wide, is very costly on a system under load, null the pointer using an xchg operation rather than under lock. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_scsi.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 1a6f122bb25d..4ade13d72deb 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -4149,9 +4149,17 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, lpfc_scsi_unprep_dma_buf(phba, lpfc_cmd); - spin_lock_irqsave(&phba->hbalock, flags); - lpfc_cmd->pCmd = NULL; - spin_unlock_irqrestore(&phba->hbalock, flags); + /* If pCmd was set to NULL from abort path, do not call scsi_done */ + if (xchg(&lpfc_cmd->pCmd, NULL) == NULL) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, + "0711 FCP cmd already NULL, sid: 0x%06x, " + "did: 0x%06x, oxid: 0x%04x\n", + vport->fc_myDID, + (pnode) ? pnode->nlp_DID : 0, + phba->sli_rev == LPFC_SLI_REV4 ? + lpfc_cmd->cur_iocbq.sli4_xritag : 0xffff); + return; + } /* The sdev is not guaranteed to be valid post scsi_done upcall. */ cmd->scsi_done(cmd); From 5ae1c65ae960cdc0b2b9a5a614d54f7cfa33e31a Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 26 Jun 2018 13:12:43 +0800 Subject: [PATCH 0277/2608] f2fs: report error if quota off error during umount [ Upstream commit cda9cc595f0bb6ffa51a4efc4b6533dfa4039b4c ] Now, we depend on fsck to ensure quota file data is ok, so we scan whole partition if checkpoint without umount flag. It's same for quota off error case, which may make quota file data inconsistent. generic/019 reports below error: __quota_error: 1160 callbacks suppressed Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota VFS: Busy inodes after unmount of zram1. Self-destruct in 5 seconds. Have a nice day... If we failed in below path due to fail to write dquot block, we will miss to release quota inode, fix it. - f2fs_put_super - f2fs_quota_off_umount - f2fs_quota_off - f2fs_quota_sync <-- failed - dquot_quota_off <-- missed to call Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index eae35909fa51..7cda685296b2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1488,7 +1488,9 @@ static int f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - f2fs_quota_sync(sb, type); + err = f2fs_quota_sync(sb, type); + if (err) + goto out_put; err = dquot_quota_off(sb, type); if (err) @@ -1507,9 +1509,20 @@ out_put: void f2fs_quota_off_umount(struct super_block *sb) { int type; + int err; - for (type = 0; type < MAXQUOTAS; type++) - f2fs_quota_off(sb, type); + for (type = 0; type < MAXQUOTAS; type++) { + err = f2fs_quota_off(sb, type); + if (err) { + int ret = dquot_quota_off(sb, type); + + f2fs_msg(sb, KERN_ERR, + "Fail to turn off disk quota " + "(type: %d, err: %d, ret:%d), Please " + "run fsck to fix it.", type, err, ret); + set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + } + } } int f2fs_get_projid(struct inode *inode, kprojid_t *projid) From 06bd97b79584764d3ff4dbd42d4ca87d2cbe5a46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 Sep 2018 20:02:46 +0200 Subject: [PATCH 0278/2608] signal: Always deliver the kernel's SIGKILL and SIGSTOP to a pid namespace init [ Upstream commit 3597dfe01d12f570bc739da67f857fd222a3ea66 ] Instead of playing whack-a-mole and changing SEND_SIG_PRIV to SEND_SIG_FORCED throughout the kernel to ensure a pid namespace init gets signals sent by the kernel, stop allowing a pid namespace init to ignore SIGKILL or SIGSTOP sent by the kernel. A pid namespace init is only supposed to be able to ignore signals sent from itself and children with SIG_DFL. Fixes: 921cf9f63089 ("signals: protect cinit from unblocked SIG_DFL signals") Reviewed-by: Thomas Gleixner Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index b74acbec9876..7181215e9b64 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1003,7 +1003,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, - from_ancestor_ns || (info == SEND_SIG_FORCED))) + from_ancestor_ns || (info == SEND_SIG_PRIV) || (info == SEND_SIG_FORCED))) goto ret; pending = group ? &t->signal->shared_pending : &t->pending; From 64b48a5cbea969bb7bd2f779dab52c2753c44e46 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 9 Sep 2018 22:48:58 +0200 Subject: [PATCH 0279/2608] mfd: menelaus: Fix possible race condition and leak [ Upstream commit 9612f8f503804d2fd2f63aa6ba1e58bba4612d96 ] The IRQ work is added before the struct rtc is allocated and registered, but this struct is used in the IRQ handler. This may lead to a NULL pointer dereference. Switch to devm_rtc_allocate_device/rtc_register_device to allocate the rtc before calling menelaus_add_irq_work. Also, this solves a possible leak as the RTC is never released. Signed-off-by: Alexandre Belloni Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/menelaus.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/menelaus.c b/drivers/mfd/menelaus.c index 29b7164a823b..d28ebe7ecd21 100644 --- a/drivers/mfd/menelaus.c +++ b/drivers/mfd/menelaus.c @@ -1094,6 +1094,7 @@ static void menelaus_rtc_alarm_work(struct menelaus_chip *m) static inline void menelaus_rtc_init(struct menelaus_chip *m) { int alarm = (m->client->irq > 0); + int err; /* assume 32KDETEN pin is pulled high */ if (!(menelaus_read_reg(MENELAUS_OSC_CTRL) & 0x80)) { @@ -1101,6 +1102,12 @@ static inline void menelaus_rtc_init(struct menelaus_chip *m) return; } + m->rtc = devm_rtc_allocate_device(&m->client->dev); + if (IS_ERR(m->rtc)) + return; + + m->rtc->ops = &menelaus_rtc_ops; + /* support RTC alarm; it can issue wakeups */ if (alarm) { if (menelaus_add_irq_work(MENELAUS_RTCALM_IRQ, @@ -1125,10 +1132,8 @@ static inline void menelaus_rtc_init(struct menelaus_chip *m) menelaus_write_reg(MENELAUS_RTC_CTRL, m->rtc_control); } - m->rtc = rtc_device_register(DRIVER_NAME, - &m->client->dev, - &menelaus_rtc_ops, THIS_MODULE); - if (IS_ERR(m->rtc)) { + err = rtc_register_device(m->rtc); + if (err) { if (alarm) { menelaus_remove_irq_work(MENELAUS_RTCALM_IRQ); device_init_wakeup(&m->client->dev, 0); From 0bc6f0649b539b20ebf2096ead11246027ce9bef Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 29 Aug 2018 23:32:44 +0200 Subject: [PATCH 0280/2608] dmaengine: dma-jz4780: Return error if not probed from DT [ Upstream commit 54f919a04cf221bc1601d1193682d4379dacacbd ] The driver calls clk_get() with the clock name set to NULL, which means that the driver could only work when probed from devicetree. From now on, we explicitly require the driver to be probed from devicetree. Signed-off-by: Paul Cercueil Tested-by: Mathieu Malaterre Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dma-jz4780.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 7373b7a555ec..803cfb4523b0 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -754,6 +754,11 @@ static int jz4780_dma_probe(struct platform_device *pdev) struct resource *res; int i, ret; + if (!dev->of_node) { + dev_err(dev, "This driver must be probed from devicetree\n"); + return -EINVAL; + } + jzdma = devm_kzalloc(dev, sizeof(*jzdma), GFP_KERNEL); if (!jzdma) return -ENOMEM; From ab5ed779fe0dcf3e81484409530267880f661d95 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Tue, 12 Jun 2018 18:20:49 -0700 Subject: [PATCH 0281/2608] IB/rxe: fix for duplicate request processing and ack psns [ Upstream commit b97db58557f4aa6d9903f8e1deea6b3d1ed0ba43 ] Don't reset the resp opcode for a replayed read response. The resp opcode could be in the middle of a write or send sequence, when the duplicate read request was received. An example sequence is as follows: - Receive read request for 12KB PSN 20. Transmit read response first, middle and last with PSNs 20,21,22. - Receive write first PSN 23. At this point the resp psn is 24 and resp opcode is write first. - The sender notices that PSN 20 is dropped and retransmits. Receive read request for 12KB PSN 20. Transmit read response first, middle and last with PSNs 20,21,22. The resp opcode is set to -1, the resp psn remains 24. - Receive write first PSN 23. This is processed by duplicate_request(). The resp opcode remains -1 and resp psn remains 24. - Receive write middle PSN 24. check_op_seq() reports a missing first error since the resp opcode is -1. When sending an ack for a duplicate send or write request, use the psn of the previous ack sent. Do not use the psn of a read response for the ack. An example sequence is as follows: - Receive write PSN 30. Transmit ACK for PSN 30. - Receive read request 4KB PSN 31. Transmit read response with PSN 31. The resp psn is now 32. - The sender notices that PSN 30 is dropped and retransmits. Receive write PSN 30. duplicate_request() sends an ACK with PSN 31. That is incorrect since PSN 31 was a read request. Signed-off-by: Vijay Immanuel Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_resp.c | 8 ++++++-- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index bd43c1c7a42f..4d84b010b3ee 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -683,6 +683,7 @@ static enum resp_states read_reply(struct rxe_qp *qp, rxe_advance_resp_resource(qp); res->type = RXE_READ_MASK; + res->replay = 0; res->read.va = qp->resp.va; res->read.va_org = qp->resp.va; @@ -753,7 +754,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, state = RESPST_DONE; } else { qp->resp.res = NULL; - qp->resp.opcode = -1; + if (!res->replay) + qp->resp.opcode = -1; if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) qp->resp.psn = res->cur_psn; state = RESPST_CLEANUP; @@ -815,6 +817,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; @@ -1071,7 +1074,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { enum resp_states rc; - u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK; + u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK; if (pkt->mask & RXE_SEND_MASK || pkt->mask & RXE_WRITE_MASK) { @@ -1114,6 +1117,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, res->state = (pkt->psn == res->first_psn) ? rdatm_res_state_new : rdatm_res_state_replay; + res->replay = 1; /* Reset the resource, except length. */ res->read.va_org = iova; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 1019f5e7dbdd..59f6a24db064 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -173,6 +173,7 @@ enum rdatm_res_state { struct resp_res { int type; + int replay; u32 first_psn; u32 last_psn; u32 cur_psn; @@ -197,6 +198,7 @@ struct rxe_resp_info { enum rxe_qp_state state; u32 msn; u32 psn; + u32 ack_psn; int opcode; int drop_msg; int goto_error; From edadd638264f3782d0d166125a7a5515523dbe3c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 11 Aug 2018 23:33:34 +0200 Subject: [PATCH 0282/2608] ALSA: hda: Check the non-cached stream buffers more explicitly [ Upstream commit 78c9be61c3a5cd9e2439fd27a5ffad73a81958c7 ] Introduce a new flag, uc_buffer, to indicate that the controller requires the non-cached pages for stream buffers, either as a chip-specific requirement or specified via snoop=0 option. This improves the code-readability. Also, this patch fixes the incorrect behavior for C-Media chip where the stream buffers were never handled as non-cached due to the check of driver_type even if you pass snoop=0 option. Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_controller.h | 1 + sound/pci/hda/hda_intel.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index a68e75b00ea3..53c3cd28bc99 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -160,6 +160,7 @@ struct azx { unsigned int msi:1; unsigned int probing:1; /* codec probing phase */ unsigned int snoop:1; + unsigned int uc_buffer:1; /* non-cached pages for stream buffers */ unsigned int align_buffer_size:1; unsigned int region_requested:1; unsigned int disabled:1; /* disabled by vga_switcheroo */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 873d9824fbcf..4e38905bc47d 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -410,7 +410,7 @@ static void __mark_pages_wc(struct azx *chip, struct snd_dma_buffer *dmab, bool #ifdef CONFIG_SND_DMA_SGBUF if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG) { struct snd_sg_buf *sgbuf = dmab->private_data; - if (chip->driver_type == AZX_DRIVER_CMEDIA) + if (!chip->uc_buffer) return; /* deal with only CORB/RIRB buffers */ if (on) set_pages_array_wc(sgbuf->page_table, sgbuf->pages); @@ -1634,6 +1634,7 @@ static void azx_check_snoop_available(struct azx *chip) dev_info(chip->card->dev, "Force to %s mode by module option\n", snoop ? "snoop" : "non-snoop"); chip->snoop = snoop; + chip->uc_buffer = !snoop; return; } @@ -1654,8 +1655,12 @@ static void azx_check_snoop_available(struct azx *chip) snoop = false; chip->snoop = snoop; - if (!snoop) + if (!snoop) { dev_info(chip->card->dev, "Force to non-snoop mode\n"); + /* C-Media requires non-cached pages only for CORB/RIRB */ + if (chip->driver_type != AZX_DRIVER_CMEDIA) + chip->uc_buffer = true; + } } static void azx_probe_work(struct work_struct *work) @@ -2094,7 +2099,7 @@ static void pcm_mmap_prepare(struct snd_pcm_substream *substream, #ifdef CONFIG_X86 struct azx_pcm *apcm = snd_pcm_substream_chip(substream); struct azx *chip = apcm->chip; - if (!azx_snoop(chip) && chip->driver_type != AZX_DRIVER_CMEDIA) + if (chip->uc_buffer) area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); #endif } From bb61032295732b08a714678e0ccea732dabfc49e Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 8 Oct 2018 11:06:18 -0400 Subject: [PATCH 0283/2608] cpupower: Fix AMD Family 0x17 msr_pstate size [ Upstream commit 8c22e2f695920ebd94f9a53bcf2a65eb36d4dba1 ] The msr_pstate data is only 63 bits long and should be 64 bits. Add in the missing bit from res1 for AMD Family 0x17. Reference: https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf, page 138. Signed-off-by: Prarit Bhargava Cc: Shuah Khan Cc: Stafford Horne Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/utils/helpers/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 58d23997424d..9607ada5b29a 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -33,7 +33,7 @@ union msr_pstate { unsigned vid:8; unsigned iddval:8; unsigned idddiv:2; - unsigned res1:30; + unsigned res1:31; unsigned en:1; } fam17h_bits; unsigned long long val; From a3ab5e3c80106fa6b131e0ad0959d5a8376ce589 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Oct 2018 19:30:13 -0700 Subject: [PATCH 0284/2608] Revert "f2fs: fix to clear PG_checked flag in set_page_dirty()" commit 164a63fa6b384e30ceb96ed80bc7dc3379bc0960 upstream. This reverts commit 66110abc4c931f879d70e83e1281f891699364bf. If we clear the cold data flag out of the writeback flow, we can miscount -1 by end_io, which incurs a deadlock caused by all I/Os being blocked during heavy GC. Balancing F2FS Async: - IO (CP: 1, Data: -1, Flush: ( 0 0 1), Discard: ( ... GC thread: IRQ - move_data_page() - set_page_dirty() - clear_cold_data() - f2fs_write_end_io() - type = WB_DATA_TYPE(page); here, we get wrong type - dec_page_count(sbi, type); - f2fs_wait_on_page_writeback() Cc: Reported-and-Tested-by: Park Ju Hyung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e10bd73f0723..85142e5df88b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2190,10 +2190,6 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - /* don't remain PG_checked flag which was set during GC */ - if (is_cold_data(page)) - clear_cold_data(page); - if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { register_inmem_page(inode, page); From 2cc5dcf7a9e074c5bdfdefea1801aa0a657750d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Oct 2018 09:12:51 +0800 Subject: [PATCH 0285/2608] f2fs: fix to account IO correctly commit 4c58ed076875f36dae0f240da1e25e99e5d4afb8 upstream. Below race can cause reversed reference on dirty count, fix it by relocating __submit_bio() and inc_page_count(). Thread A Thread B - f2fs_inplace_write_data - f2fs_submit_page_bio - __submit_bio - f2fs_write_end_io - dec_page_count - inc_page_count Cc: Fixes: d1b3e72d5490 ("f2fs: submit bio of in-place-update pages") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 85142e5df88b..6fbb6d75318a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -381,10 +381,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, bio, fio->type); - if (!is_read_io(fio->op)) inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + + __submit_bio(fio->sbi, bio, fio->type); return 0; } From b4438856ea1e6337c1c11fee6b612ffafee055cb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 9 Feb 2018 14:28:01 +0530 Subject: [PATCH 0286/2608] ARM: dts: exynos: Remove "cooling-{min|max}-level" for CPU nodes commit cd6f55457eb449a388e793abd676e3a5b73510bc upstream. The "cooling-min-level" and "cooling-max-level" properties are not parsed by any part of the kernel currently and the max cooling state of a CPU cooling device is found by referring to the cpufreq table instead. Remove the unused properties from the CPU nodes. Signed-off-by: Viresh Kumar Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos4210.dtsi | 2 -- arch/arm/boot/dts/exynos4412.dtsi | 2 -- arch/arm/boot/dts/exynos5250.dtsi | 2 -- arch/arm/boot/dts/exynos5420-cpus.dtsi | 16 ---------------- arch/arm/boot/dts/exynos5422-cpus.dtsi | 16 ---------------- 5 files changed, 38 deletions(-) diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index 768fb075b1fd..4e8325b173ea 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -52,8 +52,6 @@ 400000 975000 200000 950000 >; - cooling-min-level = <4>; - cooling-max-level = <2>; #cooling-cells = <2>; /* min followed by max */ }; diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi index 7ff03a7e8fb9..1a35e6336e53 100644 --- a/arch/arm/boot/dts/exynos4412.dtsi +++ b/arch/arm/boot/dts/exynos4412.dtsi @@ -45,8 +45,6 @@ clocks = <&clock CLK_ARM_CLK>; clock-names = "cpu"; operating-points-v2 = <&cpu0_opp_table>; - cooling-min-level = <13>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 35b1949a3e3c..d7fc0e016447 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -80,8 +80,6 @@ 300000 937500 200000 925000 >; - cooling-min-level = <15>; - cooling-max-level = <9>; #cooling-cells = <2>; /* min followed by max */ }; cpu@1 { diff --git a/arch/arm/boot/dts/exynos5420-cpus.dtsi b/arch/arm/boot/dts/exynos5420-cpus.dtsi index 5c052d7ff554..7e6b55561b1d 100644 --- a/arch/arm/boot/dts/exynos5420-cpus.dtsi +++ b/arch/arm/boot/dts/exynos5420-cpus.dtsi @@ -33,8 +33,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -45,8 +43,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -57,8 +53,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -69,8 +63,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -82,8 +74,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; @@ -94,8 +84,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; @@ -106,8 +94,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; @@ -118,8 +104,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; }; diff --git a/arch/arm/boot/dts/exynos5422-cpus.dtsi b/arch/arm/boot/dts/exynos5422-cpus.dtsi index bf3c6f1ec4ee..c8afdf821a77 100644 --- a/arch/arm/boot/dts/exynos5422-cpus.dtsi +++ b/arch/arm/boot/dts/exynos5422-cpus.dtsi @@ -32,8 +32,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -44,8 +42,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -56,8 +52,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -68,8 +62,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -81,8 +73,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <15>; #cooling-cells = <2>; /* min followed by max */ }; @@ -93,8 +83,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <15>; #cooling-cells = <2>; /* min followed by max */ }; @@ -105,8 +93,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <15>; #cooling-cells = <2>; /* min followed by max */ }; @@ -117,8 +103,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <15>; #cooling-cells = <2>; /* min followed by max */ }; }; From 4b93b6d7e3f2b6532525c12af06a014a6b71f19f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 May 2018 16:01:53 +0530 Subject: [PATCH 0287/2608] arm: dts: exynos: Add missing cooling device properties for CPUs commit 672f33198bee21ee91e6af2cb8f67cfc8bc97ec1 upstream. The cooling device properties, like "#cooling-cells" and "dynamic-power-coefficient", should either be present for all the CPUs of a cluster or none. If these are present only for a subset of CPUs of a cluster then things will start falling apart as soon as the CPUs are brought online in a different order. For example, this will happen because the operating system looks for such properties in the CPU node it is trying to bring up, so that it can register a cooling device. Add such missing properties. Fix other missing properties (clocks, OPP, clock latency) as well to make it all work. Signed-off-by: Viresh Kumar Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos3250.dtsi | 16 ++++++++++++++++ arch/arm/boot/dts/exynos4210.dtsi | 13 +++++++++++++ arch/arm/boot/dts/exynos5250.dtsi | 23 +++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index 590ee442d0ae..3ed3d1a0fd40 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -82,6 +82,22 @@ compatible = "arm,cortex-a7"; reg = <1>; clock-frequency = <1000000000>; + clocks = <&cmu CLK_ARM_CLK>; + clock-names = "cpu"; + #cooling-cells = <2>; + + operating-points = < + 1000000 1150000 + 900000 1112500 + 800000 1075000 + 700000 1037500 + 600000 1000000 + 500000 962500 + 400000 925000 + 300000 887500 + 200000 850000 + 100000 850000 + >; }; }; diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index 4e8325b173ea..27e17471ab7a 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -59,6 +59,19 @@ device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0x901>; + clocks = <&clock CLK_ARM_CLK>; + clock-names = "cpu"; + clock-latency = <160000>; + + operating-points = < + 1200000 1250000 + 1000000 1150000 + 800000 1075000 + 500000 975000 + 400000 975000 + 200000 950000 + >; + #cooling-cells = <2>; /* min followed by max */ }; }; diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index d7fc0e016447..b1ed7c57c51e 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -87,6 +87,29 @@ compatible = "arm,cortex-a15"; reg = <1>; clock-frequency = <1700000000>; + clocks = <&clock CLK_ARM_CLK>; + clock-names = "cpu"; + clock-latency = <140000>; + + operating-points = < + 1700000 1300000 + 1600000 1250000 + 1500000 1225000 + 1400000 1200000 + 1300000 1150000 + 1200000 1125000 + 1100000 1100000 + 1000000 1075000 + 900000 1050000 + 800000 1025000 + 700000 1012500 + 600000 1000000 + 500000 975000 + 400000 950000 + 300000 937500 + 200000 925000 + >; + #cooling-cells = <2>; /* min followed by max */ }; }; From 8f9121b4aa0ff10407b5ec91afb4b33317e9f461 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 7 Aug 2018 12:48:48 +0200 Subject: [PATCH 0288/2608] ARM: dts: exynos: Convert exynos5250.dtsi to opp-v2 bindings commit eb9e16d8573e243f8175647f851eb5085dbe97a4 upstream. Convert Exynos5250 to OPP-v2 bindings. This is a preparation to add proper support for suspend operation point, which cannot be marked in opp-v1. Cc: # 4.3.x: cd6f55457eb4: ARM: dts: exynos: Remove "cooling-{min|max}-level" for CPU nodes Cc: # 4.3.x: 672f33198bee: arm: dts: exynos: Add missing cooling device properties for CPUs Cc: # 4.3.x Signed-off-by: Marek Szyprowski Reviewed-by: Chanwoo Choi Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5250.dtsi | 130 ++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 42 deletions(-) diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index b1ed7c57c51e..033e4331e7ca 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -57,62 +57,108 @@ device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <0>; - clock-frequency = <1700000000>; clocks = <&clock CLK_ARM_CLK>; clock-names = "cpu"; - clock-latency = <140000>; - - operating-points = < - 1700000 1300000 - 1600000 1250000 - 1500000 1225000 - 1400000 1200000 - 1300000 1150000 - 1200000 1125000 - 1100000 1100000 - 1000000 1075000 - 900000 1050000 - 800000 1025000 - 700000 1012500 - 600000 1000000 - 500000 975000 - 400000 950000 - 300000 937500 - 200000 925000 - >; + operating-points-v2 = <&cpu0_opp_table>; #cooling-cells = <2>; /* min followed by max */ }; cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <1>; - clock-frequency = <1700000000>; clocks = <&clock CLK_ARM_CLK>; clock-names = "cpu"; - clock-latency = <140000>; - - operating-points = < - 1700000 1300000 - 1600000 1250000 - 1500000 1225000 - 1400000 1200000 - 1300000 1150000 - 1200000 1125000 - 1100000 1100000 - 1000000 1075000 - 900000 1050000 - 800000 1025000 - 700000 1012500 - 600000 1000000 - 500000 975000 - 400000 950000 - 300000 937500 - 200000 925000 - >; + operating-points-v2 = <&cpu0_opp_table>; #cooling-cells = <2>; /* min followed by max */ }; }; + cpu0_opp_table: opp_table0 { + compatible = "operating-points-v2"; + opp-shared; + + opp-200000000 { + opp-hz = /bits/ 64 <200000000>; + opp-microvolt = <925000>; + clock-latency-ns = <140000>; + }; + opp-300000000 { + opp-hz = /bits/ 64 <300000000>; + opp-microvolt = <937500>; + clock-latency-ns = <140000>; + }; + opp-400000000 { + opp-hz = /bits/ 64 <400000000>; + opp-microvolt = <950000>; + clock-latency-ns = <140000>; + }; + opp-500000000 { + opp-hz = /bits/ 64 <500000000>; + opp-microvolt = <975000>; + clock-latency-ns = <140000>; + }; + opp-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <1000000>; + clock-latency-ns = <140000>; + }; + opp-700000000 { + opp-hz = /bits/ 64 <700000000>; + opp-microvolt = <1012500>; + clock-latency-ns = <140000>; + }; + opp-800000000 { + opp-hz = /bits/ 64 <800000000>; + opp-microvolt = <1025000>; + clock-latency-ns = <140000>; + }; + opp-900000000 { + opp-hz = /bits/ 64 <900000000>; + opp-microvolt = <1050000>; + clock-latency-ns = <140000>; + }; + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <1075000>; + clock-latency-ns = <140000>; + }; + opp-1100000000 { + opp-hz = /bits/ 64 <1100000000>; + opp-microvolt = <1100000>; + clock-latency-ns = <140000>; + }; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <1125000>; + clock-latency-ns = <140000>; + }; + opp-1300000000 { + opp-hz = /bits/ 64 <1300000000>; + opp-microvolt = <1150000>; + clock-latency-ns = <140000>; + }; + opp-1400000000 { + opp-hz = /bits/ 64 <1400000000>; + opp-microvolt = <1200000>; + clock-latency-ns = <140000>; + }; + opp-1500000000 { + opp-hz = /bits/ 64 <1500000000>; + opp-microvolt = <1225000>; + clock-latency-ns = <140000>; + }; + opp-1600000000 { + opp-hz = /bits/ 64 <1600000000>; + opp-microvolt = <1250000>; + clock-latency-ns = <140000>; + }; + opp-1700000000 { + opp-hz = /bits/ 64 <1700000000>; + opp-microvolt = <1300000>; + clock-latency-ns = <140000>; + }; + }; + soc: soc { sysram@02020000 { compatible = "mmio-sram"; From 6c0bcd457b943c36c177cf1e901848ec38f5a8c5 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 7 Aug 2018 12:48:49 +0200 Subject: [PATCH 0289/2608] ARM: dts: exynos: Mark 1 GHz CPU OPP as suspend OPP on Exynos5250 commit 645b23da6f8b47f295fa87051335d41d139717a5 upstream. 1 GHz CPU OPP is the default boot value for the Exynos5250 SOC, so mark it as suspend OPP. This fixes suspend/resume on Samsung Exynos5250 Snow Chomebook, which was broken since switching to generic cpufreq-dt driver in v4.3. Cc: # 4.3.x: cd6f55457eb4: ARM: dts: exynos: Remove "cooling-{min|max}-level" for CPU nodes Cc: # 4.3.x: 672f33198bee: arm: dts: exynos: Add missing cooling device properties for CPUs Cc: # 4.3.x Signed-off-by: Marek Szyprowski Reviewed-by: Chanwoo Choi Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5250.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 033e4331e7ca..9f73a8bf6e1c 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -121,6 +121,7 @@ opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <1075000>; clock-latency-ns = <140000>; + opp-suspend; }; opp-1100000000 { opp-hz = /bits/ 64 <1100000000>; From 9b86c5a510fdc72a083a918a0d4d00e1cce5287a Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Tue, 16 Oct 2018 15:21:16 -0700 Subject: [PATCH 0290/2608] xen-swiotlb: use actually allocated size on check physical continuous commit 7250f422da0480d8512b756640f131b9b893ccda upstream. xen_swiotlb_{alloc,free}_coherent() allocate/free memory based on the order of the pages and not size argument (bytes). This is inconsistent with range_straddles_page_boundary and memset which use the 'size' value, which may lead to not exchanging memory with Xen (range_straddles_page_boundary() returned true). And then the call to xen_swiotlb_free_coherent() would actually try to exchange the memory with Xen, leading to the kernel hitting an BUG (as the hypercall returned an error). This patch fixes it by making the 'size' variable be of the same size as the amount of memory allocated. CC: stable@vger.kernel.org Signed-off-by: Joe Jin Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: Christoph Helwig Cc: Dongli Zhang Cc: John Sobecki Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/xen/swiotlb-xen.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f98b8c135db9..95dbee89b758 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -317,6 +317,9 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ flags &= ~(__GFP_DMA | __GFP_HIGHMEM); + /* Convert the size to actually allocated. */ + size = 1UL << (order + XEN_PAGE_SHIFT); + /* On ARM this function returns an ioremap'ped virtual address for * which virt_to_phys doesn't return the corresponding physical * address. In fact on ARM virt_to_phys only works for kernel direct @@ -365,6 +368,9 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, * physical address */ phys = xen_bus_to_phys(dev_addr); + /* Convert the size to actually allocated. */ + size = 1UL << (order + XEN_PAGE_SHIFT); + if (((dev_addr + size - 1 <= dma_mask)) || range_straddles_page_boundary(phys, size)) xen_destroy_contiguous_region(phys, order); From d079cf7527518c96ecebda09c2bbf6b6438279dc Mon Sep 17 00:00:00 2001 From: "Dr. Greg Wettstein" Date: Mon, 17 Sep 2018 18:53:33 -0400 Subject: [PATCH 0291/2608] tpm: Restore functionality to xen vtpm driver. commit e487a0f52301293152a6f8c4e217f2a11dd808e3 upstream. Functionality of the xen-tpmfront driver was lost secondary to the introduction of xenbus multi-page support in commit ccc9d90a9a8b ("xenbus_client: Extend interface to support multi-page ring"). In this commit pointer to location of where the shared page address is stored was being passed to the xenbus_grant_ring() function rather then the address of the shared page itself. This resulted in a situation where the driver would attach to the vtpm-stubdom but any attempt to send a command to the stub domain would timeout. A diagnostic finding for this regression is the following error message being generated when the xen-tpmfront driver probes for a device: <3>vtpm vtpm-0: tpm_transmit: tpm_send: error -62 <3>vtpm vtpm-0: A TPM error (-62) occurred attempting to determine the timeouts This fix is relevant to all kernels from 4.1 forward which is the release in which multi-page xenbus support was introduced. Daniel De Graaf formulated the fix by code inspection after the regression point was located. Fixes: ccc9d90a9a8b ("xenbus_client: Extend interface to support multi-page ring") Signed-off-by: Dr. Greg Wettstein Signed-off-by: Greg Kroah-Hartman [boris: Updated commit message, added Fixes tag] Signed-off-by: Boris Ostrovsky Cc: stable@vger.kernel.org # v4.1+ Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/xen-tpmfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 656e8af95d52..2cffaf567d99 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -203,7 +203,7 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) return -ENOMEM; } - rv = xenbus_grant_ring(dev, &priv->shr, 1, &gref); + rv = xenbus_grant_ring(dev, priv->shr, 1, &gref); if (rv < 0) return rv; From 57cd3a09665ad99de90b9aa8fd31cec26039163e Mon Sep 17 00:00:00 2001 From: Vasilis Liaskovitis Date: Mon, 15 Oct 2018 15:25:08 +0200 Subject: [PATCH 0292/2608] xen/blkfront: avoid NULL blkfront_info dereference on device removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f92898e7f32e3533bfd95be174044bc349d416ca upstream. If a block device is hot-added when we are out of grants, gnttab_grant_foreign_access fails with -ENOSPC (log message "28 granting access to ring page") in this code path: talk_to_blkback -> setup_blkring -> xenbus_grant_ring -> gnttab_grant_foreign_access and the failing path in talk_to_blkback sets the driver_data to NULL: destroy_blkring: blkif_free(info, 0); mutex_lock(&blkfront_mutex); free_info(info); mutex_unlock(&blkfront_mutex); dev_set_drvdata(&dev->dev, NULL); This results in a NULL pointer BUG when blkfront_remove and blkif_free try to access the failing device's NULL struct blkfront_info. Cc: stable@vger.kernel.org # 4.5 and later Signed-off-by: Vasilis Liaskovitis Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 7d23225f79ed..4b2dcb65b011 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2471,6 +2471,9 @@ static int blkfront_remove(struct xenbus_device *xbdev) dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); + if (!info) + return 0; + blkif_free(info, 0); mutex_lock(&info->mutex); From bdc96cef37441af23ce3c2661de5f1984be7ee0c Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sun, 7 Oct 2018 16:05:38 -0400 Subject: [PATCH 0293/2608] xen/balloon: Support xend-based toolstack commit 3aa6c19d2f38be9c6e9a8ad5fa8e3c9d29ee3c35 upstream. Xend-based toolstacks don't have static-max entry in xenstore. The equivalent node for those toolstacks is memory_static_max. Fixes: 5266b8e4445c (xen: fix booting ballooned down hvm guest) Signed-off-by: Boris Ostrovsky Cc: # 4.13 Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-balloon.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 294f35ce9e46..cf8ef8cee5a0 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -75,12 +75,15 @@ static void watch_target(struct xenbus_watch *watch, if (!watch_fired) { watch_fired = true; - err = xenbus_scanf(XBT_NIL, "memory", "static-max", "%llu", - &static_max); - if (err != 1) - static_max = new_target; - else + + if ((xenbus_scanf(XBT_NIL, "memory", "static-max", + "%llu", &static_max) == 1) || + (xenbus_scanf(XBT_NIL, "memory", "memory_static_max", + "%llu", &static_max) == 1)) static_max >>= PAGE_SHIFT - 10; + else + static_max = new_target; + target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 : static_max - balloon_stats.target_pages; } From d3e63ec6468a60c25570451ea587b41025e737e0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 1 Oct 2018 07:57:42 +0200 Subject: [PATCH 0294/2608] xen: fix race in xen_qlock_wait() commit 2ac2a7d4d9ff4e01e36f9c3d116582f6f655ab47 upstream. In the following situation a vcpu waiting for a lock might not be woken up from xen_poll_irq(): CPU 1: CPU 2: CPU 3: takes a spinlock tries to get lock -> xen_qlock_wait() frees the lock -> xen_qlock_kick(cpu2) -> xen_clear_irq_pending() takes lock again tries to get lock -> *lock = _Q_SLOW_VAL -> *lock == _Q_SLOW_VAL ? -> xen_poll_irq() frees the lock -> xen_qlock_kick(cpu3) And cpu 2 will sleep forever. This can be avoided easily by modifying xen_qlock_wait() to call xen_poll_irq() only if the related irq was not pending and to call xen_clear_irq_pending() only if it was pending. Cc: stable@vger.kernel.org Cc: Waiman.Long@hp.com Cc: peterz@infradead.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 08324c64005d..e730d8e759ca 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -46,17 +46,12 @@ static void xen_qlock_wait(u8 *byte, u8 val) if (irq == -1) return; - /* clear pending */ - xen_clear_irq_pending(irq); - barrier(); + /* If irq pending already clear it and return. */ + if (xen_test_irq_pending(irq)) { + xen_clear_irq_pending(irq); + return; + } - /* - * We check the byte value after clearing pending IRQ to make sure - * that we won't miss a wakeup event because of the clearing. - * - * The sync_clear_bit() call in xen_clear_irq_pending() is atomic. - * So it is effectively a memory barrier for x86. - */ if (READ_ONCE(*byte) != val) return; From cbc3fb7b48dc47d4c14cda0f40721af5b2054c0d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 1 Oct 2018 07:57:42 +0200 Subject: [PATCH 0295/2608] xen: make xen_qlock_wait() nestable commit a856531951dc8094359dfdac21d59cee5969c18e upstream. xen_qlock_wait() isn't safe for nested calls due to interrupts. A call of xen_qlock_kick() might be ignored in case a deeper nesting level was active right before the call of xen_poll_irq(): CPU 1: CPU 2: spin_lock(lock1) spin_lock(lock1) -> xen_qlock_wait() -> xen_clear_irq_pending() Interrupt happens spin_unlock(lock1) -> xen_qlock_kick(CPU 2) spin_lock_irqsave(lock2) spin_lock_irqsave(lock2) -> xen_qlock_wait() -> xen_clear_irq_pending() clears kick for lock1 -> xen_poll_irq() spin_unlock_irq_restore(lock2) -> xen_qlock_kick(CPU 2) wakes up spin_unlock_irq_restore(lock2) IRET resumes in xen_qlock_wait() -> xen_poll_irq() never wakes up The solution is to disable interrupts in xen_qlock_wait() and not to poll for the irq in case xen_qlock_wait() is called in nmi context. Cc: stable@vger.kernel.org Cc: Waiman.Long@hp.com Cc: peterz@infradead.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index e730d8e759ca..3243fe1b6ea4 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -40,29 +40,25 @@ static void xen_qlock_kick(int cpu) */ static void xen_qlock_wait(u8 *byte, u8 val) { + unsigned long flags; int irq = __this_cpu_read(lock_kicker_irq); /* If kicker interrupts not initialized yet, just spin */ - if (irq == -1) + if (irq == -1 || in_nmi()) return; - /* If irq pending already clear it and return. */ + /* Guard against reentry. */ + local_irq_save(flags); + + /* If irq pending already clear it. */ if (xen_test_irq_pending(irq)) { xen_clear_irq_pending(irq); - return; + } else if (READ_ONCE(*byte) == val) { + /* Block until irq becomes pending (or a spurious wakeup) */ + xen_poll_irq(irq); } - if (READ_ONCE(*byte) != val) - return; - - /* - * If an interrupt happens here, it will leave the wakeup irq - * pending, which will cause xen_poll_irq() to return - * immediately. - */ - - /* Block until irq becomes pending (or perhaps a spurious wakeup) */ - xen_poll_irq(irq); + local_irq_restore(flags); } static irqreturn_t dummy_handler(int irq, void *dev_id) From ba94ecfc74e4e74acb46afe9a7bea10d869657b9 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 9 Oct 2018 12:32:37 +0200 Subject: [PATCH 0296/2608] xen/pvh: increase early stack size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7deecbda3026f5e2a8cc095d7ef7261a920efcf2 upstream. While booting on an AMD EPYC box the stack canary would detect stack overflows when using the current PVH early stack size (256). Switch to using the value defined by BOOT_STACK_SIZE, which prevents the stack overflow. Cc: # 4.11 Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/xen-pvh.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index 5d7554c025fd..7ecbd3dde2ea 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -178,7 +178,7 @@ canary: .fill 48, 1, 0 early_stack: - .fill 256, 1, 0 + .fill BOOT_STACK_SIZE, 1, 0 early_stack_end: ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, From 9ba9232f654da32311cb5f71a85fc4e792e84d91 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 25 Oct 2018 09:54:15 +0200 Subject: [PATCH 0297/2608] xen/pvh: don't try to unplug emulated devices commit e6111161c0a02d58919d776eec94b313bb57911f upstream. A Xen PVH guest has no associated qemu device model, so trying to unplug any emulated devices is making no sense at all. Bail out early from xen_unplug_emulated_devices() when running as PVH guest. This will avoid issuing the boot message: [ 0.000000] Xen Platform PCI: unrecognised magic value Cc: # 4.11 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/platform-pci-unplug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 33a783c77d96..184b36922397 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -146,6 +146,10 @@ void xen_unplug_emulated_devices(void) { int r; + /* PVH guests don't have emulated devices. */ + if (xen_pvh_domain()) + return; + /* user explicitly requested no unplug */ if (xen_emul_unplug & XEN_UNPLUG_NEVER) return; From acc14d41da801242ef7ad21c55a7f2569a1fd2f1 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sat, 6 Oct 2018 22:12:32 +0200 Subject: [PATCH 0298/2608] libertas: don't set URB_ZERO_PACKET on IN USB transfer commit 6528d88047801b80d2a5370ad46fb6eff2f509e0 upstream. The USB core gets rightfully upset: usb 1-1: BOGUS urb flags, 240 --> 200 WARNING: CPU: 0 PID: 60 at drivers/usb/core/urb.c:503 usb_submit_urb+0x2f8/0x3ed Modules linked in: CPU: 0 PID: 60 Comm: kworker/0:3 Not tainted 4.19.0-rc6-00319-g5206d00a45c7 #39 Hardware name: OLPC XO/XO, BIOS OLPC Ver 1.00.01 06/11/2014 Workqueue: events request_firmware_work_func EIP: usb_submit_urb+0x2f8/0x3ed Code: 75 06 8b 8f 80 00 00 00 8d 47 78 89 4d e4 89 55 e8 e8 35 1c f6 ff 8b 55 e8 56 52 8b 4d e4 51 50 68 e3 ce c7 c0 e8 ed 18 c6 ff <0f> 0b 83 c4 14 80 7d ef 01 74 0a 80 7d ef 03 0f 85 b8 00 00 00 8b EAX: 00000025 EBX: ce7d4980 ECX: 00000000 EDX: 00000001 ESI: 00000200 EDI: ce7d8800 EBP: ce7f5ea8 ESP: ce7f5e70 DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 EFLAGS: 00210292 CR0: 80050033 CR2: 00000000 CR3: 00e80000 CR4: 00000090 Call Trace: ? if_usb_fw_timeo+0x64/0x64 __if_usb_submit_rx_urb+0x85/0xe6 ? if_usb_fw_timeo+0x64/0x64 if_usb_submit_rx_urb_fwload+0xd/0xf if_usb_prog_firmware+0xc0/0x3db ? _request_firmware+0x54/0x47b ? _request_firmware+0x89/0x47b ? if_usb_probe+0x412/0x412 lbs_fw_loaded+0x55/0xa6 ? debug_smp_processor_id+0x12/0x14 helper_firmware_cb+0x3c/0x3f request_firmware_work_func+0x37/0x6f process_one_work+0x164/0x25a worker_thread+0x1c4/0x284 kthread+0xec/0xf1 ? cancel_delayed_work_sync+0xf/0xf ? kthread_create_on_node+0x1a/0x1a ret_from_fork+0x2e/0x38 ---[ end trace 3ef1e3b2dd53852f ]--- Cc: stable@vger.kernel.org Signed-off-by: Lubomir Rintel Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/libertas/if_usb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 16e54c757dd0..e4ae2b5a71c2 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -456,8 +456,6 @@ static int __if_usb_submit_rx_urb(struct if_usb_card *cardp, MRVDRV_ETH_RX_PACKET_BUFFER_SIZE, callbackfn, cardp); - cardp->rx_urb->transfer_flags |= URB_ZERO_PACKET; - lbs_deb_usb2(&cardp->udev->dev, "Pointer for rx_urb %p\n", cardp->rx_urb); if ((ret = usb_submit_urb(cardp->rx_urb, GFP_ATOMIC))) { lbs_deb_usbd(&cardp->udev->dev, "Submit Rx URB failed: %d\n", ret); From 255624a35b4ca228bac9a1f135263e28c27c4640 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Thu, 18 Oct 2018 10:19:29 -0600 Subject: [PATCH 0299/2608] usbip:vudc: BUG kmalloc-2048 (Not tainted): Poison overwritten commit e28fd56ad5273be67d0fae5bedc7e1680e729952 upstream. In rmmod path, usbip_vudc does platform_device_put() twice once from platform_device_unregister() and then from put_vudc_device(). The second put results in: BUG kmalloc-2048 (Not tainted): Poison overwritten error or BUG: KASAN: use-after-free in kobject_put+0x1e/0x230 if KASAN is enabled. [ 169.042156] calling init+0x0/0x1000 [usbip_vudc] @ 1697 [ 169.042396] ============================================================================= [ 169.043678] probe of usbip-vudc.0 returned 1 after 350 usecs [ 169.044508] BUG kmalloc-2048 (Not tainted): Poison overwritten [ 169.044509] ----------------------------------------------------------------------------- ... [ 169.057849] INFO: Freed in device_release+0x2b/0x80 age=4223 cpu=3 pid=1693 [ 169.057852] kobject_put+0x86/0x1b0 [ 169.057853] 0xffffffffc0c30a96 [ 169.057855] __x64_sys_delete_module+0x157/0x240 Fix it to call platform_device_del() instead and let put_vudc_device() do the platform_device_put(). Reported-by: Randy Dunlap Signed-off-by: Shuah Khan (Samsung OSG) Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vudc_main.c b/drivers/usb/usbip/vudc_main.c index 9e655714e389..916e2eefc886 100644 --- a/drivers/usb/usbip/vudc_main.c +++ b/drivers/usb/usbip/vudc_main.c @@ -85,6 +85,10 @@ static int __init init(void) cleanup: list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) { list_del(&udc_dev->dev_entry); + /* + * Just do platform_device_del() here, put_vudc_device() + * calls the platform_device_put() + */ platform_device_del(udc_dev->pdev); put_vudc_device(udc_dev); } @@ -101,7 +105,11 @@ static void __exit cleanup(void) list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) { list_del(&udc_dev->dev_entry); - platform_device_unregister(udc_dev->pdev); + /* + * Just do platform_device_del() here, put_vudc_device() + * calls the platform_device_put() + */ + platform_device_del(udc_dev->pdev); put_vudc_device(udc_dev); } platform_driver_unregister(&vudc_driver); From c459fed018c33ae79dbb892ba2a5874cc7932781 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 2 Oct 2018 20:57:44 +0900 Subject: [PATCH 0300/2608] usb: gadget: udc: renesas_usb3: Fix b-device mode for "workaround" commit afc92514a34c7414b28047b1205a6b709103c699 upstream. If the "workaround_for_vbus" is true, the driver will not call usb_disconnect(). So, since the controller keeps some registers' value, the driver doesn't re-enumarate suitable speed after the b-device mode is disabled. To fix the issue, this patch adds usb_disconnect() calling in renesas_usb3_b_device_write() if workaround_for_vbus is true. Fixes: 43ba968b00ea ("usb: gadget: udc: renesas_usb3: add debugfs to set the b-device mode") Cc: # v4.14+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 36a706f475d2..ade0723787e5 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2374,6 +2374,9 @@ static ssize_t renesas_usb3_b_device_write(struct file *file, else usb3->forced_b_device = false; + if (usb3->workaround_for_vbus) + usb3_disconnect(usb3); + /* Let this driver call usb3_connect() anyway */ usb3_check_id(usb3); From 0c38cad1d15004c2d6dc188a6f3f362a98883334 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 13 Oct 2018 09:46:08 +0300 Subject: [PATCH 0301/2608] iwlwifi: mvm: check return value of rs_rate_from_ucode_rate() commit 3d71c3f1f50cf309bd20659422af549bc784bfff upstream. The rs_rate_from_ucode_rate() function may return -EINVAL if the rate is invalid, but none of the callsites check for the error, potentially making us access arrays with index IWL_RATE_INVALID, which is larger than the arrays, causing an out-of-bounds access. This will trigger KASAN warnings, such as the one reported in the bugzilla issue mentioned below. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=200659 Cc: stable@vger.kernel.org Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 24 ++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 386fdee23eb0..bd48cd0eb395 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -1226,7 +1226,11 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; - rs_rate_from_ucode_rate(tx_resp_hwrate, info->band, &tx_resp_rate); + if (rs_rate_from_ucode_rate(tx_resp_hwrate, info->band, + &tx_resp_rate)) { + WARN_ON_ONCE(1); + return; + } #ifdef CONFIG_MAC80211_DEBUGFS /* Disable last tx check if we are debugging with fixed rate but @@ -1277,7 +1281,10 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, */ table = &lq_sta->lq; lq_hwrate = le32_to_cpu(table->rs_table[0]); - rs_rate_from_ucode_rate(lq_hwrate, info->band, &lq_rate); + if (rs_rate_from_ucode_rate(lq_hwrate, info->band, &lq_rate)) { + WARN_ON_ONCE(1); + return; + } /* Here we actually compare this rate to the latest LQ command */ if (lq_color != LQ_FLAG_COLOR_GET(table->flags)) { @@ -1379,8 +1386,12 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, /* Collect data for each rate used during failed TX attempts */ for (i = 0; i <= retries; ++i) { lq_hwrate = le32_to_cpu(table->rs_table[i]); - rs_rate_from_ucode_rate(lq_hwrate, info->band, - &lq_rate); + if (rs_rate_from_ucode_rate(lq_hwrate, info->band, + &lq_rate)) { + WARN_ON_ONCE(1); + return; + } + /* * Only collect stats if retried rate is in the same RS * table as active/search. @@ -3244,7 +3255,10 @@ static void rs_build_rates_table_from_fixed(struct iwl_mvm *mvm, for (i = 0; i < num_rates; i++) lq_cmd->rs_table[i] = ucode_rate_le32; - rs_rate_from_ucode_rate(ucode_rate, band, &rate); + if (rs_rate_from_ucode_rate(ucode_rate, band, &rate)) { + WARN_ON_ONCE(1); + return; + } if (is_mimo(&rate)) lq_cmd->mimo_delim = num_rates - 1; From 1d982ccf0e67d9248a10a5aab1f3d96825099a19 Mon Sep 17 00:00:00 2001 From: Stefan Nuernberger Date: Mon, 17 Sep 2018 19:46:53 +0200 Subject: [PATCH 0302/2608] net/ipv4: defensive cipso option parsing commit 076ed3da0c9b2f88d9157dbe7044a45641ae369e upstream. commit 40413955ee26 ("Cipso: cipso_v4_optptr enter infinite loop") fixed a possible infinite loop in the IP option parsing of CIPSO. The fix assumes that ip_options_compile filtered out all zero length options and that no other one-byte options beside IPOPT_END and IPOPT_NOOP exist. While this assumption currently holds true, add explicit checks for zero length and invalid length options to be safe for the future. Even though ip_options_compile should have validated the options, the introduction of new one-byte options can still confuse this code without the additional checks. Signed-off-by: Stefan Nuernberger Cc: David Woodhouse Cc: Simon Veith Cc: stable@vger.kernel.org Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 82178cc69c96..777fa3b7fb13 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1512,7 +1512,7 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, * * Description: * Parse the packet's IP header looking for a CIPSO option. Returns a pointer - * to the start of the CIPSO option on success, NULL if one if not found. + * to the start of the CIPSO option on success, NULL if one is not found. * */ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) @@ -1522,10 +1522,8 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) int optlen; int taglen; - for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { + for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 1; ) { switch (optptr[0]) { - case IPOPT_CIPSO: - return optptr; case IPOPT_END: return NULL; case IPOPT_NOOP: @@ -1534,6 +1532,11 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) default: taglen = optptr[1]; } + if (!taglen || taglen > optlen) + return NULL; + if (optptr[0] == IPOPT_CIPSO) + return optptr; + optlen -= taglen; optptr += taglen; } From 1e4465dae54959cb92d1bb949df67b0b40372eac Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 14 Oct 2018 23:28:50 +0200 Subject: [PATCH 0303/2608] dmaengine: ppc4xx: fix off-by-one build failure commit 27d8d2d7a9b7eb05c4484b74b749eaee7b50b845 upstream. There are two poly_store, but one should have been poly_show. |adma.c:4382:16: error: conflicting types for 'poly_store' | static ssize_t poly_store(struct device_driver *dev, const char *buf, | ^~~~~~~~~~ |adma.c:4363:16: note: previous definition of 'poly_store' was here | static ssize_t poly_store(struct device_driver *dev, char *buf) | ^~~~~~~~~~ CC: stable@vger.kernel.org Fixes: 13efe1a05384 ("dmaengine: ppc4xx: remove DRIVER_ATTR() usage") Signed-off-by: Christian Lamparter Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ppc4xx/adma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index 4cf0d4d0cecf..25610286979f 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -4360,7 +4360,7 @@ static ssize_t enable_store(struct device_driver *dev, const char *buf, } static DRIVER_ATTR_RW(enable); -static ssize_t poly_store(struct device_driver *dev, char *buf) +static ssize_t poly_show(struct device_driver *dev, char *buf) { ssize_t size = 0; u32 reg; From 381fa28746b552875e3eccfbfdd45a7bc97f4d92 Mon Sep 17 00:00:00 2001 From: Pierre Yves MORDRET Date: Tue, 13 Mar 2018 17:42:06 +0100 Subject: [PATCH 0304/2608] dmaengine: stm32-dma: fix incomplete configuration in cyclic mode commit e57cb3b3f10d005410f09d4598cc6d62b833f2b0 upstream. When in cyclic mode, the configuration is updated after having started the DMA hardware (STM32_DMA_SCR_EN) leading to incomplete configuration of SMxAR registers. Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Hugues Fruchet Signed-off-by: Vinod Koul Cc: "Joel Fernandes (Google)" Signed-off-by: Greg Kroah-Hartman --- drivers/dma/stm32-dma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 786fc8fcc38e..32192e98159b 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -429,6 +429,8 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan) dev_dbg(chan2dev(chan), "SFCR: 0x%08x\n", sfcr); } +static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan); + static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) { struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); @@ -471,6 +473,9 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) if (status) stm32_dma_irq_clear(chan, status); + if (chan->desc->cyclic) + stm32_dma_configure_next_sg(chan); + stm32_dma_dump_reg(chan); /* Start DMA */ @@ -564,8 +569,7 @@ static void stm32_dma_issue_pending(struct dma_chan *c) if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) { dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan); stm32_dma_start_transfer(chan); - if (chan->desc->cyclic) - stm32_dma_configure_next_sg(chan); + } spin_unlock_irqrestore(&chan->vchan.lock, flags); } From 1490de2bb0836fc0631c04d0559fdf81545b672f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 25 Sep 2018 13:53:02 -0700 Subject: [PATCH 0305/2608] libnvdimm: Hold reference on parent while scheduling async init commit b6eae0f61db27748606cc00dafcfd1e2c032f0a5 upstream. Unlike asynchronous initialization in the core we have not yet associated the device with the parent, and as such the device doesn't hold a reference to the parent. In order to resolve that we should be holding a reference on the parent until the asynchronous initialization has completed. Cc: Fixes: 4d88a97aa9e8 ("libnvdimm: ...base ... infrastructure") Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index fb5ab5812a22..a6746a1f20ae 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -484,6 +484,8 @@ static void nd_async_device_register(void *d, async_cookie_t cookie) put_device(dev); } put_device(dev); + if (dev->parent) + put_device(dev->parent); } static void nd_async_device_unregister(void *d, async_cookie_t cookie) @@ -503,6 +505,8 @@ void __nd_device_register(struct device *dev) if (!dev) return; dev->bus = &nvdimm_bus_type; + if (dev->parent) + get_device(dev->parent); get_device(dev); async_schedule_domain(nd_async_device_register, dev, &nd_async_domain); From 75c7ba4853719d8efec51f45d3f26ff2b4991a2e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 27 Sep 2018 15:01:55 -0700 Subject: [PATCH 0306/2608] libnvdimm, region: Fail badblocks listing for inactive regions commit 5d394eee2c102453278d81d9a7cf94c80253486a upstream. While experimenting with region driver loading the following backtrace was triggered: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. [..] Call Trace: dump_stack+0x85/0xcb register_lock_class+0x571/0x580 ? __lock_acquire+0x2ba/0x1310 ? kernfs_seq_start+0x2a/0x80 __lock_acquire+0xd4/0x1310 ? dev_attr_show+0x1c/0x50 ? __lock_acquire+0x2ba/0x1310 ? kernfs_seq_start+0x2a/0x80 ? lock_acquire+0x9e/0x1a0 lock_acquire+0x9e/0x1a0 ? dev_attr_show+0x1c/0x50 badblocks_show+0x70/0x190 ? dev_attr_show+0x1c/0x50 dev_attr_show+0x1c/0x50 This results from a missing successful call to devm_init_badblocks() from nd_region_probe(). Block attempts to show badblocks while the region is not enabled. Fixes: 6a6bef90425e ("libnvdimm: add mechanism to publish badblocks...") Cc: Reviewed-by: Johannes Thumshirn Reviewed-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/region_devs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index abaf38c61220..050deb56ee62 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -513,10 +513,17 @@ static ssize_t region_badblocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; - return badblocks_show(&nd_region->bb, buf, 0); + device_lock(dev); + if (dev->driver) + rc = badblocks_show(&nd_region->bb, buf, 0); + else + rc = -ENXIO; + device_unlock(dev); + + return rc; } - static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL); static ssize_t resource_show(struct device *dev, From 83fc44a1498347d5fcb670d3b7536bea356a5c9f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Oct 2018 19:31:44 +0200 Subject: [PATCH 0307/2608] ASoC: intel: skylake: Add missing break in skl_tplg_get_token() commit 9c80c5a8831471e0a3e139aad1b0d4c0fdc50b2f upstream. skl_tplg_get_token() misses a break in the big switch() block for SKL_TKN_U8_CORE_ID entry. Spotted nicely by -Wimplicit-fallthrough compiler option. Fixes: 6277e83292a2 ("ASoC: Intel: Skylake: Parse vendor tokens to build module data") Cc: Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 22f768ca3c73..b45c1ae60f94 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -2360,6 +2360,7 @@ static int skl_tplg_get_token(struct device *dev, case SKL_TKN_U8_CORE_ID: mconfig->core_id = tkn_elem->value; + break; case SKL_TKN_U8_MOD_TYPE: mconfig->m_type = tkn_elem->value; From 97a063dfcbdf910bfe330c1ca53bd7c70ae1ae75 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 15 Oct 2018 14:13:35 +0300 Subject: [PATCH 0308/2608] IB/mlx5: Fix MR cache initialization commit 013c2403bf32e48119aeb13126929f81352cc7ac upstream. Schedule MR cache work only after bucket was initialized. Cc: # 4.10 Fixes: 49780d42dfc9 ("IB/mlx5: Expose MR cache for mlx5_ib") Signed-off-by: Artemy Kovalyov Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9866c5d1b99f..e88bb71056cd 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -675,7 +675,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) init_completion(&ent->compl); INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - queue_work(cache->wq, &ent->work); if (i > MR_CACHE_LAST_STD_ENTRY) { mlx5_odp_init_mr_cache_entry(ent); @@ -694,6 +693,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; + queue_work(cache->wq, &ent->work); } err = mlx5_mr_cache_debugfs_init(dev); From cf08282805374fbf0c3c6a124f8e4687c21c6615 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 5 Oct 2018 18:44:40 -0400 Subject: [PATCH 0309/2608] jbd2: fix use after free in jbd2_log_do_checkpoint() commit ccd3c4373eacb044eb3832966299d13d2631f66f upstream. The code cleaning transaction's lists of checkpoint buffers has a bug where it increases bh refcount only after releasing journal->j_list_lock. Thus the following race is possible: CPU0 CPU1 jbd2_log_do_checkpoint() jbd2_journal_try_to_free_buffers() __journal_try_to_free_buffer(bh) ... while (transaction->t_checkpoint_io_list) ... if (buffer_locked(bh)) { <-- IO completes now, buffer gets unlocked --> spin_unlock(&journal->j_list_lock); spin_lock(&journal->j_list_lock); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); try_to_free_buffers(page); get_bh(bh) <-- accesses freed bh Fix the problem by grabbing bh reference before unlocking journal->j_list_lock. Fixes: dc6e8d669cf5 ("jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()") Fixes: be1158cc615f ("jbd2: fold __process_buffer() into jbd2_log_do_checkpoint()") Reported-by: syzbot+7f4a27091759e2fe7453@syzkaller.appspotmail.com CC: stable@vger.kernel.org Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/checkpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 4055f51617ef..fe4fe155b7fb 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -254,8 +254,8 @@ restart: bh = jh2bh(jh); if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); get_bh(bh); + spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); @@ -336,8 +336,8 @@ restart2: jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); get_bh(bh); + spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); From 63be2065bf4d66c50d3d43935c4687d25642dbbd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Oct 2018 00:19:13 -0400 Subject: [PATCH 0310/2608] gfs2_meta: ->mount() can get NULL dev_name commit 3df629d873f8683af6f0d34dfc743f637966d483 upstream. get in sync with mount_bdev() handling of the same Reported-by: syzbot+c54f8e94e6bba03b04e9@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a3711f543405..28d6c65c8bb3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1352,6 +1352,9 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, struct path path; int error; + if (!dev_name || !*dev_name) + return ERR_PTR(-EINVAL); + error = kern_path(dev_name, LOOKUP_FOLLOW, &path); if (error) { pr_warn("path_lookup on %s returned error %d\n", From 13b63ba40348d9c43c3842632d3f371a97ca5fe4 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 2 Oct 2018 21:18:45 -0400 Subject: [PATCH 0311/2608] ext4: initialize retries variable in ext4_da_write_inline_data_begin() commit 625ef8a3acd111d5f496d190baf99d1a815bd03e upstream. Variable retries is not initialized in ext4_da_write_inline_data_begin() which can lead to nondeterministic number of retries in case we hit ENOSPC. Initialize retries to zero as we do everywhere else. Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Fixes: bc0ca9df3b2a ("ext4: retry allocation when inline->extent conversion failed") Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 4e1d62ba0703..ac2e0516c16f 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -869,7 +869,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, handle_t *handle; struct page *page; struct ext4_iloc iloc; - int retries; + int retries = 0; ret = ext4_get_inode_loc(inode, &iloc); if (ret) From 65dc3dd8c550d7b16b4901b2fe4c4cd022931a0e Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 3 Oct 2018 10:33:32 -0400 Subject: [PATCH 0312/2608] ext4: fix setattr project check in fssetxattr ioctl commit dc7ac6c4cae3b58724c2f1e21a7c05ce19ecd5a8 upstream. Currently, project quota could be changed by fssetxattr ioctl, and existed permission check inode_owner_or_capable() is obviously not enough, just think that common users could change project id of file, that could make users to break project quota easily. This patch try to follow same regular of xfs project quota: "Project Quota ID state is only allowed to change from within the init namespace. Enforce that restriction only if we are trying to change the quota ID state. Everything else is allowed in user namespaces." Besides that, check and set project id'state should be an atomic operation, protect whole operation with inode lock, ext4_ioctl_setproject() is only used for ioctl EXT4_IOC_FSSETXATTR, we have held mnt_want_write_file() before ext4_ioctl_setflags(), and ext4_ioctl_setproject() is called after ext4_ioctl_setflags(), we could share codes, so remove it inside ext4_ioctl_setproject(). Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 60 ++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1eb68e626931..d75b7aae3d74 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -344,19 +344,14 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) if (projid_eq(kprojid, EXT4_I(inode)->i_projid)) return 0; - err = mnt_want_write_file(filp); - if (err) - return err; - err = -EPERM; - inode_lock(inode); /* Is it quota file? Do not allow user to mess with it */ if (ext4_is_quota_file(inode)) - goto out_unlock; + return err; err = ext4_get_inode_loc(inode, &iloc); if (err) - goto out_unlock; + return err; raw_inode = ext4_raw_inode(&iloc); if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) { @@ -364,7 +359,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) EXT4_SB(sb)->s_want_extra_isize, &iloc); if (err) - goto out_unlock; + return err; } else { brelse(iloc.bh); } @@ -374,10 +369,8 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) handle = ext4_journal_start(inode, EXT4_HT_QUOTA, EXT4_QUOTA_INIT_BLOCKS(sb) + EXT4_QUOTA_DEL_BLOCKS(sb) + 3); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto out_unlock; - } + if (IS_ERR(handle)) + return PTR_ERR(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) @@ -405,9 +398,6 @@ out_dirty: err = rc; out_stop: ext4_journal_stop(handle); -out_unlock: - inode_unlock(inode); - mnt_drop_write_file(filp); return err; } #else @@ -592,6 +582,30 @@ static int ext4_ioc_getfsmap(struct super_block *sb, return 0; } +static int ext4_ioctl_check_project(struct inode *inode, struct fsxattr *fa) +{ + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() == &init_user_ns) + return 0; + + if (__kprojid_val(EXT4_I(inode)->i_projid) != fa->fsx_projid) + return -EINVAL; + + if (ext4_test_inode_flag(inode, EXT4_INODE_PROJINHERIT)) { + if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT)) + return -EINVAL; + } else { + if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) + return -EINVAL; + } + + return 0; +} + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1029,19 +1043,19 @@ resizefs_out: return err; inode_lock(inode); + err = ext4_ioctl_check_project(inode, &fa); + if (err) + goto out; flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | (flags & EXT4_FL_XFLAG_VISIBLE); err = ext4_ioctl_setflags(inode, flags); + if (err) + goto out; + err = ext4_ioctl_setproject(filp, fa.fsx_projid); +out: inode_unlock(inode); mnt_drop_write_file(filp); - if (err) - return err; - - err = ext4_ioctl_setproject(filp, fa.fsx_projid); - if (err) - return err; - - return 0; + return err; } case EXT4_IOC_SHUTDOWN: return ext4_shutdown(sb, arg); From d197b725e9f0ef87ba014247211b0b900d868e04 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 3 Oct 2018 12:19:21 -0400 Subject: [PATCH 0313/2608] ext4: propagate error from dquot_initialize() in EXT4_IOC_FSSETXATTR commit 182a79e0c17147d2c2d3990a9a7b6b58a1561c7a upstream. We return most failure of dquota_initialize() except inode evict, this could make a bit sense, for example we allow file removal even quota files are broken? But it dosen't make sense to allow setting project if quota files etc are broken. Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d75b7aae3d74..b2a47058e04c 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -364,7 +364,9 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) brelse(iloc.bh); } - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) + return err; handle = ext4_journal_start(inode, EXT4_HT_QUOTA, EXT4_QUOTA_INIT_BLOCKS(sb) + From 44dcd01ee170f776fec7bd6386d08a525d713b91 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 12 Oct 2018 09:28:09 -0400 Subject: [PATCH 0314/2608] ext4: fix use-after-free race in ext4_remount()'s error path commit 33458eaba4dfe778a426df6a19b7aad2ff9f7eec upstream. It's possible for ext4_show_quota_options() to try reading s_qf_names[i] while it is being modified by ext4_remount() --- most notably, in ext4_remount's error path when the original values of the quota file name gets restored. Reported-by: syzbot+a2872d6feea6918008a9@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.2+ Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 +- fs/ext4/super.c | 73 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c96778c39885..c0c6562b3c44 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1421,7 +1421,8 @@ struct ext4_sb_info { u32 s_min_batch_time; struct block_device *journal_bdev; #ifdef CONFIG_QUOTA - char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ + /* Names of quota files with journalled quota */ + char __rcu *s_qf_names[EXT4_MAXQUOTAS]; int s_jquota_fmt; /* Format of quota to use */ #endif unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9dbd27f7b778..46ad267ef6d6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -855,6 +855,18 @@ static inline void ext4_quota_off_umount(struct super_block *sb) for (type = 0; type < EXT4_MAXQUOTAS; type++) ext4_quota_off(sb, type); } + +/* + * This is a helper function which is used in the mount/remount + * codepaths (which holds s_umount) to fetch the quota file name. + */ +static inline char *get_qf_name(struct super_block *sb, + struct ext4_sb_info *sbi, + int type) +{ + return rcu_dereference_protected(sbi->s_qf_names[type], + lockdep_is_held(&sb->s_umount)); +} #else static inline void ext4_quota_off_umount(struct super_block *sb) { @@ -907,7 +919,7 @@ static void ext4_put_super(struct super_block *sb) percpu_free_rwsem(&sbi->s_journal_flag_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(get_qf_name(sb, sbi, i)); #endif /* Debugging code just in case the in-memory inode orphan list @@ -1473,11 +1485,10 @@ static const char deprecated_msg[] = static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) { struct ext4_sb_info *sbi = EXT4_SB(sb); - char *qname; + char *qname, *old_qname = get_qf_name(sb, sbi, qtype); int ret = -1; - if (sb_any_quota_loaded(sb) && - !sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && !old_qname) { ext4_msg(sb, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); @@ -1494,8 +1505,8 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "Not enough memory for storing quotafile name"); return -1; } - if (sbi->s_qf_names[qtype]) { - if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + if (old_qname) { + if (strcmp(old_qname, qname) == 0) ret = 1; else ext4_msg(sb, KERN_ERR, @@ -1508,7 +1519,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "quotafile must be on filesystem root"); goto errout; } - sbi->s_qf_names[qtype] = qname; + rcu_assign_pointer(sbi->s_qf_names[qtype], qname); set_opt(sb, QUOTA); return 1; errout: @@ -1520,15 +1531,16 @@ static int clear_qf_name(struct super_block *sb, int qtype) { struct ext4_sb_info *sbi = EXT4_SB(sb); + char *old_qname = get_qf_name(sb, sbi, qtype); - if (sb_any_quota_loaded(sb) && - sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && old_qname) { ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" " when quota turned on"); return -1; } - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + rcu_assign_pointer(sbi->s_qf_names[qtype], NULL); + synchronize_rcu(); + kfree(old_qname); return 1; } #endif @@ -1901,7 +1913,7 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) { struct ext4_sb_info *sbi = EXT4_SB(sb); - char *p; + char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name; substring_t args[MAX_OPT_ARGS]; int token; @@ -1932,11 +1944,13 @@ static int parse_options(char *options, struct super_block *sb, "Cannot enable project quota enforcement."); return 0; } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + usr_qf_name = get_qf_name(sb, sbi, USRQUOTA); + grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA); + if (usr_qf_name || grp_qf_name) { + if (test_opt(sb, USRQUOTA) && usr_qf_name) clear_opt(sb, USRQUOTA); - if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sb, GRPQUOTA) && grp_qf_name) clear_opt(sb, GRPQUOTA); if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { @@ -1970,6 +1984,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, { #if defined(CONFIG_QUOTA) struct ext4_sb_info *sbi = EXT4_SB(sb); + char *usr_qf_name, *grp_qf_name; if (sbi->s_jquota_fmt) { char *fmtname = ""; @@ -1988,11 +2003,14 @@ static inline void ext4_show_quota_options(struct seq_file *seq, seq_printf(seq, ",jqfmt=%s", fmtname); } - if (sbi->s_qf_names[USRQUOTA]) - seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); - - if (sbi->s_qf_names[GRPQUOTA]) - seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + rcu_read_lock(); + usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]); + grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]); + if (usr_qf_name) + seq_show_option(seq, "usrjquota", usr_qf_name); + if (grp_qf_name) + seq_show_option(seq, "grpjquota", grp_qf_name); + rcu_read_unlock(); #endif } @@ -5038,6 +5056,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) int err = 0; #ifdef CONFIG_QUOTA int i, j; + char *to_free[EXT4_MAXQUOTAS]; #endif char *orig_data = kstrdup(data, GFP_KERNEL); @@ -5054,8 +5073,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_jquota_fmt = sbi->s_jquota_fmt; for (i = 0; i < EXT4_MAXQUOTAS; i++) if (sbi->s_qf_names[i]) { - old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); + char *qf_name = get_qf_name(sb, sbi, i); + + old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL); if (!old_opts.s_qf_names[i]) { for (j = 0; j < i; j++) kfree(old_opts.s_qf_names[j]); @@ -5277,9 +5297,12 @@ restore_opts: #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; for (i = 0; i < EXT4_MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = old_opts.s_qf_names[i]; + to_free[i] = get_qf_name(sb, sbi, i); + rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]); } + synchronize_rcu(); + for (i = 0; i < EXT4_MAXQUOTAS; i++) + kfree(to_free[i]); #endif kfree(orig_data); return err; @@ -5469,7 +5492,7 @@ static int ext4_write_info(struct super_block *sb, int type) */ static int ext4_quota_on_mount(struct super_block *sb, int type) { - return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], + return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type), EXT4_SB(sb)->s_jquota_fmt, type); } From 872f9d86659a84fca92c4908c3c0fbaebf9d007c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 19 Oct 2018 17:01:33 -0300 Subject: [PATCH 0315/2608] HID: hiddev: fix potential Spectre v1 commit f11274396a538b31bc010f782e05c2ce3f804c13 upstream. uref->usage_index can be indirectly controlled by userspace, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This field is used as an array index by the hiddev_ioctl_usage() function, when 'cmd' is either HIDIOCGCOLLECTIONINDEX, HIDIOCGUSAGES or HIDIOCSUSAGES. For cmd == HIDIOCGCOLLECTIONINDEX case, uref->usage_index is compared to field->maxusage and then used as an index to dereference field->usage array. The same thing happens to the cmd == HIDIOC{G,S}USAGES cases, where uref->usage_index is checked against an array maximum value and then it is used as an index in an array. This is a summary of the HIDIOCGCOLLECTIONINDEX case, which matches the traditional Spectre V1 first load: copy_from_user(uref, user_arg, sizeof(*uref)) if (uref->usage_index >= field->maxusage) goto inval; i = field->usage[uref->usage_index].collection_index; return i; This patch fixes this by sanitizing field uref->usage_index before using it to index field->usage (HIDIOCGCOLLECTIONINDEX) or field->value in HIDIOC{G,S}USAGES arrays, thus, avoiding speculation in the first load. Cc: Signed-off-by: Breno Leitao v2: Contemplate cmd == HIDIOC{G,S}USAGES case Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index cf307bdc3d53..89761551c15d 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -512,14 +512,24 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, if (cmd == HIDIOCGCOLLECTIONINDEX) { if (uref->usage_index >= field->maxusage) goto inval; + uref->usage_index = + array_index_nospec(uref->usage_index, + field->maxusage); } else if (uref->usage_index >= field->report_count) goto inval; } - if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && - (uref_multi->num_values > HID_MAX_MULTI_USAGES || - uref->usage_index + uref_multi->num_values > field->report_count)) - goto inval; + if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) { + if (uref_multi->num_values > HID_MAX_MULTI_USAGES || + uref->usage_index + uref_multi->num_values > + field->report_count) + goto inval; + + uref->usage_index = + array_index_nospec(uref->usage_index, + field->report_count - + uref_multi->num_values); + } switch (cmd) { case HIDIOCGUSAGE: From 52314c7f81d8a3af54c40b213bfa3d9b060038e1 Mon Sep 17 00:00:00 2001 From: Michael Jin Date: Thu, 16 Aug 2018 15:28:40 -0400 Subject: [PATCH 0316/2608] EDAC, amd64: Add Family 17h, models 10h-2fh support commit 8960de4a5ca7980ed1e19e7ca5a774d3b7a55c38 upstream. Add new device IDs for family 17h, models 10h-2fh. This is required by amd64_edac_mod in order to properly detect PCI device functions 0 and 6. Signed-off-by: Michael Jin Reviewed-by: Yazen Ghannam Cc: Link: http://lkml.kernel.org/r/20180816192840.31166-1-mikhail.jin@gmail.com Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/amd64_edac.c | 14 ++++++++++++++ drivers/edac/amd64_edac.h | 3 +++ 2 files changed, 17 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 59ce32e405ac..667f5ba0403c 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2200,6 +2200,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_base_addr_to_cs_size, } }, + [F17_M10H_CPUS] = { + .ctl_name = "F17h_M10h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_base_addr_to_cs_size, + } + }, }; /* @@ -3188,6 +3197,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x17: + if (pvt->model >= 0x10 && pvt->model <= 0x2f) { + fam_type = &family_types[F17_M10H_CPUS]; + pvt->ops = &family_types[F17_M10H_CPUS].ops; + break; + } fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; break; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 1d4b74e9a037..4242f8e39c18 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -115,6 +115,8 @@ #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 #define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460 #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee /* * Function 1 - Address Map @@ -281,6 +283,7 @@ enum amd_families { F16_CPUS, F16_M30H_CPUS, F17_CPUS, + F17_M10H_CPUS, NUM_FAMILIES, }; From 65ce0542891852aa680a5478e091ea2f9adec7d8 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Sep 2018 14:39:34 -0700 Subject: [PATCH 0317/2608] EDAC, {i7core,sb,skx}_edac: Fix uncorrected error counting commit 432de7fd7630c84ad24f1c2acd1e3bb4ce3741ca upstream. The count of errors is picked up from bits 52:38 of the machine check bank status register. But this is the count of *corrected* errors. If an uncorrected error is being logged, the h/w sets this field to 0. Which means that when edac_mc_handle_error() is called, the EDAC core will carefully add zero to the appropriate uncorrected error counts. Signed-off-by: Tony Luck [ Massage commit message. ] Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20180928213934.19890-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/i7core_edac.c | 1 + drivers/edac/sb_edac.c | 1 + drivers/edac/skx_edac.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 6c7d5f20eacb..2054a24b41d7 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1711,6 +1711,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, u32 errnum = find_first_bit(&error, 32); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) tp_event = HW_EVENT_ERR_FATAL; else diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 0dc0d595c47c..b0b390a1da15 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2891,6 +2891,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 16dea97568a1..8547eb8656a2 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -895,6 +895,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; From 074df512d4d02eaf793ffddf46ab45cdaba374e1 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 9 Oct 2018 10:20:25 -0700 Subject: [PATCH 0318/2608] EDAC, skx_edac: Fix logical channel intermediate decoding commit 8f18973877204dc8ca4ce1004a5d28683b9a7086 upstream. The code "lchan = (lchan << 1) | ~lchan" for logical channel intermediate decoding is wrong. The wrong intermediate decoding result is {0xffffffff, 0xfffffffe}. Fix it by replacing '~' with '!'. The correct intermediate decoding result is {0x1, 0x2}. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov CC: Aristeu Rozanski CC: Mauro Carvalho Chehab CC: linux-edac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20181009172025.18594-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/skx_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 8547eb8656a2..5dafd4fa8f5e 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -604,7 +604,7 @@ sad_found: break; case 2: lchan = (addr >> shift) % 2; - lchan = (lchan << 1) | ~lchan; + lchan = (lchan << 1) | !lchan; break; case 3: lchan = ((addr >> shift) % 2) << 1; From 255fb2e03694901f8178d9766ab3c043be6ed33b Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Tue, 25 Sep 2018 10:51:51 +0530 Subject: [PATCH 0319/2608] ARM: dts: dra7: Fix up unaligned access setting for PCIe EP commit 6d0af44a82be87c13f2320821e9fbb8b8cf5a56f upstream. Bit positions of PCIE_SS1_AXI2OCP_LEGACY_MODE_ENABLE and PCIE_SS1_AXI2OCP_LEGACY_MODE_ENABLE in CTRL_CORE_SMA_SW_7 are incorrectly documented in the TRM. In fact, the bit positions are swapped. Update the DT bindings for PCIe EP to reflect the same. Fixes: d23f3839fe97 ("ARM: dts: DRA7: Add pcie1 dt node for EP mode") Cc: stable@vger.kernel.org Signed-off-by: Vignesh R Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index a5bd8f0205e8..0bf354024ef5 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -333,7 +333,7 @@ ti,hwmods = "pcie1"; phys = <&pcie1_phy>; phy-names = "pcie-phy0"; - ti,syscon-unaligned-access = <&scm_conf1 0x14 2>; + ti,syscon-unaligned-access = <&scm_conf1 0x14 1>; status = "disabled"; }; }; From 19e14e8816d0a079671eb917606a4b586321ec12 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 4 Sep 2018 12:34:18 -0500 Subject: [PATCH 0320/2608] PCI/ASPM: Fix link_state teardown on device removal commit aeae4f3e5c38d47bdaef50446dc0ec857307df68 upstream. Upon removal of the last device on a bus, the link_state of the bridge leading to that bus is sought to be torn down by having pci_stop_dev() call pcie_aspm_exit_link_state(). When ASPM was originally introduced by commit 7d715a6c1ae5 ("PCI: add PCI Express ASPM support"), it determined whether the device being removed is the last one by calling list_empty() on the bridge's subordinate devices list. That didn't work because the device is only removed from the list slightly later in pci_destroy_dev(). Commit 3419c75e15f8 ("PCI: properly clean up ASPM link state on device remove") attempted to fix it by calling list_is_last(), but that's not correct either because it checks whether the device is at the *end* of the list, not whether it's the last one *left* in the list. If the user removes the device which happens to be at the end of the list via sysfs but other devices are preceding the device in the list, the link_state is torn down prematurely. The real fix is to move the invocation of pcie_aspm_exit_link_state() to pci_destroy_dev() and reinstate the call to list_empty(). Remove a duplicate check for dev->bus->self because pcie_aspm_exit_link_state() already contains an identical check. Fixes: 7d715a6c1ae5 ("PCI: add PCI Express ASPM support") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: Shaohua Li Cc: stable@vger.kernel.org # v2.6.26 Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aspm.c | 2 +- drivers/pci/remove.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 633e55c57b13..c0e1985e4c75 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -937,7 +937,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) * All PCIe functions are in one slot, remove one function will remove * the whole slot, so just wait until we are the last function left. */ - if (!list_is_last(&pdev->bus_list, &parent->subordinate->devices)) + if (!list_empty(&parent->subordinate->devices)) goto out; link = parent->link_state; diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 2fa0dbde36b7..0911217467bc 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -24,9 +24,6 @@ static void pci_stop_dev(struct pci_dev *dev) pci_remove_sysfs_dev_files(dev); dev->is_added = 0; } - - if (dev->bus->self) - pcie_aspm_exit_link_state(dev); } static void pci_destroy_dev(struct pci_dev *dev) @@ -40,6 +37,7 @@ static void pci_destroy_dev(struct pci_dev *dev) list_del(&dev->bus_list); up_write(&pci_bus_sem); + pcie_aspm_exit_link_state(dev); pci_bridge_d3_update(dev); pci_free_resources(dev); put_device(&dev->dev); From 2b216de5804f6dac761bbf6667bc707ea1aeb771 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 26 Sep 2018 08:14:01 -0700 Subject: [PATCH 0321/2608] PCI: Add Device IDs for Intel GPU "spurious interrupt" quirk commit d0c9606b31a21028fb5b753c8ad79626292accfd upstream. Add Device IDs to the Intel GPU "spurious interrupt" quirk table. For these devices, unplugging the VGA cable and plugging it in again causes spurious interrupts from the IGD. Linux eventually disables the interrupt, but of course that disables any other devices sharing the interrupt. The theory is that this is a VGA BIOS defect: it should have disabled the IGD interrupt but failed to do so. See f67fd55fa96f ("PCI: Add quirk for still enabled interrupts on Intel Sandy Bridge GPUs") and 7c82126a94e6 ("PCI: Add new ID for Intel GPU "spurious interrupt" quirk") for some history. [bhelgaas: See link below for discussion about how to fix this more generically instead of adding device IDs for every new Intel GPU. I hope this is the last patch to add device IDs.] Link: https://lore.kernel.org/linux-pci/1537974841-29928-1-git-send-email-bmeng.cn@gmail.com Signed-off-by: Bin Meng [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 35c9b2f4b293..d442afa195ab 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3163,7 +3163,11 @@ static void disable_igfx_irq(struct pci_dev *dev) pci_iounmap(dev, regs); } +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0042, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0046, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x004a, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0106, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0152, disable_igfx_irq); From 635c8c9ce51bcee0902c5a4d2996163e30213320 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 8 May 2018 10:00:22 -0600 Subject: [PATCH 0322/2608] PCI: vmd: White list for fast interrupt handlers commit a7f58b9ecfd3c0f63703ec10f4a592cc38dbd1b8 upstream. Devices with slow interrupt handlers are significantly harming performance when their interrupt vector is shared with a fast device. Create a class code white list for devices with known fast interrupt handlers and let all other devices share a single vector so that they don't interfere with performance. At the moment, only the NVM Express class code is on the list, but more may be added if VMD users desire to use other low-latency devices in these domains. Signed-off-by: Keith Busch [lorenzo.pieralisi@arm.com: changelog] Signed-off-by: Lorenzo Pieralisi Acked-by: Jon Derrick: Cc: "Heitke, Kenneth" Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/vmd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c index 509893bc3e63..2537b022f42d 100644 --- a/drivers/pci/host/vmd.c +++ b/drivers/pci/host/vmd.c @@ -183,9 +183,20 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d int i, best = 1; unsigned long flags; - if (pci_is_bridge(msi_desc_to_pci_dev(desc)) || vmd->msix_count == 1) + if (vmd->msix_count == 1) return &vmd->irqs[0]; + /* + * White list for fast-interrupt handlers. All others will share the + * "slow" interrupt vector. + */ + switch (msi_desc_to_pci_dev(desc)->class) { + case PCI_CLASS_STORAGE_EXPRESS: + break; + default: + return &vmd->irqs[0]; + } + raw_spin_lock_irqsave(&list_lock, flags); for (i = 1; i < vmd->msix_count; i++) if (vmd->irqs[i].count < vmd->irqs[best].count) From eb7f3c513d1c898abdc66809406d62ce9ec7c50d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Sep 2018 11:28:01 +0200 Subject: [PATCH 0323/2608] signal/GenWQE: Fix sending of SIGKILL commit 0ab93e9c99f8208c0a1a7b7170c827936268c996 upstream. The genweq_add_file and genwqe_del_file by caching current without using reference counting embed the assumption that a file descriptor will never be passed from one process to another. It even embeds the assumption that the the thread that opened the file will be in existence when the process terminates. Neither of which are guaranteed to be true. Therefore replace caching the task_struct of the opener with pid of the openers thread group id. All the knowledge of the opener is used for is as the target of SIGKILL and a SIGKILL will kill the entire process group. Rename genwqe_force_sig to genwqe_terminate, remove it's unncessary signal argument, update it's ownly caller, and use kill_pid instead of force_sig. The work force_sig does in changing signal handling state is not relevant to SIGKILL sent as SEND_SIG_PRIV. The exact same processess will be killed just with less work, and less confusion. The work done by force_sig is really only needed for handling syncrhonous exceptions. It will still be possible to cause genwqe_device_remove to wait 8 seconds by passing a file descriptor to another process but the possible user after free is fixed. Fixes: eaf4722d4645 ("GenWQE Character device and DDCB queue") Cc: stable@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Frank Haverkamp Cc: Joerg-Stephan Vogt Cc: Michael Jung Cc: Michael Ruettger Cc: Kleber Sacilotto de Souza Cc: Sebastian Ott Cc: Eberhard S. Amann Cc: Gabriel Krisman Bertazi Cc: Guilherme G. Piccoli Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- drivers/misc/genwqe/card_base.h | 2 +- drivers/misc/genwqe/card_dev.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h index 5813b5f25006..135e02f257c1 100644 --- a/drivers/misc/genwqe/card_base.h +++ b/drivers/misc/genwqe/card_base.h @@ -403,7 +403,7 @@ struct genwqe_file { struct file *filp; struct fasync_struct *async_queue; - struct task_struct *owner; + struct pid *opener; struct list_head list; /* entry in list of open files */ spinlock_t map_lock; /* lock for dma_mappings */ diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index dd4617764f14..dbd5eaa69311 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -52,7 +52,7 @@ static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile) { unsigned long flags; - cfile->owner = current; + cfile->opener = get_pid(task_tgid(current)); spin_lock_irqsave(&cd->file_lock, flags); list_add(&cfile->list, &cd->file_list); spin_unlock_irqrestore(&cd->file_lock, flags); @@ -65,6 +65,7 @@ static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile) spin_lock_irqsave(&cd->file_lock, flags); list_del(&cfile->list); spin_unlock_irqrestore(&cd->file_lock, flags); + put_pid(cfile->opener); return 0; } @@ -275,7 +276,7 @@ static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig) return files; } -static int genwqe_force_sig(struct genwqe_dev *cd, int sig) +static int genwqe_terminate(struct genwqe_dev *cd) { unsigned int files = 0; unsigned long flags; @@ -283,7 +284,7 @@ static int genwqe_force_sig(struct genwqe_dev *cd, int sig) spin_lock_irqsave(&cd->file_lock, flags); list_for_each_entry(cfile, &cd->file_list, list) { - force_sig(sig, cfile->owner); + kill_pid(cfile->opener, SIGKILL, 1); files++; } spin_unlock_irqrestore(&cd->file_lock, flags); @@ -1356,7 +1357,7 @@ static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) dev_warn(&pci_dev->dev, "[%s] send SIGKILL and wait ...\n", __func__); - rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */ + rc = genwqe_terminate(cd); if (rc) { /* Give kill_timout more seconds to end processes */ for (i = 0; (i < genwqe_kill_timeout) && From 51f62e827191f4ba02c2a001b9e6f25605bfa649 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 10 Oct 2018 20:29:44 -0500 Subject: [PATCH 0324/2608] signal: Guard against negative signal numbers in copy_siginfo_from_user32 commit a36700589b85443e28170be59fa11c8a104130a5 upstream. While fixing an out of bounds array access in known_siginfo_layout reported by the kernel test robot it became apparent that the same bug exists in siginfo_layout and affects copy_siginfo_from_user32. The straight forward fix that makes guards against making this mistake in the future and should keep the code size small is to just take an unsigned signal number instead of a signed signal number, as I did to fix known_siginfo_layout. Cc: stable@vger.kernel.org Fixes: cc731525f26a ("signal: Remove kernel interal si_code magic") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- include/linux/signal.h | 2 +- kernel/signal.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/signal.h b/include/linux/signal.h index 042968dd98f0..843bd62b1ead 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -34,7 +34,7 @@ enum siginfo_layout { #endif }; -enum siginfo_layout siginfo_layout(int sig, int si_code); +enum siginfo_layout siginfo_layout(unsigned sig, int si_code); /* * Define some primitives to manipulate sigset_t. diff --git a/kernel/signal.c b/kernel/signal.c index 7181215e9b64..164c36ef0825 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2700,7 +2700,7 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, } #endif -enum siginfo_layout siginfo_layout(int sig, int si_code) +enum siginfo_layout siginfo_layout(unsigned sig, int si_code) { enum siginfo_layout layout = SIL_KILL; if ((si_code > SI_USER) && (si_code < SI_KERNEL)) { From e86f4842f84a4494227aa4d1ab76acb68d86fb1a Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Thu, 13 Sep 2018 10:51:31 +0200 Subject: [PATCH 0325/2608] crypto: lrw - Fix out-of bounds access on counter overflow commit fbe1a850b3b1522e9fc22319ccbbcd2ab05328d2 upstream. When the LRW block counter overflows, the current implementation returns 128 as the index to the precomputed multiplication table, which has 128 entries. This patch fixes it to return the correct value (127). Fixes: 64470f1b8510 ("[CRYPTO] lrw: Liskov Rivest Wagner, a tweakable narrow block cipher mode") Cc: # 2.6.20+ Reported-by: Eric Biggers Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/lrw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index fdba6dd6db63..886f91f2426c 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -139,7 +139,12 @@ static inline int get_index128(be128 *block) return x + ffz(val); } - return x; + /* + * If we get here, then x == 128 and we are incrementing the counter + * from all ones to all zeros. This means we must return index 127, i.e. + * the one corresponding to key2*{ 1,...,1 }. + */ + return 127; } static int post_crypt(struct skcipher_request *req) From 0c8496c52a98039b6d8ca46db2f4478779b957fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Wed, 12 Sep 2018 16:20:48 +0300 Subject: [PATCH 0326/2608] crypto: tcrypt - fix ghash-generic speed test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 331351f89c36bf7d03561a28b6f64fa10a9f6f3a upstream. ghash is a keyed hash algorithm, thus setkey needs to be called. Otherwise the following error occurs: $ modprobe tcrypt mode=318 sec=1 testing speed of async ghash-generic (ghash-generic) tcrypt: test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): tcrypt: hashing failed ret=-126 Cc: # 4.6+ Fixes: 0660511c0bee ("crypto: tcrypt - Use ahash") Tested-by: Franck Lenormand Signed-off-by: Horia Geantă Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/tcrypt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e339960dcac7..f7affe7cf0b4 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -727,6 +727,9 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs, break; } + if (speed[i].klen) + crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen); + pr_info("test%3u " "(%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); From 0c5e357fa89d64f414014046ec14979f72de0261 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 26 Oct 2018 15:02:16 -0700 Subject: [PATCH 0327/2608] mm: /proc/pid/smaps_rollup: fix NULL pointer deref in smaps_pte_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fa76da461bb0be13c8339d984dcf179151167c8f upstream. Leonardo reports an apparent regression in 4.19-rc7: BUG: unable to handle kernel NULL pointer dereference at 00000000000000f0 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 3 PID: 6032 Comm: python Not tainted 4.19.0-041900rc7-lowlatency #201810071631 Hardware name: LENOVO 80UG/Toronto 4A2, BIOS 0XCN45WW 08/09/2018 RIP: 0010:smaps_pte_range+0x32d/0x540 Code: 80 00 00 00 00 74 a9 48 89 de 41 f6 40 52 40 0f 85 04 02 00 00 49 2b 30 48 c1 ee 0c 49 03 b0 98 00 00 00 49 8b 80 a0 00 00 00 <48> 8b b8 f0 00 00 00 e8 b7 ef ec ff 48 85 c0 0f 84 71 ff ff ff a8 RSP: 0018:ffffb0cbc484fb88 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 0000560ddb9e9000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000560ddb9e9 RDI: 0000000000000001 RBP: ffffb0cbc484fbc0 R08: ffff94a5a227a578 R09: ffff94a5a227a578 R10: 0000000000000000 R11: 0000560ddbbe7000 R12: ffffe903098ba728 R13: ffffb0cbc484fc78 R14: ffffb0cbc484fcf8 R15: ffff94a5a2e9cf48 FS: 00007f6dfb683740(0000) GS:ffff94a5aaf80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f0 CR3: 000000011c118001 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __walk_page_range+0x3c2/0x6f0 walk_page_vma+0x42/0x60 smap_gather_stats+0x79/0xe0 ? gather_pte_stats+0x320/0x320 ? gather_hugetlb_stats+0x70/0x70 show_smaps_rollup+0xcd/0x1c0 seq_read+0x157/0x400 __vfs_read+0x3a/0x180 ? security_file_permission+0x93/0xc0 ? security_file_permission+0x93/0xc0 vfs_read+0x8f/0x140 ksys_read+0x55/0xc0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Decoded code matched to local compilation+disassembly points to smaps_pte_entry(): } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap && pte_none(*pte))) { page = find_get_entry(vma->vm_file->f_mapping, linear_page_index(vma, addr)); Here, vma->vm_file is NULL. mss->check_shmem_swap should be false in that case, however for smaps_rollup, smap_gather_stats() can set the flag true for one vma and leave it true for subsequent vma's where it should be false. To fix, reset the check_shmem_swap flag to false. There's also related bug which sets mss->swap to shmem_swapped, which in the context of smaps_rollup overwrites any value accumulated from previous vma's. Fix that as well. Note that the report suggests a regression between 4.17.19 and 4.19-rc7, which makes the 4.19 series ending with commit 258f669e7e88 ("mm: /proc/pid/smaps_rollup: convert to single value seq_file") suspicious. But the mss was reused for rollup since 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") so let's play it safe with the stable backport. Link: http://lkml.kernel.org/r/555fbd1f-4ac9-0b58-dcd4-5dc4380ff7ca@suse.cz Link: https://bugzilla.kernel.org/show_bug.cgi?id=201377 Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") Signed-off-by: Vlastimil Babka Reported-by: Leonardo Soares Müller Tested-by: Leonardo Soares Müller Cc: Greg Kroah-Hartman Cc: Daniel Colascione Cc: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 519522d39bde..2b47757c9c68 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -768,6 +768,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) smaps_walk.private = mss; #ifdef CONFIG_SHMEM + /* In case of smaps_rollup, reset the value from previous vma */ + mss->check_shmem_swap = false; if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { /* * For shared or readonly shmem mappings we know that all @@ -783,7 +785,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) if (!shmem_swapped || (vma->vm_flags & VM_SHARED) || !(vma->vm_flags & VM_WRITE)) { - mss->swap = shmem_swapped; + mss->swap += shmem_swapped; } else { mss->check_shmem_swap = true; smaps_walk.pte_hole = smaps_pte_hole; From 4c6fda12ff1d187590083a5615cee5501c60142c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 7 Sep 2018 14:33:24 -0700 Subject: [PATCH 0328/2608] ima: fix showing large 'violations' or 'runtime_measurements_count' commit 1e4c8dafbb6bf72fb5eca035b861e39c5896c2b7 upstream. The 12 character temporary buffer is not necessarily long enough to hold a 'long' value. Increase it. Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_fs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index ad491c51e833..2c4e83f6409e 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -39,14 +39,14 @@ static int __init default_canonical_fmt_setup(char *str) __setup("ima_canonical_fmt", default_canonical_fmt_setup); static int valid_policy = 1; -#define TMPBUFLEN 12 + static ssize_t ima_show_htable_value(char __user *buf, size_t count, loff_t *ppos, atomic_long_t *val) { - char tmpbuf[TMPBUFLEN]; + char tmpbuf[32]; /* greater than largest 'long' string value */ ssize_t len; - len = scnprintf(tmpbuf, TMPBUFLEN, "%li\n", atomic_long_read(val)); + len = scnprintf(tmpbuf, sizeof(tmpbuf), "%li\n", atomic_long_read(val)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, len); } From d197121a077a39f6c33729ae5d4bef2d1a9c1596 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 26 Oct 2018 15:10:58 -0700 Subject: [PATCH 0329/2608] hugetlbfs: dirty pages as they are added to pagecache commit 22146c3ce98962436e401f7b7016a6f664c9ffb5 upstream. Some test systems were experiencing negative huge page reserve counts and incorrect file block counts. This was traced to /proc/sys/vm/drop_caches removing clean pages from hugetlbfs file pagecaches. When non-hugetlbfs explicit code removes the pages, the appropriate accounting is not performed. This can be recreated as follows: fallocate -l 2M /dev/hugepages/foo echo 1 > /proc/sys/vm/drop_caches fallocate -l 2M /dev/hugepages/foo grep -i huge /proc/meminfo AnonHugePages: 0 kB ShmemHugePages: 0 kB HugePages_Total: 2048 HugePages_Free: 2047 HugePages_Rsvd: 18446744073709551615 HugePages_Surp: 0 Hugepagesize: 2048 kB Hugetlb: 4194304 kB ls -lsh /dev/hugepages/foo 4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo To address this issue, dirty pages as they are added to pagecache. This can easily be reproduced with fallocate as shown above. Read faulted pages will eventually end up being marked dirty. But there is a window where they are clean and could be impacted by code such as drop_caches. So, just dirty them all as they are added to the pagecache. Link: http://lkml.kernel.org/r/b5be45b8-5afe-56cd-9482-28384699a049@oracle.com Fixes: 6bda666a03f0 ("hugepages: fold find_or_alloc_pages into huge_no_page()") Signed-off-by: Mike Kravetz Acked-by: Mihcla Hocko Reviewed-by: Khalid Aziz Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: "Aneesh Kumar K . V" Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Davidlohr Bueso Cc: Alexander Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9801dc0250e2..e073099083ca 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3644,6 +3644,12 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, return err; ClearPagePrivate(page); + /* + * set page dirty so that it will not be removed from cache/file + * by non-hugetlbfs specific code paths. + */ + set_page_dirty(page); + spin_lock(&inode->i_lock); inode->i_blocks += blocks_per_huge_page(h); spin_unlock(&inode->i_lock); From 9d3cc761c5d7cbcef96758bac4e4cee53d2136c8 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 30 Oct 2018 15:04:11 -0700 Subject: [PATCH 0330/2608] mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit aab8d0520e6e7c2a61f71195e6ce7007a4843afb upstream. Private ZONE_DEVICE pages use a special pte entry and thus are not present. Properly handle this case in map_pte(), it is already handled in check_pte(), the map_pte() part was lost in some rebase most probably. Without this patch the slow migration path can not migrate back to any private ZONE_DEVICE memory to regular memory. This was found after stress testing migration back to system memory. This ultimatly can lead to the CPU constantly page fault looping on the special swap entry. Link: http://lkml.kernel.org/r/20181019160442.18723-3-jglisse@redhat.com Signed-off-by: Ralph Campbell Signed-off-by: Jérôme Glisse Reviewed-by: Balbir Singh Cc: Andrew Morton Cc: Kirill A. Shutemov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_vma_mapped.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 956015614395..e00d985a51c5 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -21,7 +21,29 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw) if (!is_swap_pte(*pvmw->pte)) return false; } else { - if (!pte_present(*pvmw->pte)) + /* + * We get here when we are trying to unmap a private + * device page from the process address space. Such + * page is not CPU accessible and thus is mapped as + * a special swap entry, nonetheless it still does + * count as a valid regular mapping for the page (and + * is accounted as such in page maps count). + * + * So handle this special case as if it was a normal + * page mapping ie lock CPU page table and returns + * true. + * + * For more details on device private memory see HMM + * (include/linux/hmm.h or mm/hmm.c). + */ + if (is_swap_pte(*pvmw->pte)) { + swp_entry_t entry; + + /* Handle un-addressable ZONE_DEVICE memory */ + entry = pte_to_swp_entry(*pvmw->pte); + if (!is_device_private_entry(entry)) + return false; + } else if (!pte_present(*pvmw->pte)) return false; } } From f1df765432518a30b1bcf56d1d03bdd5a1821dee Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Oct 2018 17:42:10 +0100 Subject: [PATCH 0331/2608] KVM: arm64: Fix caching of host MDCR_EL2 value commit da5a3ce66b8bb51b0ea8a89f42aac153903f90fb upstream. At boot time, KVM stashes the host MDCR_EL2 value, but only does this when the kernel is not running in hyp mode (i.e. is non-VHE). In these cases, the stashed value of MDCR_EL2.HPMN happens to be zero, which can lead to CONSTRAINED UNPREDICTABLE behaviour. Since we use this value to derive the MDCR_EL2 value when switching to/from a guest, after a guest have been run, the performance counters do not behave as expected. This has been observed to result in accesses via PMXEVTYPER_EL0 and PMXEVCNTR_EL0 not affecting the relevant counters, resulting in events not being counted. In these cases, only the fixed-purpose cycle counter appears to work as expected. Fix this by always stashing the host MDCR_EL2 value, regardless of VHE. Cc: Christopher Dall Cc: James Morse Cc: Will Deacon Cc: stable@vger.kernel.org Fixes: 1e947bad0b63b351 ("arm64: KVM: Skip HYP setup when already running in HYP") Tested-by: Robin Murphy Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index d5f1d8364571..ed42b8cf6f5b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1148,8 +1148,6 @@ static void cpu_init_hyp_mode(void *dummy) __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); - - kvm_arm_init_debug(); } static void cpu_hyp_reset(void) @@ -1173,6 +1171,8 @@ static void cpu_hyp_reinit(void) cpu_init_hyp_mode(NULL); } + kvm_arm_init_debug(); + if (vgic_present) kvm_vgic_init_cpu_hardware(); } From d6ea9f3055378e7e520264025b6cc63c88615577 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Oct 2018 15:07:32 -0700 Subject: [PATCH 0332/2608] kbuild: fix kernel/bounds.c 'W=1' warning commit 6a32c2469c3fbfee8f25bcd20af647326650a6cf upstream. Building any configuration with 'make W=1' produces a warning: kernel/bounds.c:16:6: warning: no previous prototype for 'foo' [-Wmissing-prototypes] When also passing -Werror, this prevents us from building any other files. Nobody ever calls the function, but we can't make it 'static' either since we want the compiler output. Calling it 'main' instead however avoids the warning, because gcc does not insist on having a declaration for main. Link: http://lkml.kernel.org/r/20181005083313.2088252-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Reported-by: Kieran Bingham Reviewed-by: Kieran Bingham Cc: David Laight Cc: Masahiro Yamada Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/bounds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bounds.c b/kernel/bounds.c index c373e887c066..9795d75b09b2 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -13,7 +13,7 @@ #include #include -void foo(void) +int main(void) { /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); @@ -23,4 +23,6 @@ void foo(void) #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); /* End of constants */ + + return 0; } From 8b38d82b0d278d5725e02e470f3cdd46423ba7b1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 28 Sep 2018 11:23:40 +0200 Subject: [PATCH 0333/2608] iio: ad5064: Fix regulator handling commit 8911a43bc198877fad9f4b0246a866b26bb547ab upstream. The correct way to handle errors returned by regualtor_get() and friends is to propagate the error since that means that an regulator was specified, but something went wrong when requesting it. For handling optional regulators, e.g. when the device has an internal vref, regulator_get_optional() should be used to avoid getting the dummy regulator that the regulator core otherwise provides. Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/dac/ad5064.c | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c index 3f9399c27869..efc762da2ba8 100644 --- a/drivers/iio/dac/ad5064.c +++ b/drivers/iio/dac/ad5064.c @@ -809,6 +809,40 @@ static int ad5064_set_config(struct ad5064_state *st, unsigned int val) return ad5064_write(st, cmd, 0, val, 0); } +static int ad5064_request_vref(struct ad5064_state *st, struct device *dev) +{ + unsigned int i; + int ret; + + for (i = 0; i < ad5064_num_vref(st); ++i) + st->vref_reg[i].supply = ad5064_vref_name(st, i); + + if (!st->chip_info->internal_vref) + return devm_regulator_bulk_get(dev, ad5064_num_vref(st), + st->vref_reg); + + /* + * This assumes that when the regulator has an internal VREF + * there is only one external VREF connection, which is + * currently the case for all supported devices. + */ + st->vref_reg[0].consumer = devm_regulator_get_optional(dev, "vref"); + if (!IS_ERR(st->vref_reg[0].consumer)) + return 0; + + ret = PTR_ERR(st->vref_reg[0].consumer); + if (ret != -ENODEV) + return ret; + + /* If no external regulator was supplied use the internal VREF */ + st->use_internal_vref = true; + ret = ad5064_set_config(st, AD5064_CONFIG_INT_VREF_ENABLE); + if (ret) + dev_err(dev, "Failed to enable internal vref: %d\n", ret); + + return ret; +} + static int ad5064_probe(struct device *dev, enum ad5064_type type, const char *name, ad5064_write_func write) { @@ -829,22 +863,11 @@ static int ad5064_probe(struct device *dev, enum ad5064_type type, st->dev = dev; st->write = write; - for (i = 0; i < ad5064_num_vref(st); ++i) - st->vref_reg[i].supply = ad5064_vref_name(st, i); + ret = ad5064_request_vref(st, dev); + if (ret) + return ret; - ret = devm_regulator_bulk_get(dev, ad5064_num_vref(st), - st->vref_reg); - if (ret) { - if (!st->chip_info->internal_vref) - return ret; - st->use_internal_vref = true; - ret = ad5064_set_config(st, AD5064_CONFIG_INT_VREF_ENABLE); - if (ret) { - dev_err(dev, "Failed to enable internal vref: %d\n", - ret); - return ret; - } - } else { + if (!st->use_internal_vref) { ret = regulator_bulk_enable(ad5064_num_vref(st), st->vref_reg); if (ret) return ret; From 39b6d86b316e5d3d338d661ab6468beef1b5bb91 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 22 Sep 2018 00:58:02 +0300 Subject: [PATCH 0334/2608] iio: adc: imx25-gcq: Fix leak of device_node in mx25_gcq_setup_cfgs() commit d3fa21c73c391975488818b085b894c2980ea052 upstream. Leaving for_each_child_of_node loop we should release child device node, if it is not stored for future use. Found by Linux Driver Verification project (linuxtesting.org). JC: I'm not sending this as a quick fix as it's been wrong for years, but good to pick up for stable after the merge window. Signed-off-by: Alexey Khoroshilov Fixes: 6df2e98c3ea56 ("iio: adc: Add imx25-gcq ADC driver") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/fsl-imx25-gcq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iio/adc/fsl-imx25-gcq.c b/drivers/iio/adc/fsl-imx25-gcq.c index ea264fa9e567..929c617db364 100644 --- a/drivers/iio/adc/fsl-imx25-gcq.c +++ b/drivers/iio/adc/fsl-imx25-gcq.c @@ -209,12 +209,14 @@ static int mx25_gcq_setup_cfgs(struct platform_device *pdev, ret = of_property_read_u32(child, "reg", ®); if (ret) { dev_err(dev, "Failed to get reg property\n"); + of_node_put(child); return ret; } if (reg >= MX25_NUM_CFGS) { dev_err(dev, "reg value is greater than the number of available configuration registers\n"); + of_node_put(child); return -EINVAL; } @@ -228,6 +230,7 @@ static int mx25_gcq_setup_cfgs(struct platform_device *pdev, if (IS_ERR(priv->vref[refp])) { dev_err(dev, "Error, trying to use external voltage reference without a vref-%s regulator.", mx25_gcq_refp_names[refp]); + of_node_put(child); return PTR_ERR(priv->vref[refp]); } priv->channel_vref_mv[reg] = @@ -240,6 +243,7 @@ static int mx25_gcq_setup_cfgs(struct platform_device *pdev, break; default: dev_err(dev, "Invalid positive reference %d\n", refp); + of_node_put(child); return -EINVAL; } @@ -254,10 +258,12 @@ static int mx25_gcq_setup_cfgs(struct platform_device *pdev, if ((refp & MX25_ADCQ_CFG_REFP_MASK) != refp) { dev_err(dev, "Invalid fsl,adc-refp property value\n"); + of_node_put(child); return -EINVAL; } if ((refn & MX25_ADCQ_CFG_REFN_MASK) != refn) { dev_err(dev, "Invalid fsl,adc-refn property value\n"); + of_node_put(child); return -EINVAL; } From c4ecff9bc4daf9b0d7d77ae8211ada97ac732079 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 24 Sep 2018 10:51:43 +0300 Subject: [PATCH 0335/2608] iio: adc: at91: fix acking DRDY irq on simple conversions commit bc1b45326223e7e890053cf6266357adfa61942d upstream. When doing simple conversions, the driver did not acknowledge the DRDY irq. If this irq status is not acked, it will be left pending, and as soon as a trigger is enabled, the irq handler will be called, it doesn't know why this status has occurred because no channel is pending, and then it will go int a irq loop and board will hang. To avoid this situation, read the LCDR after a raw conversion is done. Fixes: 0e589d5fb ("ARM: AT91: IIO: Add AT91 ADC driver.") Cc: Maxime Ripard Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 15109728cae7..8882b80419f6 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -279,6 +279,8 @@ static void handle_adc_eoc_trigger(int irq, struct iio_dev *idev) iio_trigger_poll(idev->trig); } else { st->last_value = at91_adc_readl(st, AT91_ADC_CHAN(st, st->chnb)); + /* Needed to ACK the DRDY interruption */ + at91_adc_readl(st, AT91_ADC_LCDR); st->done = true; wake_up_interruptible(&st->wq_data_avail); } From 28a1aaeda80f815bccb7c0dada9a0cbe86889e6b Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 24 Sep 2018 10:51:44 +0300 Subject: [PATCH 0336/2608] iio: adc: at91: fix wrong channel number in triggered buffer mode commit aea835f2dc8a682942b859179c49ad1841a6c8b9 upstream. When channels are registered, the hardware channel number is not the actual iio channel number. This is because the driver is probed with a certain number of accessible channels. Some pins are routed and some not, depending on the description of the board in the DT. Because of that, channels 0,1,2,3 can correspond to hardware channels 2,3,4,5 for example. In the buffered triggered case, we need to do the translation accordingly. Fixed the channel number to stop reading the wrong channel. Fixes: 0e589d5fb ("ARM: AT91: IIO: Add AT91 ADC driver.") Cc: Maxime Ripard Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 8882b80419f6..cd686179aa92 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -248,12 +248,14 @@ static irqreturn_t at91_adc_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *idev = pf->indio_dev; struct at91_adc_state *st = iio_priv(idev); + struct iio_chan_spec const *chan; int i, j = 0; for (i = 0; i < idev->masklength; i++) { if (!test_bit(i, idev->active_scan_mask)) continue; - st->buffer[j] = at91_adc_readl(st, AT91_ADC_CHAN(st, i)); + chan = idev->channels + i; + st->buffer[j] = at91_adc_readl(st, AT91_ADC_CHAN(st, chan->channel)); j++; } From 6838bb61b194aa17981ee242e331bd65b41661a1 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 22 Sep 2018 21:20:54 +0200 Subject: [PATCH 0337/2608] w1: omap-hdq: fix missing bus unregister at removal commit a007734618fee1bf35556c04fa498d41d42c7301 upstream. The bus master was not removed after unloading the module or unbinding the driver. That lead to oopses like this [ 127.842987] Unable to handle kernel paging request at virtual address bf01d04c [ 127.850646] pgd = 70e3cd9a [ 127.853698] [bf01d04c] *pgd=8f908811, *pte=00000000, *ppte=00000000 [ 127.860412] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM [ 127.866668] Modules linked in: bq27xxx_battery overlay [last unloaded: omap_hdq] [ 127.874542] CPU: 0 PID: 1022 Comm: w1_bus_master1 Not tainted 4.19.0-rc4-00001-g2d51da718324 #12 [ 127.883819] Hardware name: Generic OMAP36xx (Flattened Device Tree) [ 127.890441] PC is at 0xbf01d04c [ 127.893798] LR is at w1_search_process_cb+0x4c/0xfc [ 127.898956] pc : [] lr : [] psr: a0070013 [ 127.905609] sp : cf885f48 ip : bf01d04c fp : ddf1e11c [ 127.911132] r10: cf8fe040 r9 : c05f8d00 r8 : cf8fe040 [ 127.916656] r7 : 000000f0 r6 : cf8fe02c r5 : cf8fe000 r4 : cf8fe01c [ 127.923553] r3 : c05f8d00 r2 : 000000f0 r1 : cf8fe000 r0 : dde1ef10 [ 127.930450] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 127.938018] Control: 10c5387d Table: 8f8f0019 DAC: 00000051 [ 127.944091] Process w1_bus_master1 (pid: 1022, stack limit = 0x9135699f) [ 127.951171] Stack: (0xcf885f48 to 0xcf886000) [ 127.955810] 5f40: cf8fe000 00000000 cf884000 cf8fe090 000003e8 c05f8d00 [ 127.964477] 5f60: dde5fc34 c05f9700 ddf1e100 ddf1e540 cf884000 cf8fe000 c05f9694 00000000 [ 127.973114] 5f80: dde5fc34 c01499a4 00000000 ddf1e540 c0149874 00000000 00000000 00000000 [ 127.981781] 5fa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 [ 127.990447] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 127.999114] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 128.007781] [] (w1_search_process_cb) from [] (w1_process+0x6c/0x118) [ 128.016479] [] (w1_process) from [] (kthread+0x130/0x148) [ 128.024047] [] (kthread) from [] (ret_from_fork+0x14/0x2c) [ 128.031677] Exception stack(0xcf885fb0 to 0xcf885ff8) [ 128.037017] 5fa0: 00000000 00000000 00000000 00000000 [ 128.045684] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 128.054351] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 128.061340] Code: bad PC value [ 128.064697] ---[ end trace af066e33c0e14119 ]--- Cc: Signed-off-by: Andreas Kemnade Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/omap_hdq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c index 83fc9aab34e8..3099052e1243 100644 --- a/drivers/w1/masters/omap_hdq.c +++ b/drivers/w1/masters/omap_hdq.c @@ -763,6 +763,8 @@ static int omap_hdq_remove(struct platform_device *pdev) /* remove module dependency */ pm_runtime_disable(&pdev->dev); + w1_remove_master_device(&omap_w1_master); + return 0; } From 54341b9fa4cf5022170302e8f9ac6026546e24f4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 15 Sep 2018 23:04:41 -0500 Subject: [PATCH 0338/2608] smb3: allow stats which track session and share reconnects to be reset commit 2c887635cd6ab3af619dc2be94e5bf8f2e172b78 upstream. Currently, "echo 0 > /proc/fs/cifs/Stats" resets all of the stats except the session and share reconnect counts. Fix it to reset those as well. CC: Stable Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 2565cee702e4..106a715101f9 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -289,6 +289,9 @@ static ssize_t cifs_stats_proc_write(struct file *file, atomic_set(&totBufAllocCount, 0); atomic_set(&totSmBufAllocCount, 0); #endif /* CONFIG_CIFS_STATS2 */ + atomic_set(&tcpSesReconnectCount, 0); + atomic_set(&tconInfoReconnectCount, 0); + spin_lock(&GlobalMid_Lock); GlobalMaxActiveXid = 0; GlobalCurrentXid = 0; From 122f2bd8d3e0998561f0c4c51bf242c0f6267861 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 19 Oct 2018 00:45:21 -0500 Subject: [PATCH 0339/2608] smb3: do not attempt cifs operation in smb3 query info error path commit 1e77a8c204c9d1b655c61751b8ad0fde22421dbb upstream. If backupuid mount option is sent, we can incorrectly retry (on access denied on query info) with a cifs (FindFirst) operation on an smb3 mount which causes the server to force the session close. We set backup intent on open so no need for this fallback. See kernel bugzilla 201435 Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d01cbca84701..a90a637ae79a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -776,7 +776,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; - } else if (rc == -EACCES && backup_cred(cifs_sb)) { + } else if ((rc == -EACCES) && backup_cred(cifs_sb) && + (strcmp(server->vals->version_string, SMB1_VERSION_STRING) + == 0)) { + /* + * For SMB2 and later the backup intent flag is already + * sent if needed on open and there is no path based + * FindFirst operation to use to retry with + */ + srchinf = kzalloc(sizeof(struct cifs_search_info), GFP_KERNEL); if (srchinf == NULL) { From e629461b6b2d1a115779281411b49640cd9a8db8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 28 Oct 2018 13:13:23 -0500 Subject: [PATCH 0340/2608] smb3: on kerberos mount if server doesn't specify auth type use krb5 commit 926674de6705f0f1dbf29a62fd758d0977f535d6 upstream. Some servers (e.g. Azure) do not include a spnego blob in the SMB3 negotiate protocol response, so on kerberos mounts ("sec=krb5") we can fail, as we expected the server to list its supported auth types (OIDs in the spnego blob in the negprot response). Change this so that on krb5 mounts we default to trying krb5 if the server doesn't list its supported protocol mechanisms. Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_spnego.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index b611fc2e8984..7f01c6e60791 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -147,8 +147,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) sprintf(dp, ";sec=krb5"); else if (server->sec_mskerberos) sprintf(dp, ";sec=mskrb5"); - else - goto out; + else { + cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n"); + sprintf(dp, ";sec=krb5"); + } dp = description + strlen(description); sprintf(dp, ";uid=0x%x", From cc4dcea8b0f76c01dd6b64f1f84032c768896bba Mon Sep 17 00:00:00 2001 From: He Zhe Date: Sun, 30 Sep 2018 00:45:50 +0800 Subject: [PATCH 0341/2608] printk: Fix panic caused by passing log_buf_len to command line commit 277fcdb2cfee38ccdbe07e705dbd4896ba0c9930 upstream. log_buf_len_setup does not check input argument before passing it to simple_strtoull. The argument would be a NULL pointer if "log_buf_len", without its value, is set in command line and thus causes the following panic. PANIC: early exception 0xe3 IP 10:ffffffffaaeacd0d error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.0-rc4-yocto-standard+ #1 [ 0.000000] RIP: 0010:_parse_integer_fixup_radix+0xd/0x70 ... [ 0.000000] Call Trace: [ 0.000000] simple_strtoull+0x29/0x70 [ 0.000000] memparse+0x26/0x90 [ 0.000000] log_buf_len_setup+0x17/0x22 [ 0.000000] do_early_param+0x57/0x8e [ 0.000000] parse_args+0x208/0x320 [ 0.000000] ? rdinit_setup+0x30/0x30 [ 0.000000] parse_early_options+0x29/0x2d [ 0.000000] ? rdinit_setup+0x30/0x30 [ 0.000000] parse_early_param+0x36/0x4d [ 0.000000] setup_arch+0x336/0x99e [ 0.000000] start_kernel+0x6f/0x4ee [ 0.000000] x86_64_start_reservations+0x24/0x26 [ 0.000000] x86_64_start_kernel+0x6f/0x72 [ 0.000000] secondary_startup_64+0xa4/0xb0 This patch adds a check to prevent the panic. Link: http://lkml.kernel.org/r/1538239553-81805-1-git-send-email-zhe.he@windriver.com Cc: stable@vger.kernel.org Cc: rostedt@goodmis.org Cc: linux-kernel@vger.kernel.org Signed-off-by: He Zhe Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f0223a7d9ed1..7161312593dd 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1043,7 +1043,12 @@ static void __init log_buf_len_update(unsigned size) /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { - unsigned size = memparse(str, &str); + unsigned int size; + + if (!str) + return -EINVAL; + + size = memparse(str, &str); log_buf_len_update(size); From e6b8a4d76ae25fbb26b9e72f03007406329bd021 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 18 Oct 2018 15:15:05 +0200 Subject: [PATCH 0342/2608] genirq: Fix race on spurious interrupt detection commit 746a923b863a1065ef77324e1e43f19b1a3eab5c upstream. Commit 1e77d0a1ed74 ("genirq: Sanitize spurious interrupt detection of threaded irqs") made detection of spurious interrupts work for threaded handlers by: a) incrementing a counter every time the thread returns IRQ_HANDLED, and b) checking whether that counter has increased every time the thread is woken. However for oneshot interrupts, the commit unmasks the interrupt before incrementing the counter. If another interrupt occurs right after unmasking but before the counter is incremented, that interrupt is incorrectly considered spurious: time | irq_thread() | irq_thread_fn() | action->thread_fn() | irq_finalize_oneshot() | unmask_threaded_irq() /* interrupt is unmasked */ | | /* interrupt fires, incorrectly deemed spurious */ | | atomic_inc(&desc->threads_handled); /* counter is incremented */ v This is observed with a hi3110 CAN controller receiving data at high volume (from a separate machine sending with "cangen -g 0 -i -x"): The controller signals a huge number of interrupts (hundreds of millions per day) and every second there are about a dozen which are deemed spurious. In theory with high CPU load and the presence of higher priority tasks, the number of incorrectly detected spurious interrupts might increase beyond the 99,900 threshold and cause disablement of the interrupt. In practice it just increments the spurious interrupt count. But that can cause people to waste time investigating it over and over. Fix it by moving the accounting before the invocation of irq_finalize_oneshot(). [ tglx: Folded change log update ] Fixes: 1e77d0a1ed74 ("genirq: Sanitize spurious interrupt detection of threaded irqs") Signed-off-by: Lukas Wunner Signed-off-by: Thomas Gleixner Cc: Mathias Duckeck Cc: Akshay Bhat Cc: Casey Fitzpatrick Cc: stable@vger.kernel.org # v3.16+ Link: https://lkml.kernel.org/r/1dfd8bbd16163940648045495e3e9698e63b50ad.1539867047.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 069311541577..4cd85870f00e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -882,6 +882,9 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) local_bh_disable(); ret = action->thread_fn(action->irq, action->dev_id); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + irq_finalize_oneshot(desc, action); local_bh_enable(); return ret; @@ -898,6 +901,9 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, irqreturn_t ret; ret = action->thread_fn(action->irq, action->dev_id); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + irq_finalize_oneshot(desc, action); return ret; } @@ -975,8 +981,6 @@ static int irq_thread(void *data) irq_thread_check_affinity(desc, action); action_ret = handler_fn(desc, action); - if (action_ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); From 1796b3898e51d4f435f17b8ff54458f5fd541d46 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2018 10:07:44 -0400 Subject: [PATCH 0343/2608] NFSv4.1: Fix the r/wsize checking commit 943cff67b842839f4f35364ba2db5c2d3f025d94 upstream. The intention of nfs4_session_set_rwsize() was to cap the r/wsize to the buffer sizes negotiated by the CREATE_SESSION. The initial code had a bug whereby we would not check the values negotiated by nfs_probe_fsinfo() (the assumption being that CREATE_SESSION will always negotiate buffer values that are sane w.r.t. the server's preferred r/wsizes) but would only check values set by the user in the 'mount' command. The code was changed in 4.11 to _always_ set the r/wsize, meaning that we now never use the server preferred r/wsizes. This is the regression that this patch fixes. Also rename the function to nfs4_session_limit_rwsize() in order to avoid future confusion. Fixes: 033853325fe3 (NFSv4.1 respect server's max size in CREATE_SESSION") Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4client.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index fb85d04fdc4c..fed9c8005c17 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -925,10 +925,10 @@ EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. - * Set the mount rsize and wsize with negotiated fore channel - * attributes which will be bound checked in nfs_server_set_fsinfo. + * Limit the mount rsize, wsize and dtsize using negotiated fore + * channel attributes. */ -static void nfs4_session_set_rwsize(struct nfs_server *server) +static void nfs4_session_limit_rwsize(struct nfs_server *server) { #ifdef CONFIG_NFS_V4_1 struct nfs4_session *sess; @@ -941,9 +941,11 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (!server->rsize || server->rsize > server_resp_sz) + if (server->dtsize > server_resp_sz) + server->dtsize = server_resp_sz; + if (server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (!server->wsize || server->wsize > server_rqst_sz) + if (server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } @@ -990,12 +992,12 @@ static int nfs4_server_common_setup(struct nfs_server *server, (unsigned long long) server->fsid.minor); nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); - nfs4_session_set_rwsize(server); - error = nfs_probe_fsinfo(server, mntfh, fattr); if (error < 0) goto out; + nfs4_session_limit_rwsize(server); + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; From eecbf2c22b3f0dca05a199a8992ac2158266dfbd Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 18 Oct 2018 15:01:48 -0400 Subject: [PATCH 0344/2608] nfs: Fix a missed page unlock after pg_doio() commit fdbd1a2e4a71adcb1ae219fcfd964930d77a7f84 upstream. We must check pg_error and call error_cleanup after any call to pg_doio. Currently, we are skipping the unlock of a page if we encounter an error in nfs_pageio_complete() before handing off the work to the RPC layer. Signed-off-by: Benjamin Coddington Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d0543e19098a..37f20d7a26ed 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1110,6 +1110,20 @@ static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, return ret; } +static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc) +{ + u32 midx; + struct nfs_pgio_mirror *mirror; + + if (!desc->pg_error) + return; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + } +} + int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { @@ -1160,25 +1174,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, return 1; out_failed: - /* - * We might have failed before sending any reqs over wire. - * Clean up rest of the reqs in mirror pg_list. - */ - if (desc->pg_error) { - struct nfs_pgio_mirror *mirror; - void (*func)(struct list_head *); - - /* remember fatal errors */ - if (nfs_error_is_fatal(desc->pg_error)) - nfs_context_set_write_error(req->wb_context, - desc->pg_error); - - func = desc->pg_completion_ops->error_cleanup; - for (midx = 0; midx < desc->pg_mirror_count; midx++) { - mirror = &desc->pg_mirrors[midx]; - func(&mirror->pg_list); - } - } + /* remember fatal errors */ + if (nfs_error_is_fatal(desc->pg_error)) + nfs_context_set_write_error(req->wb_context, + desc->pg_error); + nfs_pageio_error_cleanup(desc); return 0; } @@ -1250,6 +1250,8 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) for (midx = 0; midx < desc->pg_mirror_count; midx++) nfs_pageio_complete_mirror(desc, midx); + if (desc->pg_error < 0) + nfs_pageio_error_cleanup(desc); if (desc->pg_ops->pg_cleanup) desc->pg_ops->pg_cleanup(desc); nfs_pageio_cleanup_mirroring(desc); From eecd97e13c1553dfff1c159513b6ab041f53a70a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Oct 2018 15:54:15 -0400 Subject: [PATCH 0345/2608] nfsd: Fix an Oops in free_session() commit bb6ad5572c0022e17e846b382d7413cdcf8055be upstream. In call_xpt_users(), we delete the entry from the list, but we do not reinitialise it. This triggers the list poisoning when we later call unregister_xpt_user() in nfsd4_del_conns(). Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc_xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d16a8b423c20..ea7b5a3a53f0 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1040,7 +1040,7 @@ static void call_xpt_users(struct svc_xprt *xprt) spin_lock(&xprt->xpt_lock); while (!list_empty(&xprt->xpt_users)) { u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list); - list_del(&u->list); + list_del_init(&u->list); u->callback(u); } spin_unlock(&xprt->xpt_lock); From a45ce0e1b9cd86443406462375c4004f55f7a881 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 28 Sep 2018 20:41:48 +0300 Subject: [PATCH 0346/2608] lockd: fix access beyond unterminated strings in prints commit 93f38b6fae0ea8987e22d9e6c38f8dfdccd867ee upstream. printk format used %*s instead of %.*s, so hostname_len does not limit the number of bytes accessed from hostname. Signed-off-by: Amir Goldstein Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/lockd/host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0d4e590e0549..c4504ed9f680 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -341,7 +341,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, }; struct lockd_net *ln = net_generic(net, lockd_net_id); - dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, + dprintk("lockd: %s(host='%.*s', vers=%u, proto=%s)\n", __func__, (int)hostname_len, hostname, rqstp->rq_vers, (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); From a0a4629065e4cc4a79b91d9942afb5565d86b462 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 3 Oct 2018 11:43:59 -0500 Subject: [PATCH 0347/2608] dm ioctl: harden copy_params()'s copy_from_user() from malicious users commit 800a7340ab7dd667edf95e74d8e4f23a17e87076 upstream. In copy_params(), the struct 'dm_ioctl' is first copied from the user space buffer 'user' to 'param_kernel' and the field 'data_size' is checked against 'minimum_data_size' (size of 'struct dm_ioctl' payload up to its 'data' member). If the check fails, an error code EINVAL will be returned. Otherwise, param_kernel->data_size is used to do a second copy, which copies from the same user-space buffer to 'dmi'. After the second copy, only 'dmi->data_size' is checked against 'param_kernel->data_size'. Given that the buffer 'user' resides in the user space, a malicious user-space process can race to change the content in the buffer between the two copies. This way, the attacker can inject inconsistent data into 'dmi' (versus previously validated 'param_kernel'). Fix redundant copying of 'minimum_data_size' from user-space buffer by using the first copy stored in 'param_kernel'. Also remove the 'data_size' check after the second copy because it is now unnecessary. Cc: stable@vger.kernel.org Signed-off-by: Wenwen Wang Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e52676fa9832..ca948155191a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1719,8 +1719,7 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla } static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, - int ioctl_flags, - struct dm_ioctl **param, int *param_flags) + int ioctl_flags, struct dm_ioctl **param, int *param_flags) { struct dm_ioctl *dmi; int secure_data; @@ -1761,18 +1760,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern *param_flags |= DM_PARAMS_MALLOC; - if (copy_from_user(dmi, user, param_kernel->data_size)) - goto bad; + /* Copy from param_kernel (which was already copied from user) */ + memcpy(dmi, param_kernel, minimum_data_size); + if (copy_from_user(&dmi->data, (char __user *)user + minimum_data_size, + param_kernel->data_size - minimum_data_size)) + goto bad; data_copied: - /* - * Abort if something changed the ioctl data while it was being copied. - */ - if (dmi->data_size != param_kernel->data_size) { - DMERR("rejecting ioctl: data size modified while processing parameters"); - goto bad; - } - /* Wipe the user buffer so we do not return it to userspace */ if (secure_data && clear_user(user, param_kernel->data_size)) goto bad; From 229cbc6123b4b1d2d46956a869e6dbf6905265a4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Oct 2018 18:05:07 +0900 Subject: [PATCH 0348/2608] dm zoned: fix metadata block ref counting commit 33c2865f8d011a2ca9f67124ddab9dc89382e9f1 upstream. Since the ref field of struct dmz_mblock is always used with the spinlock of struct dmz_metadata locked, there is no need to use an atomic_t type. Change the type of the ref field to an unsigne integer. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-metadata.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 70485de37b66..e1dce7f8f4fd 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -99,7 +99,7 @@ struct dmz_mblock { struct rb_node node; struct list_head link; sector_t no; - atomic_t ref; + unsigned int ref; unsigned long state; struct page *page; void *data; @@ -296,7 +296,7 @@ static struct dmz_mblock *dmz_alloc_mblock(struct dmz_metadata *zmd, RB_CLEAR_NODE(&mblk->node); INIT_LIST_HEAD(&mblk->link); - atomic_set(&mblk->ref, 0); + mblk->ref = 0; mblk->state = 0; mblk->no = mblk_no; mblk->data = page_address(mblk->page); @@ -397,7 +397,7 @@ static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd, return NULL; spin_lock(&zmd->mblk_lock); - atomic_inc(&mblk->ref); + mblk->ref++; set_bit(DMZ_META_READING, &mblk->state); dmz_insert_mblock(zmd, mblk); spin_unlock(&zmd->mblk_lock); @@ -484,7 +484,8 @@ static void dmz_release_mblock(struct dmz_metadata *zmd, spin_lock(&zmd->mblk_lock); - if (atomic_dec_and_test(&mblk->ref)) { + mblk->ref--; + if (mblk->ref == 0) { if (test_bit(DMZ_META_ERROR, &mblk->state)) { rb_erase(&mblk->node, &zmd->mblk_rbtree); dmz_free_mblock(zmd, mblk); @@ -511,7 +512,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, mblk = dmz_lookup_mblock(zmd, mblk_no); if (mblk) { /* Cache hit: remove block from LRU list */ - if (atomic_inc_return(&mblk->ref) == 1 && + mblk->ref++; + if (mblk->ref == 1 && !test_bit(DMZ_META_DIRTY, &mblk->state)) list_del_init(&mblk->link); } @@ -753,7 +755,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) spin_lock(&zmd->mblk_lock); clear_bit(DMZ_META_DIRTY, &mblk->state); - if (atomic_read(&mblk->ref) == 0) + if (mblk->ref == 0) list_add_tail(&mblk->link, &zmd->mblk_lru_list); spin_unlock(&zmd->mblk_lock); } @@ -2308,7 +2310,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) mblk = list_first_entry(&zmd->mblk_dirty_list, struct dmz_mblock, link); dmz_dev_warn(zmd->dev, "mblock %llu still in dirty list (ref %u)", - (u64)mblk->no, atomic_read(&mblk->ref)); + (u64)mblk->no, mblk->ref); list_del_init(&mblk->link); rb_erase(&mblk->node, &zmd->mblk_rbtree); dmz_free_mblock(zmd, mblk); @@ -2326,8 +2328,8 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) root = &zmd->mblk_rbtree; rbtree_postorder_for_each_entry_safe(mblk, next, root, node) { dmz_dev_warn(zmd->dev, "mblock %llu ref %u still in rbtree", - (u64)mblk->no, atomic_read(&mblk->ref)); - atomic_set(&mblk->ref, 0); + (u64)mblk->no, mblk->ref); + mblk->ref = 0; dmz_free_mblock(zmd, mblk); } From 37531246704e708a5b3ee4a9498493cc4ff0c4bb Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Oct 2018 18:05:08 +0900 Subject: [PATCH 0349/2608] dm zoned: fix various dmz_get_mblock() issues commit 3d4e738311327bc4ba1d55fbe2f1da3de4a475f9 upstream. dmz_fetch_mblock() called from dmz_get_mblock() has a race since the allocation of the new metadata block descriptor and its insertion in the cache rbtree with the READING state is not atomic. Two different contexts requesting the same block may end up each adding two different descriptors of the same block to the cache. Another problem for this function is that the BIO for processing the block read is allocated after the metadata block descriptor is inserted in the cache rbtree. If the BIO allocation fails, the metadata block descriptor is freed without first being removed from the rbtree. Fix the first problem by checking again if the requested block is not in the cache right before inserting the newly allocated descriptor, atomically under the mblk_lock spinlock. The second problem is fixed by simply allocating the BIO before inserting the new block in the cache. Finally, since dmz_fetch_mblock() also increments a block reference counter, rename the function to dmz_get_mblock_slow(). To be symmetric and clear, also rename dmz_lookup_mblock() to dmz_get_mblock_fast() and increment the block reference counter directly in that function rather than in dmz_get_mblock(). Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-metadata.c | 66 +++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index e1dce7f8f4fd..34968ca6b84a 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -339,10 +339,11 @@ static void dmz_insert_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk) } /* - * Lookup a metadata block in the rbtree. + * Lookup a metadata block in the rbtree. If the block is found, increment + * its reference count. */ -static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd, - sector_t mblk_no) +static struct dmz_mblock *dmz_get_mblock_fast(struct dmz_metadata *zmd, + sector_t mblk_no) { struct rb_root *root = &zmd->mblk_rbtree; struct rb_node *node = root->rb_node; @@ -350,8 +351,17 @@ static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd, while (node) { mblk = container_of(node, struct dmz_mblock, node); - if (mblk->no == mblk_no) + if (mblk->no == mblk_no) { + /* + * If this is the first reference to the block, + * remove it from the LRU list. + */ + mblk->ref++; + if (mblk->ref == 1 && + !test_bit(DMZ_META_DIRTY, &mblk->state)) + list_del_init(&mblk->link); return mblk; + } node = (mblk->no < mblk_no) ? node->rb_left : node->rb_right; } @@ -382,32 +392,47 @@ static void dmz_mblock_bio_end_io(struct bio *bio) } /* - * Read a metadata block from disk. + * Read an uncached metadata block from disk and add it to the cache. */ -static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd, - sector_t mblk_no) +static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, + sector_t mblk_no) { - struct dmz_mblock *mblk; + struct dmz_mblock *mblk, *m; sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; struct bio *bio; - /* Get block and insert it */ + /* Get a new block and a BIO to read it */ mblk = dmz_alloc_mblock(zmd, mblk_no); if (!mblk) return NULL; - spin_lock(&zmd->mblk_lock); - mblk->ref++; - set_bit(DMZ_META_READING, &mblk->state); - dmz_insert_mblock(zmd, mblk); - spin_unlock(&zmd->mblk_lock); - bio = bio_alloc(GFP_NOIO, 1); if (!bio) { dmz_free_mblock(zmd, mblk); return NULL; } + spin_lock(&zmd->mblk_lock); + + /* + * Make sure that another context did not start reading + * the block already. + */ + m = dmz_get_mblock_fast(zmd, mblk_no); + if (m) { + spin_unlock(&zmd->mblk_lock); + dmz_free_mblock(zmd, mblk); + bio_put(bio); + return m; + } + + mblk->ref++; + set_bit(DMZ_META_READING, &mblk->state); + dmz_insert_mblock(zmd, mblk); + + spin_unlock(&zmd->mblk_lock); + + /* Submit read BIO */ bio->bi_iter.bi_sector = dmz_blk2sect(block); bio_set_dev(bio, zmd->dev->bdev); bio->bi_private = mblk; @@ -509,19 +534,12 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, /* Check rbtree */ spin_lock(&zmd->mblk_lock); - mblk = dmz_lookup_mblock(zmd, mblk_no); - if (mblk) { - /* Cache hit: remove block from LRU list */ - mblk->ref++; - if (mblk->ref == 1 && - !test_bit(DMZ_META_DIRTY, &mblk->state)) - list_del_init(&mblk->link); - } + mblk = dmz_get_mblock_fast(zmd, mblk_no); spin_unlock(&zmd->mblk_lock); if (!mblk) { /* Cache miss: read the block from disk */ - mblk = dmz_fetch_mblock(zmd, mblk_no); + mblk = dmz_get_mblock_slow(zmd, mblk_no); if (!mblk) return ERR_PTR(-ENOMEM); } From dfed53c894d5578031175d11bc56e3031ce0991b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Oct 2018 06:12:50 +0000 Subject: [PATCH 0350/2608] powerpc/msi: Fix compile error on mpc83xx commit 0f99153def98134403c9149128e59d3e1786cf04 upstream. mpic_get_primary_version() is not defined when not using MPIC. The compile error log like: arch/powerpc/sysdev/built-in.o: In function `fsl_of_msi_probe': fsl_msi.c:(.text+0x150c): undefined reference to `fsl_mpic_primary_get_version' Signed-off-by: Jia Hongtao Signed-off-by: Scott Wood Reported-by: Radu Rendec Fixes: 807d38b73b6 ("powerpc/mpic: Add get_version API both for internal and external use") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mpic.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index fad8ddd697ac..0abf2e7fd222 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -393,7 +393,14 @@ extern struct bus_type mpic_subsys; #define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ /* Get the version of primary MPIC */ +#ifdef CONFIG_MPIC extern u32 fsl_mpic_primary_get_version(void); +#else +static inline u32 fsl_mpic_primary_get_version(void) +{ + return 0; +} +#endif /* Allocate the controller structure and setup the linux irq descs * for the range if interrupts passed in. No HW initialization is From d70d2e3c4ab8c29330b91b210514e32bee13ec1e Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 27 Oct 2018 01:46:34 +0300 Subject: [PATCH 0351/2608] MIPS: OCTEON: fix out of bounds array access on CN68XX commit c0fae7e2452b90c31edd2d25eb3baf0c76b400ca upstream. The maximum number of interfaces is returned by cvmx_helper_get_number_of_interfaces(), and the value is used to access interface_port_count[]. When CN68XX support was added, we forgot to increase the array size. Fix that. Fixes: 2c8c3f0201333 ("MIPS: Octeon: Support additional interfaces on CN68XX") Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20949/ Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/executive/cvmx-helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c index f24be0b5db50..c683c369bca5 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c @@ -67,7 +67,7 @@ void (*cvmx_override_pko_queue_priority) (int pko_port, void (*cvmx_override_ipd_port_setup) (int ipd_port); /* Port count per interface */ -static int interface_port_count[5]; +static int interface_port_count[9]; /** * Return the number of interfaces the chip has. Each interface From 34e0ab871c813d9f8939f30198a972d9a476c20c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 1 Oct 2018 12:42:49 +0100 Subject: [PATCH 0352/2608] iommu/arm-smmu: Ensure that page-table updates are visible before TLBI commit 7d321bd3542500caf125249f44dc37cb4e738013 upstream. The IO-pgtable code relies on the driver TLB invalidation callbacks to ensure that all page-table updates are visible to the IOMMU page-table walker. In the case that the page-table walker is cache-coherent, we cannot rely on an implicit DSB from the DMA-mapping code, so we must ensure that we execute a DSB in our tlb_add_flush() callback prior to triggering the invalidation. Cc: Cc: Robin Murphy Fixes: 2df7a25ce4a7 ("iommu/arm-smmu: Clean up DMA API usage") Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm-smmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 2c436376f13e..15b5856475fc 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -475,6 +475,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + if (stage1) { reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; @@ -516,6 +519,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, struct arm_smmu_domain *smmu_domain = cookie; void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } From e7f6b41ad2aab1453f0ff64b014027209e8ea632 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 3 Oct 2018 13:21:07 +0100 Subject: [PATCH 0353/2608] TC: Set DMA masks for devices commit 3f2aa244ee1a0d17ed5b6c86564d2c1b24d1c96b upstream. Fix a TURBOchannel support regression with commit 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask") that caused coherent DMA allocations to produce a warning such as: defxx: v1.11 2014/07/01 Lawrence V. Stefani and others tc1: DEFTA at MMIO addr = 0x1e900000, IRQ = 20, Hardware addr = 08-00-2b-a3-a3-29 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at ./include/linux/dma-mapping.h:516 dfx_dev_register+0x670/0x678 Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.19.0-rc6 #2 Stack : ffffffff8009ffc0 fffffffffffffec0 0000000000000000 ffffffff80647650 0000000000000000 0000000000000000 ffffffff806f5f80 ffffffffffffffff 0000000000000000 0000000000000000 0000000000000001 ffffffff8065d4e8 98000000031b6300 ffffffff80563478 ffffffff805685b0 ffffffffffffffff 0000000000000000 ffffffff805d6720 0000000000000204 ffffffff80388df8 0000000000000000 0000000000000009 ffffffff8053efd0 ffffffff806657d0 0000000000000000 ffffffff803177f8 0000000000000000 ffffffff806d0000 9800000003078000 980000000307b9e0 000000001e900000 ffffffff80067940 0000000000000000 ffffffff805d6720 0000000000000204 ffffffff80388df8 ffffffff805176c0 ffffffff8004dc78 0000000000000000 ffffffff80067940 ... Call Trace: [] show_stack+0xa0/0x130 [] __warn+0x128/0x170 ---[ end trace b1d1e094f67f3bb2 ]--- This is because the TURBOchannel bus driver fails to set the coherent DMA mask for devices enumerated. Set the regular and coherent DMA masks for TURBOchannel devices then, observing that the bus protocol supports a 34-bit (16GiB) DMA address space, by interpreting the value presented in the address cycle across the 32 `ad' lines as a 32-bit word rather than byte address[1]. The architectural size of the TURBOchannel DMA address space exceeds the maximum amount of RAM any actual TURBOchannel system in existence may have, hence both masks are the same. This removes the warning shown above. References: [1] "TURBOchannel Hardware Specification", EK-369AA-OD-007B, Digital Equipment Corporation, January 1993, Section "DMA", pp. 1-15 -- 1-17 Signed-off-by: Maciej W. Rozycki Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20835/ Fixes: 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask") Cc: stable@vger.kernel.org # 4.16+ Cc: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- drivers/tc/tc.c | 8 +++++++- include/linux/tc.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tc/tc.c b/drivers/tc/tc.c index 3be9519654e5..cf3fad2cb871 100644 --- a/drivers/tc/tc.c +++ b/drivers/tc/tc.c @@ -2,7 +2,7 @@ * TURBOchannel bus services. * * Copyright (c) Harald Koerfgen, 1998 - * Copyright (c) 2001, 2003, 2005, 2006 Maciej W. Rozycki + * Copyright (c) 2001, 2003, 2005, 2006, 2018 Maciej W. Rozycki * Copyright (c) 2005 James Simmons * * This file is subject to the terms and conditions of the GNU @@ -10,6 +10,7 @@ * directory of this archive for more details. */ #include +#include #include #include #include @@ -92,6 +93,11 @@ static void __init tc_bus_add_devices(struct tc_bus *tbus) tdev->dev.bus = &tc_bus_type; tdev->slot = slot; + /* TURBOchannel has 34-bit DMA addressing (16GiB space). */ + tdev->dma_mask = DMA_BIT_MASK(34); + tdev->dev.dma_mask = &tdev->dma_mask; + tdev->dev.coherent_dma_mask = DMA_BIT_MASK(34); + for (i = 0; i < 8; i++) { tdev->firmware[i] = readb(module + offset + TC_FIRM_VER + 4 * i); diff --git a/include/linux/tc.h b/include/linux/tc.h index f92511e57cdb..a60639f37963 100644 --- a/include/linux/tc.h +++ b/include/linux/tc.h @@ -84,6 +84,7 @@ struct tc_dev { device. */ struct device dev; /* Generic device interface. */ struct resource resource; /* Address space of this device. */ + u64 dma_mask; /* DMA addressable range. */ char vendor[9]; char name[9]; char firmware[9]; From fdff1cf08931439f5807dcf336947c2adaeec37d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 8 Oct 2018 15:08:27 -0400 Subject: [PATCH 0354/2608] media: v4l2-tpg: fix kernel oops when enabling HFLIP and OSD commit 250854eed5d45a73d81e4137dfd85180af6f2ec3 upstream. When the OSD is on (i.e. vivid displays text on top of the test pattern), and you enable hflip, then the driver crashes. The cause turned out to be a division of a negative number by an unsigned value. You expect that -8 / 2U would be -4, but in reality it is 2147483644 :-( Fixes: 3e14e7a82c1ef ("vivid-tpg: add hor/vert downsampling support to tpg_gen_text") Signed-off-by: Hans Verkuil Reported-by: Mauro Carvalho Chehab Cc: # for v4.1 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/common/v4l2-tpg/v4l2-tpg-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index a772976cfe26..a1aacd6fb96f 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -1765,7 +1765,7 @@ typedef struct { u16 __; u8 _; } __packed x24; pos[7] = (chr & (0x01 << 0) ? fg : bg); \ } \ \ - pos += (tpg->hflip ? -8 : 8) / hdiv; \ + pos += (tpg->hflip ? -8 : 8) / (int)hdiv; \ } \ } \ } while (0) From 8305d98c58d238b82c0def1e4395724381c58c9e Mon Sep 17 00:00:00 2001 From: He Zhe Date: Fri, 17 Aug 2018 22:42:28 +0800 Subject: [PATCH 0355/2608] kgdboc: Passing ekgdboc to command line causes panic commit 1bd54d851f50dea6af30c3e6ff4f3e9aab5558f9 upstream. kgdboc_option_setup does not check input argument before passing it to strlen. The argument would be a NULL pointer if "ekgdboc", without its value, is set in command line and thus cause the following panic. PANIC: early exception 0xe3 IP 10:ffffffff8fbbb620 error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.18-rc8+ #1 [ 0.000000] RIP: 0010:strlen+0x0/0x20 ... [ 0.000000] Call Trace [ 0.000000] ? kgdboc_option_setup+0x9/0xa0 [ 0.000000] ? kgdboc_early_init+0x6/0x1b [ 0.000000] ? do_early_param+0x4d/0x82 [ 0.000000] ? parse_args+0x212/0x330 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_options+0x20/0x23 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_param+0x2d/0x39 [ 0.000000] ? setup_arch+0x2f7/0xbf4 [ 0.000000] ? start_kernel+0x5e/0x4c2 [ 0.000000] ? load_ucode_bsp+0x113/0x12f [ 0.000000] ? secondary_startup_64+0xa5/0xb0 This patch adds a check to prevent the panic. Cc: stable@vger.kernel.org Cc: jason.wessel@windriver.com Cc: gregkh@linuxfoundation.org Cc: jslaby@suse.com Signed-off-by: He Zhe Reviewed-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index a260cde743e2..2db68dfe497d 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -133,6 +133,11 @@ static void kgdboc_unregister_kbd(void) static int kgdboc_option_setup(char *opt) { + if (!opt) { + pr_err("kgdboc: config string not provided\n"); + return -EINVAL; + } + if (strlen(opt) >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdboc: config string too long\n"); return -ENOSPC; From 034680fcf1e96977d0e80ff8a6ceb9c1b5326bac Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 8 Nov 2018 08:35:06 +0100 Subject: [PATCH 0356/2608] xen: fix xen_qlock_wait() commit d3132b3860f6cf35ff7609a76bbcdbb814bd027c upstream. Commit a856531951dc80 ("xen: make xen_qlock_wait() nestable") introduced a regression for Xen guests running fully virtualized (HVM or PVH mode). The Xen hypervisor wouldn't return from the poll hypercall with interrupts disabled in case of an interrupt (for PV guests it does). So instead of disabling interrupts in xen_qlock_wait() use a nesting counter to avoid calling xen_clear_irq_pending() in case xen_qlock_wait() is nested. Fixes: a856531951dc80 ("xen: make xen_qlock_wait() nestable") Cc: stable@vger.kernel.org Reported-by: Sander Eikelenboom Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Tested-by: Sander Eikelenboom Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 3243fe1b6ea4..2527540051ff 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -20,6 +21,7 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); +static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest); static bool xen_pvspin = true; #include @@ -40,25 +42,25 @@ static void xen_qlock_kick(int cpu) */ static void xen_qlock_wait(u8 *byte, u8 val) { - unsigned long flags; int irq = __this_cpu_read(lock_kicker_irq); + atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest); /* If kicker interrupts not initialized yet, just spin */ if (irq == -1 || in_nmi()) return; - /* Guard against reentry. */ - local_irq_save(flags); + /* Detect reentry. */ + atomic_inc(nest_cnt); - /* If irq pending already clear it. */ - if (xen_test_irq_pending(irq)) { + /* If irq pending already and no nested call clear it. */ + if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) { xen_clear_irq_pending(irq); } else if (READ_ONCE(*byte) == val) { /* Block until irq becomes pending (or a spurious wakeup) */ xen_poll_irq(irq); } - local_irq_restore(flags); + atomic_dec(nest_cnt); } static irqreturn_t dummy_handler(int irq, void *dev_id) From 4e6d30de2e44fab869078194a6b1686659579a70 Mon Sep 17 00:00:00 2001 From: Manjunath Patil Date: Tue, 30 Oct 2018 09:49:21 -0700 Subject: [PATCH 0357/2608] xen-blkfront: fix kernel panic with negotiate_mq error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6cc4a0863c9709c512280c64e698d68443ac8053 upstream. info->nr_rings isn't adjusted in case of ENOMEM error from negotiate_mq(). This leads to kernel panic in error path. Typical call stack involving panic - #8 page_fault at ffffffff8175936f [exception RIP: blkif_free_ring+33] RIP: ffffffffa0149491 RSP: ffff8804f7673c08 RFLAGS: 00010292 ... #9 blkif_free at ffffffffa0149aaa [xen_blkfront] #10 talk_to_blkback at ffffffffa014c8cd [xen_blkfront] #11 blkback_changed at ffffffffa014ea8b [xen_blkfront] #12 xenbus_otherend_changed at ffffffff81424670 #13 backend_changed at ffffffff81426dc3 #14 xenwatch_thread at ffffffff81422f29 #15 kthread at ffffffff810abe6a #16 ret_from_fork at ffffffff81754078 Cc: stable@vger.kernel.org Fixes: 7ed8ce1c5fc7 ("xen-blkfront: move negotiate_mq to cover all cases of new VBDs") Signed-off-by: Manjunath Patil Acked-by: Roger Pau Monné Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4b2dcb65b011..32ac5f551e55 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1910,6 +1910,7 @@ static int negotiate_mq(struct blkfront_info *info) info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL); if (!info->rinfo) { xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure"); + info->nr_rings = 0; return -ENOMEM; } From 1f1eb8444ba04c530c2e6f024a77e94782a6279f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Sep 2018 23:22:40 -0400 Subject: [PATCH 0358/2608] media: em28xx: use a default format if TRY_FMT fails commit f823ce2a1202d47110a7ef86b65839f0be8adc38 upstream. Follow the V4L2 spec, as warned by v4l2-compliance: warn: v4l2-test-formats.cpp(732): TRY_FMT cannot handle an invalid pixelformat. warn: v4l2-test-formats.cpp(733): This may or may not be a problem. For more information see: warn: v4l2-test-formats.cpp(734): http://www.mail-archive.com/linux-media@vger.kernel.org/msg56550.html Cc: stable@vger.kernel.org Fixes: bddcf63313c6 ("V4L/DVB (9927): em28xx: use a more standard way to specify video formats") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-video.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index 8d253a5df0a9..73d8ae3d9bcf 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -1445,9 +1445,9 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, fmt = format_by_fourcc(f->fmt.pix.pixelformat); if (!fmt) { - em28xx_videodbg("Fourcc format (%08x) invalid.\n", - f->fmt.pix.pixelformat); - return -EINVAL; + fmt = &format[0]; + em28xx_videodbg("Fourcc format (%08x) invalid. Using default (%08x).\n", + f->fmt.pix.pixelformat, fmt->fourcc); } if (dev->board.is_em2800) { From 3dfd975bb1c360932ec718bf83f65bad594e964c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Sep 2018 16:49:51 -0400 Subject: [PATCH 0359/2608] media: tvp5150: avoid going past array on v4l2_querymenu() commit 5c4c4505b716cb782ad7263091edc466c4d1fbd4 upstream. The parameters of v4l2_ctrl_new_std_menu_items() are tricky: instead of the number of possible values, it requires the number of the maximum value. In other words, the ARRAY_SIZE() value should be decremented, otherwise it will go past the array bounds, as warned by KASAN: [ 279.839688] BUG: KASAN: global-out-of-bounds in v4l2_querymenu+0x10d/0x180 [videodev] [ 279.839709] Read of size 8 at addr ffffffffc10a4cb0 by task v4l2-compliance/16676 [ 279.839736] CPU: 1 PID: 16676 Comm: v4l2-compliance Not tainted 4.18.0-rc2+ #120 [ 279.839741] Hardware name: /NUC5i7RYB, BIOS RYBDWi35.86A.0364.2017.0511.0949 05/11/2017 [ 279.839743] Call Trace: [ 279.839758] dump_stack+0x71/0xab [ 279.839807] ? v4l2_querymenu+0x10d/0x180 [videodev] [ 279.839817] print_address_description+0x1c9/0x270 [ 279.839863] ? v4l2_querymenu+0x10d/0x180 [videodev] [ 279.839871] kasan_report+0x237/0x360 [ 279.839918] v4l2_querymenu+0x10d/0x180 [videodev] [ 279.839964] __video_do_ioctl+0x2c8/0x590 [videodev] [ 279.840011] ? copy_overflow+0x20/0x20 [videodev] [ 279.840020] ? avc_ss_reset+0xa0/0xa0 [ 279.840028] ? check_stack_object+0x21/0x60 [ 279.840036] ? __check_object_size+0xe7/0x240 [ 279.840080] video_usercopy+0xed/0x730 [videodev] [ 279.840123] ? copy_overflow+0x20/0x20 [videodev] [ 279.840167] ? v4l_enumstd+0x40/0x40 [videodev] [ 279.840177] ? __handle_mm_fault+0x9f9/0x1ba0 [ 279.840186] ? __pmd_alloc+0x2c0/0x2c0 [ 279.840193] ? __vfs_write+0xb6/0x350 [ 279.840200] ? kernel_read+0xa0/0xa0 [ 279.840244] ? video_usercopy+0x730/0x730 [videodev] [ 279.840284] v4l2_ioctl+0xa1/0xb0 [videodev] [ 279.840295] do_vfs_ioctl+0x117/0x8a0 [ 279.840303] ? selinux_file_ioctl+0x211/0x2f0 [ 279.840313] ? ioctl_preallocate+0x120/0x120 [ 279.840319] ? selinux_capable+0x20/0x20 [ 279.840332] ksys_ioctl+0x70/0x80 [ 279.840342] __x64_sys_ioctl+0x3d/0x50 [ 279.840351] do_syscall_64+0x6d/0x1c0 [ 279.840361] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 279.840367] RIP: 0033:0x7fdfb46275d7 [ 279.840369] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48 [ 279.840474] RSP: 002b:00007ffee1179038 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 279.840483] RAX: ffffffffffffffda RBX: 00007ffee1179180 RCX: 00007fdfb46275d7 [ 279.840488] RDX: 00007ffee11790c0 RSI: 00000000c02c5625 RDI: 0000000000000003 [ 279.840493] RBP: 0000000000000002 R08: 0000000000000020 R09: 00000000009f0902 [ 279.840497] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffee117a5a0 [ 279.840501] R13: 00007ffee11790c0 R14: 0000000000000002 R15: 0000000000000000 [ 279.840515] The buggy address belongs to the variable: [ 279.840535] tvp5150_test_patterns+0x10/0xffffffffffffe360 [tvp5150] Fixes: c43875f66140 ("[media] tvp5150: replace MEDIA_ENT_F_CONN_TEST by a control") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 59b0c1fce9be..4d3e97f97c76 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -1530,7 +1530,7 @@ static int tvp5150_probe(struct i2c_client *c, 27000000, 1, 27000000); v4l2_ctrl_new_std_menu_items(&core->hdl, &tvp5150_ctrl_ops, V4L2_CID_TEST_PATTERN, - ARRAY_SIZE(tvp5150_test_patterns), + ARRAY_SIZE(tvp5150_test_patterns) - 1, 0, 0, tvp5150_test_patterns); sd->ctrl_handler = &core->hdl; if (core->hdl.error) { From a4e311ed2390394d32806d5fcca5542505f5f47f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 14 Sep 2018 00:20:21 -0400 Subject: [PATCH 0360/2608] media: em28xx: fix input name for Terratec AV 350 commit 15644bfa195bd166d0a5ed76ae2d587f719c3dac upstream. Instead of using a register value, use an AMUX name, as otherwise VIDIOC_G_AUDIO would fail. Cc: stable@vger.kernel.org Fixes: 766ed64de554 ("V4L/DVB (11827): Add support for Terratec Grabster AV350") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-cards.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 11a59854a0a6..9747e23aad27 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -2112,13 +2112,13 @@ struct em28xx_board em28xx_boards[] = { .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, - .amux = EM28XX_AUDIO_SRC_LINE, + .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_av350_unmute_gpio, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, - .amux = EM28XX_AUDIO_SRC_LINE, + .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_av350_unmute_gpio, } }, }, From 453a8a459cc2cd6c975449c04e0720fe118e4056 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Sep 2018 22:46:29 -0400 Subject: [PATCH 0361/2608] media: em28xx: make v4l2-compliance happier by starting sequence on zero commit afeaade90db4c5dab93f326d9582be1d5954a198 upstream. The v4l2-compliance tool complains if a video doesn't start with a zero sequence number. While this shouldn't cause any real problem for apps, let's make it happier, in order to better check the v4l2-compliance differences before and after patchsets. This is actually an old issue. It is there since at least its videobuf2 conversion, e. g. changeset 3829fadc461 ("[media] em28xx: convert to videobuf2"), if VB1 wouldn't suffer from the same issue. Cc: stable@vger.kernel.org Fixes: d3829fadc461 ("[media] em28xx: convert to videobuf2") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-video.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index 73d8ae3d9bcf..92a74bc34527 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -900,6 +900,8 @@ static int em28xx_enable_analog_tuner(struct em28xx *dev) if (!mdev || !v4l2->decoder) return 0; + dev->v4l2->field_count = 0; + /* * This will find the tuner that is connected into the decoder. * Technically, this is not 100% correct, as the device may be From baf1746da59e310f8e7d016090e2d447e867828a Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 13 Sep 2018 07:47:28 -0400 Subject: [PATCH 0362/2608] media: media colorspaces*.rst: rename AdobeRGB to opRGB commit a58c37978cf02f6d35d05ee4e9288cb8455f1401 upstream. Drop all Adobe references and use the official opRGB standard instead. Signed-off-by: Hans Verkuil Cc: stable@vger.kernel.org Acked-by: Daniel Vetter Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- Documentation/media/uapi/v4l/biblio.rst | 10 ---------- Documentation/media/uapi/v4l/colorspaces-defs.rst | 8 ++++---- .../media/uapi/v4l/colorspaces-details.rst | 13 ++++++------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/Documentation/media/uapi/v4l/biblio.rst b/Documentation/media/uapi/v4l/biblio.rst index 1cedcfc04327..386d6cf83e9c 100644 --- a/Documentation/media/uapi/v4l/biblio.rst +++ b/Documentation/media/uapi/v4l/biblio.rst @@ -226,16 +226,6 @@ xvYCC :author: International Electrotechnical Commission (http://www.iec.ch) -.. _adobergb: - -AdobeRGB -======== - - -:title: Adobe© RGB (1998) Color Image Encoding Version 2005-05 - -:author: Adobe Systems Incorporated (http://www.adobe.com) - .. _oprgb: opRGB diff --git a/Documentation/media/uapi/v4l/colorspaces-defs.rst b/Documentation/media/uapi/v4l/colorspaces-defs.rst index 410907fe9415..f24615544792 100644 --- a/Documentation/media/uapi/v4l/colorspaces-defs.rst +++ b/Documentation/media/uapi/v4l/colorspaces-defs.rst @@ -51,8 +51,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum - See :ref:`col-rec709`. * - ``V4L2_COLORSPACE_SRGB`` - See :ref:`col-srgb`. - * - ``V4L2_COLORSPACE_ADOBERGB`` - - See :ref:`col-adobergb`. + * - ``V4L2_COLORSPACE_OPRGB`` + - See :ref:`col-oprgb`. * - ``V4L2_COLORSPACE_BT2020`` - See :ref:`col-bt2020`. * - ``V4L2_COLORSPACE_DCI_P3`` @@ -90,8 +90,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum - Use the Rec. 709 transfer function. * - ``V4L2_XFER_FUNC_SRGB`` - Use the sRGB transfer function. - * - ``V4L2_XFER_FUNC_ADOBERGB`` - - Use the AdobeRGB transfer function. + * - ``V4L2_XFER_FUNC_OPRGB`` + - Use the opRGB transfer function. * - ``V4L2_XFER_FUNC_SMPTE240M`` - Use the SMPTE 240M transfer function. * - ``V4L2_XFER_FUNC_NONE`` diff --git a/Documentation/media/uapi/v4l/colorspaces-details.rst b/Documentation/media/uapi/v4l/colorspaces-details.rst index b5d551b9cc8f..09fabf4cd412 100644 --- a/Documentation/media/uapi/v4l/colorspaces-details.rst +++ b/Documentation/media/uapi/v4l/colorspaces-details.rst @@ -290,15 +290,14 @@ Y' is clamped to the range [0…1] and Cb and Cr are clamped to the range 170M/BT.601. The Y'CbCr quantization is limited range. -.. _col-adobergb: +.. _col-oprgb: -Colorspace Adobe RGB (V4L2_COLORSPACE_ADOBERGB) +Colorspace opRGB (V4L2_COLORSPACE_OPRGB) =============================================== -The :ref:`adobergb` standard defines the colorspace used by computer -graphics that use the AdobeRGB colorspace. This is also known as the -:ref:`oprgb` standard. The default transfer function is -``V4L2_XFER_FUNC_ADOBERGB``. The default Y'CbCr encoding is +The :ref:`oprgb` standard defines the colorspace used by computer +graphics that use the opRGB colorspace. The default transfer function is +``V4L2_XFER_FUNC_OPRGB``. The default Y'CbCr encoding is ``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is limited range. @@ -312,7 +311,7 @@ The chromaticities of the primary colors and the white reference are: .. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}| -.. flat-table:: Adobe RGB Chromaticities +.. flat-table:: opRGB Chromaticities :header-rows: 1 :stub-columns: 0 :widths: 1 1 2 From ad2c712b50597ab70cdf9c75101518a6837d0e69 Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Wed, 19 Sep 2018 12:27:50 -0700 Subject: [PATCH 0363/2608] arm64: lse: remove -fcall-used-x0 flag commit 2a6c7c367de82951c98a290a21156770f6f82c84 upstream. x0 is not callee-saved in the PCS. So there is no need to specify -fcall-used-x0. Clang doesn't currently support -fcall-used flags. This patch will help building the kernel with clang. Tested-by: Nick Desaulniers Acked-by: Will Deacon Signed-off-by: Tri Vo Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 9a8cb96555d6..9a947afaf74c 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -12,7 +12,7 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o -CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ +CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ From 7f46d951fd3389089500623a9f026e4d25be823a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 27 Sep 2018 22:36:27 +0100 Subject: [PATCH 0364/2608] rpmsg: smd: fix memory leak on channel create commit 940c620d6af8fca7d115de40f19870fba415efac upstream. Currently a failed allocation of channel->name leads to an immediate return without freeing channel. Fix this by setting ret to -ENOMEM and jumping to an exit path that kfree's channel. Detected by CoverityScan, CID#1473692 ("Resource Leak") Fixes: 53e2822e56c7 ("rpmsg: Introduce Qualcomm SMD backend") Cc: stable@vger.kernel.org Signed-off-by: Colin Ian King Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_smd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index f1a2147a6d84..72d02bfeda9e 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -1049,8 +1049,10 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed channel->edge = edge; channel->name = kstrdup(name, GFP_KERNEL); - if (!channel->name) - return ERR_PTR(-ENOMEM); + if (!channel->name) { + ret = -ENOMEM; + goto free_channel; + } mutex_init(&channel->tx_lock); spin_lock_init(&channel->recv_lock); @@ -1099,6 +1101,7 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed free_name_and_channel: kfree(channel->name); +free_channel: kfree(channel); return ERR_PTR(ret); From fdd780d0a26070e59fe2c3690a776aad25227756 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 30 Oct 2018 13:26:15 -0400 Subject: [PATCH 0365/2608] Cramfs: fix abad comparison when wrap-arounds occur commit 672ca9dd13f1aca0c17516f76fc5b0e8344b3e46 upstream. It is possible for corrupted filesystem images to produce very large block offsets that may wrap when a length is added, and wrongly pass the buffer size test. Reported-by: Anatoly Trosinenko Signed-off-by: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/cramfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 7919967488cb..011c6f53dcda 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -186,7 +186,8 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i continue; blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_SHIFT; blk_offset += offset; - if (blk_offset + len > BUFFER_SIZE) + if (blk_offset > BUFFER_SIZE || + blk_offset + len > BUFFER_SIZE) continue; return read_buffers[i] + blk_offset; } From 540631d9e690431fdc214b98e2f623838d84b03c Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 25 Sep 2018 10:21:10 -0500 Subject: [PATCH 0366/2608] ARM: dts: socfpga: Fix SDRAM node address for Arria10 commit ce3bf934f919a7d675c5b7fa4cc233ded9c6256e upstream. The address in the SDRAM node was incorrect. Fix this to agree with the correct address and to match the reg definition block. Cc: stable@vger.kernel.org Fixes: 54b4a8f57848b("arm: socfpga: dts: Add Arria10 SDRAM EDAC DTS support") Signed-off-by: Thor Thayer Signed-off-by: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/socfpga_arria10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index 791ca15c799e..bd1985694bca 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -601,7 +601,7 @@ status = "disabled"; }; - sdr: sdr@ffc25000 { + sdr: sdr@ffcfb100 { compatible = "altr,sdr-ctl", "syscon"; reg = <0xffcfb100 0x80>; }; From 1258e45dc460aa29dd70fd715fd1624b7ab93ef1 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 25 Sep 2018 10:31:52 -0500 Subject: [PATCH 0367/2608] arm64: dts: stratix10: Correct System Manager register size commit 74121b9aa3cd571ddfff014a9f47db36cae3cda9 upstream. Correct the register size of the System Manager node. Cc: stable@vger.kernel.org Fixes: 78cd6a9d8e154 ("arm64: dts: Add base stratix 10 dtsi") Signed-off-by: Thor Thayer Signed-off-by: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index c2b9bcb0ef61..e79f3defe002 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -249,7 +249,7 @@ sysmgr: sysmgr@ffd12000 { compatible = "altr,sys-mgr", "syscon"; - reg = <0xffd12000 0x1000>; + reg = <0xffd12000 0x228>; }; /* Local timer */ From 59bc2d3a68ad99c72972caadfa350e758af65e8b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 15 Nov 2017 10:44:58 +0100 Subject: [PATCH 0368/2608] soc/tegra: pmc: Fix child-node lookup commit 1dc6bd5e39a29453bdcc17348dd2a89f1aa4004e upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent pmc node could end up being prematurely freed as of_find_node_by_name() drops a reference to its first argument. Fixes: 3568df3d31d6 ("soc: tegra: Add thermal reset (thermtrip) support to PMC") Cc: stable # 4.0 Cc: Mikko Perttunen Signed-off-by: Johan Hovold Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/soc/tegra/pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 0453ff6839a7..7e9ef3431bea 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -1321,7 +1321,7 @@ static void tegra_pmc_init_tsense_reset(struct tegra_pmc *pmc) if (!pmc->soc->has_tsense_reset) return; - np = of_find_node_by_name(pmc->dev->of_node, "i2c-thermtrip"); + np = of_get_child_by_name(pmc->dev->of_node, "i2c-thermtrip"); if (!np) { dev_warn(dev, "i2c-thermtrip node not found, %s.\n", disabled); return; From e09399e8047e4cf7d6df0b3029ce4b13ab5970eb Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 22 Oct 2018 18:16:26 -0300 Subject: [PATCH 0369/2608] selftests/powerpc: Fix ptrace tm failure commit 48dc0ef19044bfb69193302fbe3a834e3331b7ae upstream. Test ptrace-tm-spd-gpr fails on current kernel (4.19) due to a segmentation fault that happens on the child process prior to setting cptr[2] = 1. This causes the parent process to wait forever at 'while (!pptr[2])' and the test to be killed by the test harness framework by timeout, thus, failing. The segmentation fault happens because of a inline assembly being generated as: 0x10000355c lfs f0, 0(0) This is reading memory position 0x0 and causing the segmentation fault. This code is being generated by ASM_LOAD_FPR_SINGLE_PRECISION(flt_4), where flt_4 is passed to the inline assembly block as: [flt_4] "r" (&d) Since the inline assembly 'r' constraint means any GPR, gpr0 is being chosen, thus causing this issue when issuing a Load Floating-Point Single instruction. This patch simply changes the constraint to 'b', which specify that this register will be used as base, and r0 is not allowed to be used, avoiding this issue. Other than that, removing flt_2 register from the input operands, since it is not used by the inline assembly code at all. Cc: stable@vger.kernel.org Signed-off-by: Breno Leitao Acked-by: Segher Boessenkool Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 327fa943c7f3..dbdffa2e2c82 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -67,8 +67,8 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4), - [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a), - [flt_2] "r" (&b), [flt_4] "r" (&d) + [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a), + [flt_4] "b" (&d) : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", From 2f7e525a25e10e66f87b2329b1b8ab4d311e5f4e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 9 Oct 2018 14:36:45 +0800 Subject: [PATCH 0370/2608] btrfs: qgroup: Avoid calling qgroup functions if qgroup is not enabled commit 3628b4ca64f24a4ec55055597d0cb1c814729f8b upstream. Some qgroup trace events like btrfs_qgroup_release_data() and btrfs_qgroup_free_delayed_ref() can still be triggered even if qgroup is not enabled. This is caused by the lack of qgroup status check before calling some qgroup functions. Thankfully the functions can handle quota disabled case well and just do nothing for qgroup disabled case. This patch will do earlier check before triggering related trace events. And for enabled <-> disabled race case: 1) For enabled->disabled case Disable will wipe out all qgroups data including reservation and excl/rfer. Even if we leak some reservation or numbers, it will still be cleared, so nothing will go wrong. 2) For disabled -> enabled case Current btrfs_qgroup_release_data() will use extent_io tree to ensure we won't underflow reservation. And for delayed_ref we use head->qgroup_reserved to record the reserved space, so in that case head->qgroup_reserved should be 0 and we won't underflow. CC: stable@vger.kernel.org # 4.14+ Reported-by: Chris Murphy Link: https://lore.kernel.org/linux-btrfs/CAJCQCtQau7DtuUUeycCkZ36qjbKuxNzsgqJ7+sJ6W0dK_NLE3w@mail.gmail.com/ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 4 ++++ fs/btrfs/qgroup.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 47dec283628d..1b647fa93209 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2972,6 +2972,10 @@ static int __btrfs_qgroup_release_data(struct inode *inode, int trace_op = QGROUP_RELEASE; int ret; + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, + &BTRFS_I(inode)->root->fs_info->flags)) + return 0; + /* In release case, we shouldn't have @reserved */ WARN_ON(!free && reserved); if (free && reserved) diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index d9984e87cddf..83483ade3b19 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -232,6 +232,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes) { + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) + return; trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes); btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes); } From 61e3643dc51e8368a8042252fa1b3cca00784923 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 21 Aug 2018 09:42:03 +0800 Subject: [PATCH 0371/2608] btrfs: Handle owner mismatch gracefully when walking up tree commit 65c6e82becec33731f48786e5a30f98662c86b16 upstream. [BUG] When mounting certain crafted image, btrfs will trigger kernel BUG_ON() when trying to recover balance: kernel BUG at fs/btrfs/extent-tree.c:8956! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 662 Comm: mount Not tainted 4.18.0-rc1-custom+ #10 RIP: 0010:walk_up_proc+0x336/0x480 [btrfs] RSP: 0018:ffffb53540c9b890 EFLAGS: 00010202 Call Trace: walk_up_tree+0x172/0x1f0 [btrfs] btrfs_drop_snapshot+0x3a4/0x830 [btrfs] merge_reloc_roots+0xe1/0x1d0 [btrfs] btrfs_recover_relocation+0x3ea/0x420 [btrfs] open_ctree+0x1af3/0x1dd0 [btrfs] btrfs_mount_root+0x66b/0x740 [btrfs] mount_fs+0x3b/0x16a vfs_kern_mount.part.9+0x54/0x140 btrfs_mount+0x16d/0x890 [btrfs] mount_fs+0x3b/0x16a vfs_kern_mount.part.9+0x54/0x140 do_mount+0x1fd/0xda0 ksys_mount+0xba/0xd0 __x64_sys_mount+0x21/0x30 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe [CAUSE] Extent tree corruption. In this particular case, reloc tree root's owner is DATA_RELOC_TREE (should be TREE_RELOC), thus its backref is corrupted and we failed the owner check in walk_up_tree(). [FIX] It's pretty hard to take care of every extent tree corruption, but at least we can remove such BUG_ON() and exit more gracefully. And since in this particular image, DATA_RELOC_TREE and TREE_RELOC share the same root (which is obviously invalid), we needs to make __del_reloc_root() more robust to detect such invalid sharing to avoid possible NULL dereference as root->node can be NULL in this case. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200411 Reported-by: Xu Wen CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 18 ++++++++++++------ fs/btrfs/relocation.c | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f96f72659693..1fe62225bc31 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9028,15 +9028,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (eb == root->node) { if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = eb->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(eb)); + else if (root->root_key.objectid != btrfs_header_owner(eb)) + goto owner_mismatch; } else { if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = path->nodes[level + 1]->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(path->nodes[level + 1])); + else if (root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])) + goto owner_mismatch; } btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); @@ -9044,6 +9043,11 @@ out: wc->refs[level] = 0; wc->flags[level] = 0; return 0; + +owner_mismatch: + btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu", + btrfs_header_owner(eb), root->root_key.objectid); + return -EUCLEAN; } static noinline int walk_down_tree(struct btrfs_trans_handle *trans, @@ -9097,6 +9101,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, ret = walk_up_proc(trans, root, path, wc); if (ret > 0) return 0; + if (ret < 0) + return ret; if (path->locks[level]) { btrfs_tree_unlock_rw(path->nodes[level], diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b80b03e0c5d3..eeae2c3ab17e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1334,7 +1334,7 @@ static void __del_reloc_root(struct btrfs_root *root) struct mapping_node *node = NULL; struct reloc_control *rc = fs_info->reloc_ctl; - if (rc) { + if (rc && root->node) { spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, root->node->start); From d36ac4a32275f6c551c9e2d0284b3e0ef79623ec Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 21 Aug 2018 09:53:47 +0800 Subject: [PATCH 0372/2608] btrfs: locking: Add extra check in btrfs_init_new_buffer() to avoid deadlock commit b72c3aba09a53fc7c1824250d71180ca154517a7 upstream. [BUG] For certain crafted image, whose csum root leaf has missing backref, if we try to trigger write with data csum, it could cause deadlock with the following kernel WARN_ON(): WARNING: CPU: 1 PID: 41 at fs/btrfs/locking.c:230 btrfs_tree_lock+0x3e2/0x400 CPU: 1 PID: 41 Comm: kworker/u4:1 Not tainted 4.18.0-rc1+ #8 Workqueue: btrfs-endio-write btrfs_endio_write_helper RIP: 0010:btrfs_tree_lock+0x3e2/0x400 Call Trace: btrfs_alloc_tree_block+0x39f/0x770 __btrfs_cow_block+0x285/0x9e0 btrfs_cow_block+0x191/0x2e0 btrfs_search_slot+0x492/0x1160 btrfs_lookup_csum+0xec/0x280 btrfs_csum_file_blocks+0x2be/0xa60 add_pending_csums+0xaf/0xf0 btrfs_finish_ordered_io+0x74b/0xc90 finish_ordered_fn+0x15/0x20 normal_work_helper+0xf6/0x500 btrfs_endio_write_helper+0x12/0x20 process_one_work+0x302/0x770 worker_thread+0x81/0x6d0 kthread+0x180/0x1d0 ret_from_fork+0x35/0x40 [CAUSE] That crafted image has missing backref for csum tree root leaf. And when we try to allocate new tree block, since there is no EXTENT/METADATA_ITEM for csum tree root, btrfs consider it's free slot and use it. The extent tree of the image looks like: Normal image | This fuzzed image ----------------------------------+-------------------------------- BG 29360128 | BG 29360128 One empty slot | One empty slot 29364224: backref to UUID tree | 29364224: backref to UUID tree Two empty slots | Two empty slots 29376512: backref to CSUM tree | One empty slot (bad type) <<< 29380608: backref to D_RELOC tree | 29380608: backref to D_RELOC tree ... | ... Since bytenr 29376512 has no METADATA/EXTENT_ITEM, when btrfs try to alloc tree block, it's an valid slot for btrfs. And for finish_ordered_write, when we need to insert csum, we try to CoW csum tree root. By accident, empty slots at bytenr BG_OFFSET, BG_OFFSET + 8K, BG_OFFSET + 12K is already used by tree block COW for other trees, the next empty slot is BG_OFFSET + 16K, which should be the backref for CSUM tree. But due to the bad type, btrfs can recognize it and still consider it as an empty slot, and will try to use it for csum tree CoW. Then in the following call trace, we will try to lock the new tree block, which turns out to be the old csum tree root which is already locked: btrfs_search_slot() called on csum tree root, which is at 29376512 |- btrfs_cow_block() |- btrfs_set_lock_block() | |- Now locks tree block 29376512 (old csum tree root) |- __btrfs_cow_block() |- btrfs_alloc_tree_block() |- btrfs_reserve_extent() | Now it returns tree block 29376512, which extent tree | shows its empty slot, but it's already hold by csum tree |- btrfs_init_new_buffer() |- btrfs_tree_lock() | Triggers WARN_ON(eb->lock_owner == current->pid) |- wait_event() Wait lock owner to release the lock, but it's locked by ourself, so it will deadlock [FIX] This patch will do the lock_owner and current->pid check at btrfs_init_new_buffer(). So above deadlock can be avoided. Since such problem can only happen in crafted image, we will still trigger kernel warning for later aborted transaction, but with a little more meaningful warning message. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200405 Reported-by: Xu Wen CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1fe62225bc31..e6c5cfd051a8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8398,6 +8398,19 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (IS_ERR(buf)) return buf; + /* + * Extra safety check in case the extent tree is corrupted and extent + * allocator chooses to use a tree block which is already used and + * locked. + */ + if (buf->lock_owner == current->pid) { + btrfs_err_rl(fs_info, +"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected", + buf->start, btrfs_header_owner(buf), current->pid); + free_extent_buffer(buf); + return ERR_PTR(-EUCLEAN); + } + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); From e2fc220f5cfd10ca7cd74ef622e8906287db1c7e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 16:59:33 -0400 Subject: [PATCH 0373/2608] btrfs: fix error handling in free_log_tree commit 374b0e2d6ba5da7fd1cadb3247731ff27d011f6f upstream. When we hit an I/O error in free_log_tree->walk_log_tree during file system shutdown we can crash due to there not being a valid transaction handle. Use btrfs_handle_fs_error when there's no transaction handle to use. BUG: unable to handle kernel NULL pointer dereference at 0000000000000060 IP: free_log_tree+0xd2/0x140 [btrfs] PGD 0 P4D 0 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC PTI Modules linked in: CPU: 2 PID: 23544 Comm: umount Tainted: G W 4.12.14-kvmsmall #9 SLE15 (unreleased) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 task: ffff96bfd3478880 task.stack: ffffa7cf40d78000 RIP: 0010:free_log_tree+0xd2/0x140 [btrfs] RSP: 0018:ffffa7cf40d7bd10 EFLAGS: 00010282 RAX: 00000000fffffffb RBX: 00000000fffffffb RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff96c02f07d4c8 RDI: 0000000000000282 RBP: ffff96c013cf1000 R08: ffff96c02f07d4c8 R09: ffff96c02f07d4d0 R10: 0000000000000000 R11: 0000000000000002 R12: 0000000000000000 R13: ffff96c005e800c0 R14: ffffa7cf40d7bdb8 R15: 0000000000000000 FS: 00007f17856bcfc0(0000) GS:ffff96c03f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000060 CR3: 0000000045ed6002 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? wait_for_writer+0xb0/0xb0 [btrfs] btrfs_free_log+0x17/0x30 [btrfs] btrfs_drop_and_free_fs_root+0x9a/0xe0 [btrfs] btrfs_free_fs_roots+0xc0/0x130 [btrfs] ? wait_for_completion+0xf2/0x100 close_ctree+0xea/0x2e0 [btrfs] ? kthread_stop+0x161/0x260 generic_shutdown_super+0x6c/0x120 kill_anon_super+0xe/0x20 btrfs_kill_super+0x13/0x100 [btrfs] deactivate_locked_super+0x3f/0x70 cleanup_mnt+0x3b/0x70 task_work_run+0x78/0x90 exit_to_usermode_loop+0x77/0xa6 do_syscall_64+0x1c5/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x7f1784f90827 RSP: 002b:00007ffdeeb03118 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000556a60c62970 RCX: 00007f1784f90827 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000556a60c62b50 RBP: 0000000000000000 R08: 0000000000000005 R09: 00000000ffffffff R10: 0000556a60c63900 R11: 0000000000000246 R12: 0000556a60c62b50 R13: 00007f17854a81c4 R14: 0000000000000000 R15: 0000000000000000 RIP: free_log_tree+0xd2/0x140 [btrfs] RSP: ffffa7cf40d7bd10 CR2: 0000000000000060 Fixes: 681ae50917df9 ("Btrfs: cleanup reserved space when freeing tree log on error") CC: # v3.13 Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e1b4a59485df..efc0a93abe74 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3078,9 +3078,12 @@ static void free_log_tree(struct btrfs_trans_handle *trans, }; ret = walk_log_tree(trans, log, &wc); - /* I don't think this can happen but just in case */ - if (ret) - btrfs_abort_transaction(trans, ret); + if (ret) { + if (trans) + btrfs_abort_transaction(trans, ret); + else + btrfs_handle_fs_error(log->fs_info, ret, NULL); + } while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, From 7b23644c64fd2d4033cf265bb3f131cccffdc453 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 7 Sep 2018 14:16:23 +0800 Subject: [PATCH 0374/2608] btrfs: Enhance btrfs_trim_fs function to handle error better commit 93bba24d4b5ad1e5cd8b43f64e66ff9d6355dd20 upstream. Function btrfs_trim_fs() doesn't handle errors in a consistent way. If error happens when trimming existing block groups, it will skip the remaining blocks and continue to trim unallocated space for each device. The return value will only reflect the final error from device trimming. This patch will fix such behavior by: 1) Recording the last error from block group or device trimming The return value will also reflect the last error during trimming. Make developer more aware of the problem. 2) Continuing trimming if possible If we failed to trim one block group or device, we could still try the next block group or device. 3) Report number of failures during block group and device trimming It would be less noisy, but still gives user a brief summary of what's going wrong. Such behavior can avoid confusion for cases like failure to trim the first block group and then only unallocated space is trimmed. Reported-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba [ add bg_ret and dev_ret to the messages ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 49 ++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e6c5cfd051a8..21b62e63a916 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11037,6 +11037,15 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, return ret; } +/* + * Trim the whole filesystem by: + * 1) trimming the free space in each block group + * 2) trimming the unallocated space on each device + * + * This will also continue trimming even if a block group or device encounters + * an error. The return value will be the last error, or 0 if nothing bad + * happens. + */ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) { struct btrfs_block_group_cache *cache = NULL; @@ -11047,6 +11056,10 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) u64 end; u64 trimmed = 0; u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); + u64 bg_failed = 0; + u64 dev_failed = 0; + int bg_ret = 0; + int dev_ret = 0; int ret = 0; /* @@ -11057,7 +11070,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) else cache = btrfs_lookup_block_group(fs_info, range->start); - while (cache) { + for (; cache; cache = next_block_group(fs_info, cache)) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); break; @@ -11071,13 +11084,15 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) if (!block_group_cache_done(cache)) { ret = cache_block_group(cache, 0); if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } ret = wait_block_group_cache_done(cache); if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } } ret = btrfs_trim_block_group(cache, @@ -11088,28 +11103,40 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) trimmed += group_trimmed; if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } } - - cache = next_block_group(fs_info, cache); } + if (bg_failed) + btrfs_warn(fs_info, + "failed to trim %llu block group(s), last error %d", + bg_failed, bg_ret); mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->alloc_list; list_for_each_entry(device, devices, dev_alloc_list) { ret = btrfs_trim_free_extents(device, range->minlen, &group_trimmed); - if (ret) + if (ret) { + dev_failed++; + dev_ret = ret; break; + } trimmed += group_trimmed; } mutex_unlock(&fs_info->fs_devices->device_list_mutex); + if (dev_failed) + btrfs_warn(fs_info, + "failed to trim %llu device(s), last error %d", + dev_failed, dev_ret); range->len = trimmed; - return ret; + if (bg_ret) + return bg_ret; + return dev_ret; } /* From 2fdb337b67bcc407c0eaf1108786cb1300fe1ff1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 7 Sep 2018 14:16:24 +0800 Subject: [PATCH 0375/2608] btrfs: Ensure btrfs_trim_fs can trim the whole filesystem commit 6ba9fc8e628becf0e3ec94083450d089b0dec5f5 upstream. [BUG] fstrim on some btrfs only trims the unallocated space, not trimming any space in existing block groups. [CAUSE] Before fstrim_range passed to btrfs_trim_fs(), it gets truncated to range [0, super->total_bytes). So later btrfs_trim_fs() will only be able to trim block groups in range [0, super->total_bytes). While for btrfs, any bytenr aligned to sectorsize is valid, since btrfs uses its logical address space, there is nothing limiting the location where we put block groups. For filesystem with frequent balance, it's quite easy to relocate all block groups and bytenr of block groups will start beyond super->total_bytes. In that case, btrfs will not trim existing block groups. [FIX] Just remove the truncation in btrfs_ioctl_fitrim(), so btrfs_trim_fs() can get the unmodified range, which is normally set to [0, U64_MAX]. Reported-by: Chris Murphy Fixes: f4c697e6406d ("btrfs: return EINVAL if start > total_bytes in fitrim ioctl") CC: # v4.4+ Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 10 +--------- fs/btrfs/ioctl.c | 11 +++++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 21b62e63a916..a32c84932eed 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11055,21 +11055,13 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); u64 bg_failed = 0; u64 dev_failed = 0; int bg_ret = 0; int dev_ret = 0; int ret = 0; - /* - * try to trim all FS space, our block group may start from non-zero. - */ - if (range->len == total_bytes) - cache = btrfs_lookup_first_block_group(fs_info, range->start); - else - cache = btrfs_lookup_block_group(fs_info, range->start); - + cache = btrfs_lookup_first_block_group(fs_info, range->start); for (; cache; cache = next_block_group(fs_info, cache)) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a507c0d25354..9333e4cda68d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -352,7 +352,6 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -376,11 +375,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return -EOPNOTSUPP; if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; - if (range.start > total_bytes || - range.len < fs_info->sb->s_blocksize) + + /* + * NOTE: Don't truncate the range using super->total_bytes. Bytenr of + * block group is in the logical address space, which can be any + * sectorsize aligned bytenr in the range [0, U64_MAX]. + */ + if (range.len < fs_info->sb->s_blocksize) return -EINVAL; - range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(fs_info, &range); if (ret < 0) From a7d24c89db0b0b664046aebd1e0f9aef7306b944 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 17:18:14 -0400 Subject: [PATCH 0376/2608] btrfs: iterate all devices during trim, instead of fs_devices::alloc_list commit d4e329de5e5e21594df2e0dd59da9acee71f133b upstream. btrfs_trim_fs iterates over the fs_devices->alloc_list while holding the device_list_mutex. The problem is that ->alloc_list is protected by the chunk mutex. We don't want to hold the chunk mutex over the trim of the entire file system. Fortunately, the ->dev_list list is protected by the dev_list mutex and while it will give us all devices, including read-only devices, we already just skip the read-only devices. Then we can continue to take and release the chunk mutex while scanning each device. Fixes: 499f377f49f ("btrfs: iterate over unused chunk space in FITRIM") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a32c84932eed..3ebef3ab9dde 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11107,8 +11107,8 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) "failed to trim %llu block group(s), last error %d", bg_failed, bg_ret); mutex_lock(&fs_info->fs_devices->device_list_mutex); - devices = &fs_info->fs_devices->alloc_list; - list_for_each_entry(device, devices, dev_alloc_list) { + devices = &fs_info->fs_devices->devices; + list_for_each_entry(device, devices, dev_list) { ret = btrfs_trim_free_extents(device, range->minlen, &group_trimmed); if (ret) { From da05e93720843101d7b70c84b63805a37d4485d1 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 17:18:15 -0400 Subject: [PATCH 0377/2608] btrfs: don't attempt to trim devices that don't support it commit 0be88e367fd8fbdb45257615d691f4675dda062f upstream. We check whether any device the file system is using supports discard in the ioctl call, but then we attempt to trim free extents on every device regardless of whether discard is supported. Due to the way we mask off EOPNOTSUPP, we can end up issuing the trim operations on each free range on devices that don't support it, just wasting time. Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3ebef3ab9dde..43a2d77039df 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10976,6 +10976,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, *trimmed = 0; + /* Discard not supported = nothing to do. */ + if (!blk_queue_discard(bdev_get_queue(device->bdev))) + return 0; + /* Not writeable = nothing to do. */ if (!device->writeable) return 0; From 9ce0f8b6fb06afb39a4dba8a8aa9ed4ccd5134b5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 12 Sep 2018 10:45:45 -0400 Subject: [PATCH 0378/2608] btrfs: wait on caching when putting the bg cache commit 3aa7c7a31c26321696b92841d5103461c6f3f517 upstream. While testing my backport I noticed there was a panic if I ran generic/416 generic/417 generic/418 all in a row. This just happened to uncover a race where we had outstanding IO after we destroy all of our workqueues, and then we'd go to queue the endio work on those free'd workqueues. This is because we aren't waiting for the caching threads to be done before freeing everything up, so to fix this make sure we wait on any outstanding caching that's being done before we free up the block group, so we're sure to be done with all IO by the time we get to btrfs_stop_all_workers(). This fixes the panic I was seeing consistently in testing. ------------[ cut here ]------------ kernel BUG at fs/btrfs/volumes.c:6112! SMP PTI Modules linked in: CPU: 1 PID: 27165 Comm: kworker/u4:7 Not tainted 4.16.0-02155-g3553e54a578d-dirty #875 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Workqueue: btrfs-cache btrfs_cache_helper RIP: 0010:btrfs_map_bio+0x346/0x370 RSP: 0000:ffffc900061e79d0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff880071542e00 RCX: 0000000000533000 RDX: ffff88006bb74380 RSI: 0000000000000008 RDI: ffff880078160000 RBP: 0000000000000001 R08: ffff8800781cd200 R09: 0000000000503000 R10: ffff88006cd21200 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8800781cd200 R15: ffff880071542e00 FS: 0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000817ffc4 CR3: 0000000078314000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btree_submit_bio_hook+0x8a/0xd0 submit_one_bio+0x5d/0x80 read_extent_buffer_pages+0x18a/0x320 btree_read_extent_buffer_pages+0xbc/0x200 ? alloc_extent_buffer+0x359/0x3e0 read_tree_block+0x3d/0x60 read_block_for_search.isra.30+0x1a5/0x360 btrfs_search_slot+0x41b/0xa10 btrfs_next_old_leaf+0x212/0x470 caching_thread+0x323/0x490 normal_work_helper+0xc5/0x310 process_one_work+0x141/0x340 worker_thread+0x44/0x3c0 kthread+0xf8/0x130 ? process_one_work+0x340/0x340 ? kthread_bind+0x10/0x10 ret_from_fork+0x35/0x40 RIP: btrfs_map_bio+0x346/0x370 RSP: ffffc900061e79d0 ---[ end trace 827eb13e50846033 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 43a2d77039df..7a863338dba1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9881,6 +9881,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group = btrfs_lookup_first_block_group(info, last); while (block_group) { + wait_block_group_cache_done(block_group); spin_lock(&block_group->lock); if (block_group->iref) break; From b5d59a48c3724d2fdfa145a9dfa64261299aee45 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 07:17:49 -0400 Subject: [PATCH 0379/2608] btrfs: protect space cache inode alloc with GFP_NOFS commit 84de76a2fb217dc1b6bc2965cc397d1648aa1404 upstream. If we're allocating a new space cache inode it's likely going to be under a transaction handle, so we need to use memalloc_nofs_save() in order to avoid deadlocks, and more importantly lockdep messages that make xfstests fail. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4426d1c73e50..e75489e3b67c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -59,6 +60,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header *header; struct extent_buffer *leaf; struct inode *inode = NULL; + unsigned nofs_flag; int ret; key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -80,7 +82,13 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, btrfs_disk_key_to_cpu(&location, &disk_key); btrfs_release_path(path); + /* + * We are often under a trans handle at this point, so we need to make + * sure NOFS is set to keep us from deadlocking. + */ + nofs_flag = memalloc_nofs_save(); inode = btrfs_iget(fs_info->sb, &location, root, NULL); + memalloc_nofs_restore(nofs_flag); if (IS_ERR(inode)) return inode; if (is_bad_inode(inode)) { From a7e3da4918b18ad8ff3848c9054425e8143a991c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 07:18:00 -0400 Subject: [PATCH 0380/2608] btrfs: reset max_extent_size on clear in a bitmap commit 553cceb49681d60975d00892877d4c871bf220f9 upstream. We need to clear the max_extent_size when we clear bits from a bitmap since it could have been from the range that contains the max_extent_size. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e75489e3b67c..9b3fbe7c16a9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1710,6 +1710,8 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; + if (info->max_extent_size > ctl->unit) + info->max_extent_size = 0; } static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, From 0ae66cf18fc52d3f3285b5090b3e4516e4fcc1ed Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 07:18:02 -0400 Subject: [PATCH 0381/2608] btrfs: make sure we create all new block groups commit 545e3366db823dc3342ca9d7fea803f829c9062f upstream. Allocating new chunks modifies both the extent and chunk tree, which can trigger new chunk allocations. So instead of doing list_for_each_safe, just do while (!list_empty()) so we make sure we don't exit with other pending bg's still on our list. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7a863338dba1..e201415edb2d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10270,7 +10270,7 @@ error: void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { - struct btrfs_block_group_cache *block_group, *tmp; + struct btrfs_block_group_cache *block_group; struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_block_group_item item; struct btrfs_key key; @@ -10278,7 +10278,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, bool can_flush_pending_bgs = trans->can_flush_pending_bgs; trans->can_flush_pending_bgs = false; - list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + while (!list_empty(&trans->new_bgs)) { + block_group = list_first_entry(&trans->new_bgs, + struct btrfs_block_group_cache, + bg_list); if (ret) goto next; From 985579fa688e8fed8ed3570210af7201dc67d179 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Oct 2018 11:12:55 +0100 Subject: [PATCH 0382/2608] Btrfs: fix warning when replaying log after fsync of a tmpfile commit f2d72f42d5fa3bf33761d9e47201745f624fcff5 upstream. When replaying a log which contains a tmpfile (which necessarily has a link count of 0) we end up calling inc_nlink(), at fs/btrfs/tree-log.c:replay_one_buffer(), which produces a warning like the following: [195191.943673] WARNING: CPU: 0 PID: 6924 at fs/inode.c:342 inc_nlink+0x33/0x40 [195191.943723] CPU: 0 PID: 6924 Comm: mount Not tainted 4.19.0-rc6-btrfs-next-38 #1 [195191.943724] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [195191.943726] RIP: 0010:inc_nlink+0x33/0x40 [195191.943728] RSP: 0018:ffffb96e425e3870 EFLAGS: 00010246 [195191.943730] RAX: 0000000000000000 RBX: ffff8c0d1e6af4f0 RCX: 0000000000000006 [195191.943731] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8c0d1e6af4f0 [195191.943731] RBP: 0000000000000097 R08: 0000000000000001 R09: 0000000000000000 [195191.943732] R10: 0000000000000000 R11: 0000000000000000 R12: ffffb96e425e3a60 [195191.943733] R13: ffff8c0d10cff0c8 R14: ffff8c0d0d515348 R15: ffff8c0d78a1b3f8 [195191.943735] FS: 00007f570ee24480(0000) GS:ffff8c0dfb200000(0000) knlGS:0000000000000000 [195191.943736] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [195191.943737] CR2: 00005593286277c8 CR3: 00000000bb8f2006 CR4: 00000000003606f0 [195191.943739] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [195191.943740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [195191.943741] Call Trace: [195191.943778] replay_one_buffer+0x797/0x7d0 [btrfs] [195191.943802] walk_up_log_tree+0x1c1/0x250 [btrfs] [195191.943809] ? rcu_read_lock_sched_held+0x3f/0x70 [195191.943825] walk_log_tree+0xae/0x1d0 [btrfs] [195191.943840] btrfs_recover_log_trees+0x1d7/0x4d0 [btrfs] [195191.943856] ? replay_dir_deletes+0x280/0x280 [btrfs] [195191.943870] open_ctree+0x1c3b/0x22a0 [btrfs] [195191.943887] btrfs_mount_root+0x6b4/0x800 [btrfs] [195191.943894] ? rcu_read_lock_sched_held+0x3f/0x70 [195191.943899] ? pcpu_alloc+0x55b/0x7c0 [195191.943906] ? mount_fs+0x3b/0x140 [195191.943908] mount_fs+0x3b/0x140 [195191.943912] ? __init_waitqueue_head+0x36/0x50 [195191.943916] vfs_kern_mount+0x62/0x160 [195191.943927] btrfs_mount+0x134/0x890 [btrfs] [195191.943936] ? rcu_read_lock_sched_held+0x3f/0x70 [195191.943938] ? pcpu_alloc+0x55b/0x7c0 [195191.943943] ? mount_fs+0x3b/0x140 [195191.943952] ? btrfs_remount+0x570/0x570 [btrfs] [195191.943954] mount_fs+0x3b/0x140 [195191.943956] ? __init_waitqueue_head+0x36/0x50 [195191.943960] vfs_kern_mount+0x62/0x160 [195191.943963] do_mount+0x1f9/0xd40 [195191.943967] ? memdup_user+0x4b/0x70 [195191.943971] ksys_mount+0x7e/0xd0 [195191.943974] __x64_sys_mount+0x21/0x30 [195191.943977] do_syscall_64+0x60/0x1b0 [195191.943980] entry_SYSCALL_64_after_hwframe+0x49/0xbe [195191.943983] RIP: 0033:0x7f570e4e524a [195191.943986] RSP: 002b:00007ffd83589478 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [195191.943989] RAX: ffffffffffffffda RBX: 0000563f335b2060 RCX: 00007f570e4e524a [195191.943990] RDX: 0000563f335b2240 RSI: 0000563f335b2280 RDI: 0000563f335b2260 [195191.943992] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000020 [195191.943993] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000563f335b2260 [195191.943994] R13: 0000563f335b2240 R14: 0000000000000000 R15: 00000000ffffffff [195191.944002] irq event stamp: 8688 [195191.944010] hardirqs last enabled at (8687): [] console_unlock+0x503/0x640 [195191.944012] hardirqs last disabled at (8688): [] trace_hardirqs_off_thunk+0x1a/0x1c [195191.944018] softirqs last enabled at (8638): [] __set_page_dirty_nobuffers+0x101/0x150 [195191.944020] softirqs last disabled at (8634): [] wb_wakeup_delayed+0x2e/0x60 [195191.944022] ---[ end trace 5d6e873a9a0b811a ]--- This happens because the inode does not have the flag I_LINKABLE set, which is a runtime only flag, not meant to be persisted, set when the inode is created through open(2) if the flag O_EXCL is not passed to it. Except for the warning, there are no other consequences (like corruptions or metadata inconsistencies). Since it's pointless to replay a tmpfile as it would be deleted in a later phase of the log replay procedure (it has a link count of 0), fix this by not logging tmpfiles and if a tmpfile is found in a log (created by a kernel without this change), skip the replay of the inode. A test case for fstests follows soon. Fixes: 471d557afed1 ("Btrfs: fix loss of prealloc extents past i_size after fsync log replay") CC: stable@vger.kernel.org # 4.18+ Reported-by: Martin Steigerwald Link: https://lore.kernel.org/linux-btrfs/3666619.NTnn27ZJZE@merkaba/ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index efc0a93abe74..0eb9efd4c346 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -273,6 +273,13 @@ struct walk_control { /* what stage of the replay code we're currently in */ int stage; + /* + * Ignore any items from the inode currently being processed. Needs + * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in + * the LOG_WALK_REPLAY_INODES stage. + */ + bool ignore_cur_inode; + /* the root we are currently replaying */ struct btrfs_root *replay_dest; @@ -2363,6 +2370,20 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item); + /* + * If we have a tmpfile (O_TMPFILE) that got fsync'ed + * and never got linked before the fsync, skip it, as + * replaying it is pointless since it would be deleted + * later. We skip logging tmpfiles, but it's always + * possible we are replaying a log created with a kernel + * that used to log tmpfiles. + */ + if (btrfs_inode_nlink(eb, inode_item) == 0) { + wc->ignore_cur_inode = true; + continue; + } else { + wc->ignore_cur_inode = false; + } ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid); if (ret) @@ -2400,16 +2421,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, root->fs_info->sectorsize); ret = btrfs_drop_extents(wc->trans, root, inode, from, (u64)-1, 1); - /* - * If the nlink count is zero here, the iput - * will free the inode. We bump it to make - * sure it doesn't get freed until the link - * count fixup is done. - */ if (!ret) { - if (inode->i_nlink == 0) - inc_nlink(inode); - /* Update link count and nbytes. */ + /* Update the inode's nbytes. */ ret = btrfs_update_inode(wc->trans, root, inode); } @@ -2424,6 +2437,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, break; } + if (wc->ignore_cur_inode) + continue; + if (key.type == BTRFS_DIR_INDEX_KEY && wc->stage == LOG_WALK_REPLAY_DIR_INDEX) { ret = replay_one_dir_item(wc->trans, root, path, @@ -5644,7 +5660,13 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - if (btrfs_inode_in_log(inode, trans->transid)) { + /* + * Skip already logged inodes or inodes corresponding to tmpfiles + * (since logging them is pointless, a link count of 0 means they + * will never be accessible). + */ + if (btrfs_inode_in_log(inode, trans->transid) || + inode->vfs_inode.i_nlink == 0) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; } From 45be291e9def76b8c42c10bfbf482a9dc81aa537 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 9 Oct 2018 15:05:29 +0100 Subject: [PATCH 0383/2608] Btrfs: fix wrong dentries after fsync of file that got its parent replaced commit 0f375eed92b5a407657532637ed9652611a682f5 upstream. In a scenario like the following: mkdir /mnt/A # inode 258 mkdir /mnt/B # inode 259 touch /mnt/B/bar # inode 260 sync mv /mnt/B/bar /mnt/A/bar mv -T /mnt/A /mnt/B fsync /mnt/B/bar After replaying the log we end up with file bar having 2 hard links, both with the name 'bar' and one in the directory with inode number 258 and the other in the directory with inode number 259. Also, we end up with the directory inode 259 still existing and with the directory inode 258 still named as 'A', instead of 'B'. In this scenario, file 'bar' should only have one hard link, located at directory inode 258, the directory inode 259 should not exist anymore and the name for directory inode 258 should be 'B'. This incorrect behaviour happens because when attempting to log the old parents of an inode, we skip any parents that no longer exist. Fix this by forcing a full commit if an old parent no longer exists. A test case for fstests follows soon. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 0eb9efd4c346..dd7a39797a32 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5583,9 +5583,33 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, dir_inode = btrfs_iget(fs_info->sb, &inode_key, root, NULL); - /* If parent inode was deleted, skip it. */ - if (IS_ERR(dir_inode)) - continue; + /* + * If the parent inode was deleted, return an error to + * fallback to a transaction commit. This is to prevent + * getting an inode that was moved from one parent A to + * a parent B, got its former parent A deleted and then + * it got fsync'ed, from existing at both parents after + * a log replay (and the old parent still existing). + * Example: + * + * mkdir /mnt/A + * mkdir /mnt/B + * touch /mnt/B/bar + * sync + * mv /mnt/B/bar /mnt/A/bar + * mv -T /mnt/A /mnt/B + * fsync /mnt/B/bar + * + * + * If we ignore the old parent B which got deleted, + * after a log replay we would have file bar linked + * at both parents and the old parent B would still + * exist. + */ + if (IS_ERR(dir_inode)) { + ret = PTR_ERR(dir_inode); + goto out; + } if (ctx) ctx->log_new_dentries = false; From 08374d8bc19ba9899257b6e7824588eb4e5ff0f5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 10 Aug 2018 10:20:26 +0800 Subject: [PATCH 0384/2608] btrfs: qgroup: Dirty all qgroups before rescan commit 9c7b0c2e8dbfbcd80a71e2cbfe02704f26c185c6 upstream. [BUG] In the following case, rescan won't zero out the number of qgroup 1/0: $ mkfs.btrfs -fq $DEV $ mount $DEV /mnt $ btrfs quota enable /mnt $ btrfs qgroup create 1/0 /mnt $ btrfs sub create /mnt/sub $ btrfs qgroup assign 0/257 1/0 /mnt $ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000 $ btrfs sub snap /mnt/sub /mnt/snap $ btrfs quota rescan -w /mnt $ btrfs qgroup show -pcre /mnt qgroupid rfer excl max_rfer max_excl parent child -------- ---- ---- -------- -------- ------ ----- 0/5 16.00KiB 16.00KiB none none --- --- 0/257 1016.00KiB 16.00KiB none none 1/0 --- 0/258 1016.00KiB 16.00KiB none none --- --- 1/0 1016.00KiB 16.00KiB none none --- 0/257 So far so good, but: $ btrfs qgroup remove 0/257 1/0 /mnt WARNING: quotas may be inconsistent, rescan needed $ btrfs quota rescan -w /mnt $ btrfs qgroup show -pcre /mnt qgoupid rfer excl max_rfer max_excl parent child -------- ---- ---- -------- -------- ------ ----- 0/5 16.00KiB 16.00KiB none none --- --- 0/257 1016.00KiB 16.00KiB none none --- --- 0/258 1016.00KiB 16.00KiB none none --- --- 1/0 1016.00KiB 16.00KiB none none --- --- ^^^^^^^^^^ ^^^^^^^^ not cleared [CAUSE] Before rescan we call qgroup_rescan_zero_tracking() to zero out all qgroups' accounting numbers. However we don't mark all qgroups dirty, but rely on rescan to do so. If we have any high level qgroup without children, it won't be marked dirty during rescan, since we cannot reach that qgroup. This will cause QGROUP_INFO items of childless qgroups never get updated in the quota tree, thus their numbers will stay the same in "btrfs qgroup show" output. [FIX] Just mark all qgroups dirty in qgroup_rescan_zero_tracking(), so even if we have childless qgroups, their QGROUP_INFO items will still get updated during rescan. Reported-by: Misono Tomohiro CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: Misono Tomohiro Tested-by: Misono Tomohiro Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1b647fa93209..d6d6e9593e39 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2763,6 +2763,7 @@ qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) qgroup->rfer_cmpr = 0; qgroup->excl = 0; qgroup->excl_cmpr = 0; + qgroup_dirty(fs_info, qgroup); } spin_unlock(&fs_info->qgroup_lock); } From bdda31751a2074cbfcf64f52b59b165c06204ae5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Oct 2018 00:37:25 +0100 Subject: [PATCH 0385/2608] Btrfs: fix null pointer dereference on compressed write path error commit 3527a018c00e5dbada2f9d7ed5576437b6dd5cfb upstream. At inode.c:compress_file_range(), under the "free_pages_out" label, we can end up dereferencing the "pages" pointer when it has a NULL value. This case happens when "start" has a value of 0 and we fail to allocate memory for the "pages" pointer. When that happens we jump to the "cont" label and then enter the "if (start == 0)" branch where we immediately call the cow_file_range_inline() function. If that function returns 0 (success creating an inline extent) or an error (like -ENOMEM for example) we jump to the "free_pages_out" label and then access "pages[i]" leading to a NULL pointer dereference, since "nr_pages" has a value greater than zero at that point. Fix this by setting "nr_pages" to 0 when we fail to allocate memory for the "pages" pointer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201119 Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Liu Bo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e8bfafa25a71..c24badcafcc0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -524,6 +524,7 @@ again: pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { /* just bail out to the uncompressed code */ + nr_pages = 0; goto cont; } From 947a9b021baf0521a12b1895c81507d7f5c2ddbe Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Oct 2018 09:51:00 +0100 Subject: [PATCH 0386/2608] Btrfs: fix assertion on fsync of regular file when using no-holes feature commit 7ed586d0a8241e81d58c656c5b315f781fa6fc97 upstream. When using the NO_HOLES feature and logging a regular file, we were expecting that if we find an inline extent, that either its size in RAM (uncompressed and unenconded) matches the size of the file or if it does not, that it matches the sector size and it represents compressed data. This assertion does not cover a case where the length of the inline extent is smaller than the sector size and also smaller the file's size, such case is possible through fallocate. Example: $ mkfs.btrfs -f -O no-holes /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xb60 0 21" /mnt/foobar $ xfs_io -c "falloc 40 40" /mnt/foobar $ xfs_io -c "fsync" /mnt/foobar In the above example we trigger the assertion because the inline extent's length is 21 bytes while the file size is 80 bytes. The fallocate() call merely updated the file's size and did not touch the existing inline extent, as expected. So fix this by adjusting the assertion so that an inline extent length smaller than the file size is valid if the file size is smaller than the filesystem's sector size. A test case for fstests follows soon. Reported-by: Anatoly Trosinenko Fixes: a89ca6f24ffe ("Btrfs: fix fsync after truncate when no_holes feature is enabled") CC: stable@vger.kernel.org # 4.14+ Link: https://lore.kernel.org/linux-btrfs/CAE5jQCfRSBC7n4pUTFJcmHh109=gwyT9mFkCOL+NKfzswmR=_Q@mail.gmail.com/ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dd7a39797a32..accc54efb66e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4641,7 +4641,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, ASSERT(len == i_size || (len == fs_info->sectorsize && btrfs_file_extent_compression(leaf, extent) != - BTRFS_COMPRESS_NONE)); + BTRFS_COMPRESS_NONE) || + (len < i_size && i_size < fs_info->sectorsize)); return 0; } From 65d41e98fd715f93b9d79e4583fdb5db7a176c44 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2018 15:32:33 -0400 Subject: [PATCH 0387/2608] btrfs: set max_extent_size properly commit ad22cf6ea47fa20fbe11ac324a0a15c0a9a4a2a9 upstream. We can't use entry->bytes if our entry is a bitmap entry, we need to use entry->max_extent_size in that case. Fix up all the logic to make this consistent. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9b3fbe7c16a9..a8b4e4d7b82d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1795,6 +1795,13 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl, return -1; } +static inline u64 get_max_extent_size(struct btrfs_free_space *entry) +{ + if (entry->bitmap) + return entry->max_extent_size; + return entry->bytes; +} + /* Cache the size of the max extent in bytes */ static struct btrfs_free_space * find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, @@ -1816,8 +1823,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, for (node = &entry->offset_index; node; node = rb_next(node)) { entry = rb_entry(node, struct btrfs_free_space, offset_index); if (entry->bytes < *bytes) { - if (entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); continue; } @@ -1835,8 +1842,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, } if (entry->bytes < *bytes + align_off) { - if (entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); continue; } @@ -1848,8 +1855,10 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, *offset = tmp; *bytes = size; return entry; - } else if (size > *max_extent_size) { - *max_extent_size = size; + } else { + *max_extent_size = + max(get_max_extent_size(entry), + *max_extent_size); } continue; } @@ -2709,8 +2718,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); if (err) { - if (search_bytes > *max_extent_size) - *max_extent_size = search_bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); return 0; } @@ -2747,8 +2756,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, entry = rb_entry(node, struct btrfs_free_space, offset_index); while (1) { - if (entry->bytes < bytes && entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + if (entry->bytes < bytes) + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); if (entry->bytes < bytes || (!entry->bitmap && entry->offset < min_start)) { From 7509d4f9ad5ee2b6061f2073dfb5e3f48216c09f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 11 Oct 2018 15:54:09 -0400 Subject: [PATCH 0388/2608] btrfs: don't use ctl->free_space for max_extent_size commit fb5c39d7a887108087de6ff93d3f326b01b4ef41 upstream. max_extent_size is supposed to be the largest contiguous range for the space info, and ctl->free_space is the total free space in the block group. We need to keep track of these separately and _only_ use the max_free_space if we don't have a max_extent_size, as that means our original request was too large to search any of the block groups for and therefore wouldn't have a max_extent_size set. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e201415edb2d..2cb3569ac548 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7573,6 +7573,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group = NULL; u64 search_start = 0; u64 max_extent_size = 0; + u64 max_free_space = 0; u64 empty_cluster = 0; struct btrfs_space_info *space_info; int loop = 0; @@ -7867,8 +7868,8 @@ unclustered_alloc: spin_lock(&ctl->tree_lock); if (ctl->free_space < num_bytes + empty_cluster + empty_size) { - if (ctl->free_space > max_extent_size) - max_extent_size = ctl->free_space; + max_free_space = max(max_free_space, + ctl->free_space); spin_unlock(&ctl->tree_lock); goto loop; } @@ -8037,6 +8038,8 @@ loop: } out: if (ret == -ENOSPC) { + if (!max_extent_size) + max_extent_size = max_free_space; spin_lock(&space_info->lock); space_info->max_extent_size = max_extent_size; spin_unlock(&space_info->lock); From 42551ab618977b1e8d0b296f937b1aa80e93cd39 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 11 Oct 2018 15:54:21 -0400 Subject: [PATCH 0389/2608] btrfs: only free reserved extent if we didn't insert it commit 49940bdd57779c78462da7aa5a8650b2fea8c2ff upstream. When we insert the file extent once the ordered extent completes we free the reserved extent reservation as it'll have been migrated to the bytes_used counter. However if we error out after this step we'll still clear the reserved extent reservation, resulting in a negative accounting of the reserved bytes for the block group and space info. Fix this by only doing the free if we didn't successfully insert a file extent for this extent. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Omar Sandoval Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c24badcafcc0..39edb4b71fa9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2966,6 +2966,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) bool truncated = false; bool range_locked = false; bool clear_new_delalloc_bytes = false; + bool clear_reserved_extent = true; if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) && @@ -3069,10 +3070,12 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) logical_len, logical_len, compress_type, 0, 0, BTRFS_FILE_EXTENT_REG); - if (!ret) + if (!ret) { + clear_reserved_extent = false; btrfs_release_delalloc_bytes(fs_info, ordered_extent->start, ordered_extent->disk_len); + } } unpin_extent_cache(&BTRFS_I(inode)->extent_tree, ordered_extent->file_offset, ordered_extent->len, @@ -3132,8 +3135,13 @@ out: * wrong we need to return the space for this ordered extent * back to the allocator. We only free the extent in the * truncated case if we didn't write out the extent at all. + * + * If we made it past insert_reserved_file_extent before we + * errored out then we don't need to do this as the accounting + * has already been done. */ if ((ret || !logical_len) && + clear_reserved_extent && !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) btrfs_free_reserved_extent(fs_info, From 87d7ea688393835feeeb5afc2fd4691e8a733da7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 11 Oct 2018 15:54:31 -0400 Subject: [PATCH 0390/2608] btrfs: don't run delayed_iputs in commit commit 30928e9baac238a7330085a1c5747f0b5df444b4 upstream. This could result in a really bad case where we do something like evict evict_refill_and_join btrfs_commit_transaction btrfs_run_delayed_iputs evict evict_refill_and_join btrfs_commit_transaction ... forever We have plenty of other places where we run delayed iputs that are much safer, let those do the work. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/transaction.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 27638b96079d..f74005ca8f08 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2307,15 +2307,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) kmem_cache_free(btrfs_trans_handle_cachep, trans); - /* - * If fs has been frozen, we can not handle delayed iputs, otherwise - * it'll result in deadlock about SB_FREEZE_FS. - */ - if (current != fs_info->transaction_kthread && - current != fs_info->cleaner_kthread && - !test_bit(BTRFS_FS_FROZEN, &fs_info->flags)) - btrfs_run_delayed_iputs(fs_info); - return ret; scrub_continue: From c8bac97db4639b958af13cc41ebcaea3297d4e72 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2018 15:32:32 -0400 Subject: [PATCH 0391/2608] btrfs: move the dio_sem higher up the callchain commit c495144bc6962186feae31d687596d2472000e45 upstream. We're getting a lockdep splat because we take the dio_sem under the log_mutex. What we really need is to protect fsync() from logging an extent map for an extent we never waited on higher up, so just guard the whole thing with dio_sem. ====================================================== WARNING: possible circular locking dependency detected 4.18.0-rc4-xfstests-00025-g5de5edbaf1d4 #411 Not tainted ------------------------------------------------------ aio-dio-invalid/30928 is trying to acquire lock: 0000000092621cfd (&mm->mmap_sem){++++}, at: get_user_pages_unlocked+0x5a/0x1e0 but task is already holding lock: 00000000cefe6b35 (&ei->dio_sem){++++}, at: btrfs_direct_IO+0x3be/0x400 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #5 (&ei->dio_sem){++++}: lock_acquire+0xbd/0x220 down_write+0x51/0xb0 btrfs_log_changed_extents+0x80/0xa40 btrfs_log_inode+0xbaf/0x1000 btrfs_log_inode_parent+0x26f/0xa80 btrfs_log_dentry_safe+0x50/0x70 btrfs_sync_file+0x357/0x540 do_fsync+0x38/0x60 __ia32_sys_fdatasync+0x12/0x20 do_fast_syscall_32+0x9a/0x2f0 entry_SYSENTER_compat+0x84/0x96 -> #4 (&ei->log_mutex){+.+.}: lock_acquire+0xbd/0x220 __mutex_lock+0x86/0xa10 btrfs_record_unlink_dir+0x2a/0xa0 btrfs_unlink+0x5a/0xc0 vfs_unlink+0xb1/0x1a0 do_unlinkat+0x264/0x2b0 do_fast_syscall_32+0x9a/0x2f0 entry_SYSENTER_compat+0x84/0x96 -> #3 (sb_internal#2){.+.+}: lock_acquire+0xbd/0x220 __sb_start_write+0x14d/0x230 start_transaction+0x3e6/0x590 btrfs_evict_inode+0x475/0x640 evict+0xbf/0x1b0 btrfs_run_delayed_iputs+0x6c/0x90 cleaner_kthread+0x124/0x1a0 kthread+0x106/0x140 ret_from_fork+0x3a/0x50 -> #2 (&fs_info->cleaner_delayed_iput_mutex){+.+.}: lock_acquire+0xbd/0x220 __mutex_lock+0x86/0xa10 btrfs_alloc_data_chunk_ondemand+0x197/0x530 btrfs_check_data_free_space+0x4c/0x90 btrfs_delalloc_reserve_space+0x20/0x60 btrfs_page_mkwrite+0x87/0x520 do_page_mkwrite+0x31/0xa0 __handle_mm_fault+0x799/0xb00 handle_mm_fault+0x7c/0xe0 __do_page_fault+0x1d3/0x4a0 async_page_fault+0x1e/0x30 -> #1 (sb_pagefaults){.+.+}: lock_acquire+0xbd/0x220 __sb_start_write+0x14d/0x230 btrfs_page_mkwrite+0x6a/0x520 do_page_mkwrite+0x31/0xa0 __handle_mm_fault+0x799/0xb00 handle_mm_fault+0x7c/0xe0 __do_page_fault+0x1d3/0x4a0 async_page_fault+0x1e/0x30 -> #0 (&mm->mmap_sem){++++}: __lock_acquire+0x42e/0x7a0 lock_acquire+0xbd/0x220 down_read+0x48/0xb0 get_user_pages_unlocked+0x5a/0x1e0 get_user_pages_fast+0xa4/0x150 iov_iter_get_pages+0xc3/0x340 do_direct_IO+0xf93/0x1d70 __blockdev_direct_IO+0x32d/0x1c20 btrfs_direct_IO+0x227/0x400 generic_file_direct_write+0xcf/0x180 btrfs_file_write_iter+0x308/0x58c aio_write+0xf8/0x1d0 io_submit_one+0x3a9/0x620 __ia32_compat_sys_io_submit+0xb2/0x270 do_int80_syscall_32+0x5b/0x1a0 entry_INT80_compat+0x88/0xa0 other info that might help us debug this: Chain exists of: &mm->mmap_sem --> &ei->log_mutex --> &ei->dio_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ei->dio_sem); lock(&ei->log_mutex); lock(&ei->dio_sem); lock(&mm->mmap_sem); *** DEADLOCK *** 1 lock held by aio-dio-invalid/30928: #0: 00000000cefe6b35 (&ei->dio_sem){++++}, at: btrfs_direct_IO+0x3be/0x400 stack backtrace: CPU: 0 PID: 30928 Comm: aio-dio-invalid Not tainted 4.18.0-rc4-xfstests-00025-g5de5edbaf1d4 #411 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Call Trace: dump_stack+0x7c/0xbb print_circular_bug.isra.37+0x297/0x2a4 check_prev_add.constprop.45+0x781/0x7a0 ? __lock_acquire+0x42e/0x7a0 validate_chain.isra.41+0x7f0/0xb00 __lock_acquire+0x42e/0x7a0 lock_acquire+0xbd/0x220 ? get_user_pages_unlocked+0x5a/0x1e0 down_read+0x48/0xb0 ? get_user_pages_unlocked+0x5a/0x1e0 get_user_pages_unlocked+0x5a/0x1e0 get_user_pages_fast+0xa4/0x150 iov_iter_get_pages+0xc3/0x340 do_direct_IO+0xf93/0x1d70 ? __alloc_workqueue_key+0x358/0x490 ? __blockdev_direct_IO+0x14b/0x1c20 __blockdev_direct_IO+0x32d/0x1c20 ? btrfs_run_delalloc_work+0x40/0x40 ? can_nocow_extent+0x490/0x490 ? kvm_clock_read+0x1f/0x30 ? can_nocow_extent+0x490/0x490 ? btrfs_run_delalloc_work+0x40/0x40 btrfs_direct_IO+0x227/0x400 ? btrfs_run_delalloc_work+0x40/0x40 generic_file_direct_write+0xcf/0x180 btrfs_file_write_iter+0x308/0x58c aio_write+0xf8/0x1d0 ? kvm_clock_read+0x1f/0x30 ? __might_fault+0x3e/0x90 io_submit_one+0x3a9/0x620 ? io_submit_one+0xe5/0x620 __ia32_compat_sys_io_submit+0xb2/0x270 do_int80_syscall_32+0x5b/0x1a0 entry_INT80_compat+0x88/0xa0 CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 12 ++++++++++++ fs/btrfs/tree-log.c | 2 -- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5690feded0de..57e25e83b81a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2078,6 +2078,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; inode_lock(inode); + + /* + * We take the dio_sem here because the tree log stuff can race with + * lockless dio writes and get an extent map logged for an extent we + * never waited on. We need it this high up for lockdep reasons. + */ + down_write(&BTRFS_I(inode)->dio_sem); + atomic_inc(&root->log_batch); full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); @@ -2129,6 +2137,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = start_ordered_ops(inode, start, end); } if (ret) { + up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); goto out; } @@ -2184,6 +2193,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * checked called fsync. */ ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err); + up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); goto out; } @@ -2208,6 +2218,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); goto out; } @@ -2229,6 +2240,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ + up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index accc54efb66e..6c4a2ea2ede9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4362,7 +4362,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&extents); - down_write(&inode->dio_sem); write_lock(&tree->lock); test_gen = root->fs_info->last_trans_committed; logged_start = start; @@ -4443,7 +4442,6 @@ process: } WARN_ON(!list_empty(&extents)); write_unlock(&tree->lock); - up_write(&inode->dio_sem); btrfs_release_path(path); if (!ret) From bf005ed0fbddd6a5e3a2c1c3b222c4104a9819b5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 12 Oct 2018 13:02:48 +0100 Subject: [PATCH 0392/2608] Btrfs: fix use-after-free during inode eviction commit 421f0922a2cfb0c75acd9746454aaa576c711a65 upstream. At inode.c:evict_inode_truncate_pages(), when we iterate over the inode's extent states, we access an extent state record's "state" field after we unlocked the inode's io tree lock. This can lead to a use-after-free issue because after we unlock the io tree that extent state record might have been freed due to being merged into another adjacent extent state record (a previous inflight bio for a read operation finished in the meanwhile which unlocked a range in the io tree and cause a merge of extent state records, as explained in the comment before the while loop added in commit 6ca0709756710 ("Btrfs: fix hang during inode eviction due to concurrent readahead")). Fix this by keeping a copy of the extent state's flags in a local variable and using it after unlocking the io tree. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201189 Fixes: b9d0b38928e2 ("btrfs: Add handler for invalidate page") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 39edb4b71fa9..90568a21fa77 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5335,11 +5335,13 @@ static void evict_inode_truncate_pages(struct inode *inode) struct extent_state *cached_state = NULL; u64 start; u64 end; + unsigned state_flags; node = rb_first(&io_tree->state); state = rb_entry(node, struct extent_state, rb_node); start = state->start; end = state->end; + state_flags = state->state; spin_unlock(&io_tree->lock); lock_extent_bits(io_tree, start, end, &cached_state); @@ -5352,7 +5354,7 @@ static void evict_inode_truncate_pages(struct inode *inode) * * Note, end is the bytenr of last byte, so we need + 1 here. */ - if (state->state & EXTENT_DELALLOC) + if (state_flags & EXTENT_DELALLOC) btrfs_qgroup_free_data(inode, NULL, start, end - start + 1); clear_extent_bit(io_tree, start, end, From 3c2d1ef3dfe0d798266f3ae4d3b7e13aa4956f4d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 22 Oct 2018 10:43:06 +0100 Subject: [PATCH 0393/2608] Btrfs: fix use-after-free when dumping free space commit 9084cb6a24bf5838a665af92ded1af8363f9e563 upstream. We were iterating a block group's free space cache rbtree without locking first the lock that protects it (the free_space_ctl->free_space_offset rbtree is protected by the free_space_ctl->tree_lock spinlock). KASAN reported an use-after-free problem when iterating such a rbtree due to a concurrent rbtree delete: [ 9520.359168] ================================================================== [ 9520.359656] BUG: KASAN: use-after-free in rb_next+0x13/0x90 [ 9520.359949] Read of size 8 at addr ffff8800b7ada500 by task btrfs-transacti/1721 [ 9520.360357] [ 9520.360530] CPU: 4 PID: 1721 Comm: btrfs-transacti Tainted: G L 4.19.0-rc8-nbor #555 [ 9520.360990] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 9520.362682] Call Trace: [ 9520.362887] dump_stack+0xa4/0xf5 [ 9520.363146] print_address_description+0x78/0x280 [ 9520.363412] kasan_report+0x263/0x390 [ 9520.363650] ? rb_next+0x13/0x90 [ 9520.363873] __asan_load8+0x54/0x90 [ 9520.364102] rb_next+0x13/0x90 [ 9520.364380] btrfs_dump_free_space+0x146/0x160 [btrfs] [ 9520.364697] dump_space_info+0x2cd/0x310 [btrfs] [ 9520.364997] btrfs_reserve_extent+0x1ee/0x1f0 [btrfs] [ 9520.365310] __btrfs_prealloc_file_range+0x1cc/0x620 [btrfs] [ 9520.365646] ? btrfs_update_time+0x180/0x180 [btrfs] [ 9520.365923] ? _raw_spin_unlock+0x27/0x40 [ 9520.366204] ? btrfs_alloc_data_chunk_ondemand+0x2c0/0x5c0 [btrfs] [ 9520.366549] btrfs_prealloc_file_range_trans+0x23/0x30 [btrfs] [ 9520.366880] cache_save_setup+0x42e/0x580 [btrfs] [ 9520.367220] ? btrfs_check_data_free_space+0xd0/0xd0 [btrfs] [ 9520.367518] ? lock_downgrade+0x2f0/0x2f0 [ 9520.367799] ? btrfs_write_dirty_block_groups+0x11f/0x6e0 [btrfs] [ 9520.368104] ? kasan_check_read+0x11/0x20 [ 9520.368349] ? do_raw_spin_unlock+0xa8/0x140 [ 9520.368638] btrfs_write_dirty_block_groups+0x2af/0x6e0 [btrfs] [ 9520.368978] ? btrfs_start_dirty_block_groups+0x870/0x870 [btrfs] [ 9520.369282] ? do_raw_spin_unlock+0xa8/0x140 [ 9520.369534] ? _raw_spin_unlock+0x27/0x40 [ 9520.369811] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] [ 9520.370137] commit_cowonly_roots+0x4b9/0x610 [btrfs] [ 9520.370560] ? commit_fs_roots+0x350/0x350 [btrfs] [ 9520.370926] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] [ 9520.371285] btrfs_commit_transaction+0x5e5/0x10e0 [btrfs] [ 9520.371612] ? btrfs_apply_pending_changes+0x90/0x90 [btrfs] [ 9520.371943] ? start_transaction+0x168/0x6c0 [btrfs] [ 9520.372257] transaction_kthread+0x21c/0x240 [btrfs] [ 9520.372537] kthread+0x1d2/0x1f0 [ 9520.372793] ? btrfs_cleanup_transaction+0xb50/0xb50 [btrfs] [ 9520.373090] ? kthread_park+0xb0/0xb0 [ 9520.373329] ret_from_fork+0x3a/0x50 [ 9520.373567] [ 9520.373738] Allocated by task 1804: [ 9520.373974] kasan_kmalloc+0xff/0x180 [ 9520.374208] kasan_slab_alloc+0x11/0x20 [ 9520.374447] kmem_cache_alloc+0xfc/0x2d0 [ 9520.374731] __btrfs_add_free_space+0x40/0x580 [btrfs] [ 9520.375044] unpin_extent_range+0x4f7/0x7a0 [btrfs] [ 9520.375383] btrfs_finish_extent_commit+0x15f/0x4d0 [btrfs] [ 9520.375707] btrfs_commit_transaction+0xb06/0x10e0 [btrfs] [ 9520.376027] btrfs_alloc_data_chunk_ondemand+0x237/0x5c0 [btrfs] [ 9520.376365] btrfs_check_data_free_space+0x81/0xd0 [btrfs] [ 9520.376689] btrfs_delalloc_reserve_space+0x25/0x80 [btrfs] [ 9520.377018] btrfs_direct_IO+0x42e/0x6d0 [btrfs] [ 9520.377284] generic_file_direct_write+0x11e/0x220 [ 9520.377587] btrfs_file_write_iter+0x472/0xac0 [btrfs] [ 9520.377875] aio_write+0x25c/0x360 [ 9520.378106] io_submit_one+0xaa0/0xdc0 [ 9520.378343] __se_sys_io_submit+0xfa/0x2f0 [ 9520.378589] __x64_sys_io_submit+0x43/0x50 [ 9520.378840] do_syscall_64+0x7d/0x240 [ 9520.379081] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 9520.379387] [ 9520.379557] Freed by task 1802: [ 9520.379782] __kasan_slab_free+0x173/0x260 [ 9520.380028] kasan_slab_free+0xe/0x10 [ 9520.380262] kmem_cache_free+0xc1/0x2c0 [ 9520.380544] btrfs_find_space_for_alloc+0x4cd/0x4e0 [btrfs] [ 9520.380866] find_free_extent+0xa99/0x17e0 [btrfs] [ 9520.381166] btrfs_reserve_extent+0xd5/0x1f0 [btrfs] [ 9520.381474] btrfs_get_blocks_direct+0x60b/0xbd0 [btrfs] [ 9520.381761] __blockdev_direct_IO+0x10ee/0x58a1 [ 9520.382059] btrfs_direct_IO+0x25a/0x6d0 [btrfs] [ 9520.382321] generic_file_direct_write+0x11e/0x220 [ 9520.382623] btrfs_file_write_iter+0x472/0xac0 [btrfs] [ 9520.382904] aio_write+0x25c/0x360 [ 9520.383172] io_submit_one+0xaa0/0xdc0 [ 9520.383416] __se_sys_io_submit+0xfa/0x2f0 [ 9520.383678] __x64_sys_io_submit+0x43/0x50 [ 9520.383927] do_syscall_64+0x7d/0x240 [ 9520.384165] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 9520.384439] [ 9520.384610] The buggy address belongs to the object at ffff8800b7ada500 which belongs to the cache btrfs_free_space of size 72 [ 9520.385175] The buggy address is located 0 bytes inside of 72-byte region [ffff8800b7ada500, ffff8800b7ada548) [ 9520.385691] The buggy address belongs to the page: [ 9520.385957] page:ffffea0002deb680 count:1 mapcount:0 mapping:ffff880108a1d700 index:0x0 compound_mapcount: 0 [ 9520.388030] flags: 0x8100(slab|head) [ 9520.388281] raw: 0000000000008100 ffffea0002deb608 ffffea0002728808 ffff880108a1d700 [ 9520.388722] raw: 0000000000000000 0000000000130013 00000001ffffffff 0000000000000000 [ 9520.389169] page dumped because: kasan: bad access detected [ 9520.389473] [ 9520.389658] Memory state around the buggy address: [ 9520.389943] ffff8800b7ada400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 9520.390368] ffff8800b7ada480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 9520.390796] >ffff8800b7ada500: fb fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc [ 9520.391223] ^ [ 9520.391461] ffff8800b7ada580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 9520.391885] ffff8800b7ada600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 9520.392313] ================================================================== [ 9520.392772] BTRFS critical (device vdc): entry offset 2258497536, bytes 131072, bitmap no [ 9520.393247] BUG: unable to handle kernel NULL pointer dereference at 0000000000000011 [ 9520.393705] PGD 800000010dbab067 P4D 800000010dbab067 PUD 107551067 PMD 0 [ 9520.394059] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 9520.394378] CPU: 4 PID: 1721 Comm: btrfs-transacti Tainted: G B L 4.19.0-rc8-nbor #555 [ 9520.394858] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 9520.395350] RIP: 0010:rb_next+0x3c/0x90 [ 9520.396461] RSP: 0018:ffff8801074ff780 EFLAGS: 00010292 [ 9520.396762] RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff81b5ac4c [ 9520.397115] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000011 [ 9520.397468] RBP: ffff8801074ff7a0 R08: ffffed0021d64ccc R09: ffffed0021d64ccc [ 9520.397821] R10: 0000000000000001 R11: ffffed0021d64ccb R12: ffff8800b91e0000 [ 9520.398188] R13: ffff8800a3ceba48 R14: ffff8800b627bf80 R15: 0000000000020000 [ 9520.398555] FS: 0000000000000000(0000) GS:ffff88010eb00000(0000) knlGS:0000000000000000 [ 9520.399007] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9520.399335] CR2: 0000000000000011 CR3: 0000000106b52000 CR4: 00000000000006a0 [ 9520.399679] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 9520.400023] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 9520.400400] Call Trace: [ 9520.400648] btrfs_dump_free_space+0x146/0x160 [btrfs] [ 9520.400974] dump_space_info+0x2cd/0x310 [btrfs] [ 9520.401287] btrfs_reserve_extent+0x1ee/0x1f0 [btrfs] [ 9520.401609] __btrfs_prealloc_file_range+0x1cc/0x620 [btrfs] [ 9520.401952] ? btrfs_update_time+0x180/0x180 [btrfs] [ 9520.402232] ? _raw_spin_unlock+0x27/0x40 [ 9520.402522] ? btrfs_alloc_data_chunk_ondemand+0x2c0/0x5c0 [btrfs] [ 9520.402882] btrfs_prealloc_file_range_trans+0x23/0x30 [btrfs] [ 9520.403261] cache_save_setup+0x42e/0x580 [btrfs] [ 9520.403570] ? btrfs_check_data_free_space+0xd0/0xd0 [btrfs] [ 9520.403871] ? lock_downgrade+0x2f0/0x2f0 [ 9520.404161] ? btrfs_write_dirty_block_groups+0x11f/0x6e0 [btrfs] [ 9520.404481] ? kasan_check_read+0x11/0x20 [ 9520.404732] ? do_raw_spin_unlock+0xa8/0x140 [ 9520.405026] btrfs_write_dirty_block_groups+0x2af/0x6e0 [btrfs] [ 9520.405375] ? btrfs_start_dirty_block_groups+0x870/0x870 [btrfs] [ 9520.405694] ? do_raw_spin_unlock+0xa8/0x140 [ 9520.405958] ? _raw_spin_unlock+0x27/0x40 [ 9520.406243] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] [ 9520.406574] commit_cowonly_roots+0x4b9/0x610 [btrfs] [ 9520.406899] ? commit_fs_roots+0x350/0x350 [btrfs] [ 9520.407253] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] [ 9520.407589] btrfs_commit_transaction+0x5e5/0x10e0 [btrfs] [ 9520.407925] ? btrfs_apply_pending_changes+0x90/0x90 [btrfs] [ 9520.408262] ? start_transaction+0x168/0x6c0 [btrfs] [ 9520.408582] transaction_kthread+0x21c/0x240 [btrfs] [ 9520.408870] kthread+0x1d2/0x1f0 [ 9520.409138] ? btrfs_cleanup_transaction+0xb50/0xb50 [btrfs] [ 9520.409440] ? kthread_park+0xb0/0xb0 [ 9520.409682] ret_from_fork+0x3a/0x50 [ 9520.410508] Dumping ftrace buffer: [ 9520.410764] (ftrace buffer empty) [ 9520.411007] CR2: 0000000000000011 [ 9520.411297] ---[ end trace 01a0863445cf360a ]--- [ 9520.411568] RIP: 0010:rb_next+0x3c/0x90 [ 9520.412644] RSP: 0018:ffff8801074ff780 EFLAGS: 00010292 [ 9520.412932] RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff81b5ac4c [ 9520.413274] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000011 [ 9520.413616] RBP: ffff8801074ff7a0 R08: ffffed0021d64ccc R09: ffffed0021d64ccc [ 9520.414007] R10: 0000000000000001 R11: ffffed0021d64ccb R12: ffff8800b91e0000 [ 9520.414349] R13: ffff8800a3ceba48 R14: ffff8800b627bf80 R15: 0000000000020000 [ 9520.416074] FS: 0000000000000000(0000) GS:ffff88010eb00000(0000) knlGS:0000000000000000 [ 9520.416536] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9520.416848] CR2: 0000000000000011 CR3: 0000000106b52000 CR4: 00000000000006a0 [ 9520.418477] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 9520.418846] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 9520.419204] Kernel panic - not syncing: Fatal exception [ 9520.419666] Dumping ftrace buffer: [ 9520.419930] (ftrace buffer empty) [ 9520.420168] Kernel Offset: disabled [ 9520.420406] ---[ end Kernel panic - not syncing: Fatal exception ]--- Fix this by acquiring the respective lock before iterating the rbtree. Reported-by: Nikolay Borisov CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a8b4e4d7b82d..9f31b81a5e27 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2482,6 +2482,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, struct rb_node *n; int count = 0; + spin_lock(&ctl->tree_lock); for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { info = rb_entry(n, struct btrfs_free_space, offset_index); if (info->bytes >= bytes && !block_group->ro) @@ -2490,6 +2491,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, info->offset, info->bytes, (info->bitmap) ? "yes" : "no"); } + spin_unlock(&ctl->tree_lock); btrfs_info(fs_info, "block group has cluster?: %s", list_empty(&block_group->cluster_list) ? "no" : "yes"); btrfs_info(fs_info, From fc8a236ee0580c920c48696f0df806affa892cbc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 26 Mar 2018 23:59:00 +0100 Subject: [PATCH 0394/2608] Btrfs: fix fsync after hole punching when using no-holes feature commit 4ee3fad34a9cc2cf33303dfbd0cf554248651c86 upstream. When we have the no-holes mode enabled and fsync a file after punching a hole in it, we can end up not logging the whole hole range in the log tree. This happens if the file has extent items that span more than one leaf and we punch a hole that covers a range that starts in a leaf but does not go beyond the offset of the first extent in the next leaf. Example: $ mkfs.btrfs -f -O no-holes -n 65536 /dev/sdb $ mount /dev/sdb /mnt $ for ((i = 0; i <= 831; i++)); do offset=$((i * 2 * 256 * 1024)) xfs_io -f -c "pwrite -S 0xab -b 256K $offset 256K" \ /mnt/foobar >/dev/null done $ sync # We now have 2 leafs in our filesystem fs tree, the first leaf has an # item corresponding the extent at file offset 216530944 and the second # leaf has a first item corresponding to the extent at offset 217055232. # Now we punch a hole that partially covers the range of the extent at # offset 216530944 but does go beyond the offset 217055232. $ xfs_io -c "fpunch $((216530944 + 128 * 1024 - 4000)) 256K" /mnt/foobar $ xfs_io -c "fsync" /mnt/foobar # mount to replay the log $ mount /dev/sdb /mnt # Before this patch, only the subrange [216658016, 216662016[ (length of # 4000 bytes) was logged, leaving an incorrect file layout after log # replay. Fix this by checking if there is a hole between the last extent item that we processed and the first extent item in the next leaf, and if there is one, log an explicit hole extent item. Fixes: 16e7549f045d ("Btrfs: incompatible format change to remove hole extents") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6c4a2ea2ede9..2109db196449 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3978,6 +3978,36 @@ fill_holes: break; *last_extent = extent_end; } + + /* + * Check if there is a hole between the last extent found in our leaf + * and the first extent in the next leaf. If there is one, we need to + * log an explicit hole so that at replay time we can punch the hole. + */ + if (ret == 0 && + key.objectid == btrfs_ino(inode) && + key.type == BTRFS_EXTENT_DATA_KEY && + i == btrfs_header_nritems(src_path->nodes[0])) { + ret = btrfs_next_leaf(inode->root, src_path); + need_find_last_extent = true; + if (ret > 0) { + ret = 0; + } else if (ret == 0) { + btrfs_item_key_to_cpu(src_path->nodes[0], &key, + src_path->slots[0]); + if (key.objectid == btrfs_ino(inode) && + key.type == BTRFS_EXTENT_DATA_KEY && + *last_extent < key.offset) { + const u64 len = key.offset - *last_extent; + + ret = btrfs_insert_file_extent(trans, log, + btrfs_ino(inode), + *last_extent, 0, + 0, len, 0, len, + 0, 0, 0); + } + } + } /* * Need to let the callers know we dropped the path so they should * re-search. From dfba4092167df3bb9b6abb0510ea4c680bb8ece5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 08:32:49 -0700 Subject: [PATCH 0395/2608] net: sched: Remove TCA_OPTIONS from policy commit e72bde6b66299602087c8c2350d36a525e75d06e upstream. Marco reported an error with hfsc: root@Calimero:~# tc qdisc add dev eth0 root handle 1:0 hfsc default 1 Error: Attribute failed policy validation. Apparently a few implementations pass TCA_OPTIONS as a binary instead of nested attribute, so drop TCA_OPTIONS from the policy. Fixes: 8b4c3cdd9dd8 ("net: sched: Add policy validation for tc attributes") Reported-by: Marco Berizzi Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 691ca96f7460..7b4270987ac1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1218,7 +1218,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, - [TCA_OPTIONS] = { .type = NLA_NESTED }, [TCA_RATE] = { .type = NLA_BINARY, .len = sizeof(struct tc_estimator) }, [TCA_STAB] = { .type = NLA_NESTED }, From 4bea15f7933dc87a091a7481590dd2ea1088b2bf Mon Sep 17 00:00:00 2001 From: Daniel Colascione Date: Fri, 12 Oct 2018 03:54:27 -0700 Subject: [PATCH 0396/2608] bpf: wait for running BPF programs when updating map-in-map commit 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 upstream. The map-in-map frequently serves as a mechanism for atomic snapshotting of state that a BPF program might record. The current implementation is dangerous to use in this way, however, since userspace has no way of knowing when all programs that might have retrieved the "old" value of the map may have completed. This change ensures that map update operations on map-in-map map types always wait for all references to the old map to drop before returning to userspace. Signed-off-by: Daniel Colascione Reviewed-by: Joel Fernandes (Google) Signed-off-by: Alexei Starovoitov [fengc@google.com: 4.14 backport: adjust context] Signed-off-by: Chenbo Feng Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/syscall.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ea22d0b6a9f0..5c9deed4524e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -519,6 +519,17 @@ err_put: return err; } +static void maybe_wait_bpf_programs(struct bpf_map *map) +{ + /* Wait for any running BPF programs to complete so that + * userspace, when we return to it, knows that all programs + * that could be running use the new map value. + */ + if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || + map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) + synchronize_rcu(); +} + #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags static int map_update_elem(union bpf_attr *attr) @@ -592,6 +603,7 @@ static int map_update_elem(union bpf_attr *attr) } __this_cpu_dec(bpf_prog_active); preempt_enable(); + maybe_wait_bpf_programs(map); if (!err) trace_bpf_map_update_elem(map, ufd, key, value); @@ -636,6 +648,7 @@ static int map_delete_elem(union bpf_attr *attr) rcu_read_unlock(); __this_cpu_dec(bpf_prog_active); preempt_enable(); + maybe_wait_bpf_programs(map); if (!err) trace_bpf_map_delete_elem(map, ufd, key); From c3cd0a4fe429089701a3480cbbf355495e0642c7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sun, 14 Oct 2018 17:05:07 -0700 Subject: [PATCH 0397/2608] MD: fix invalid stored role for a disk - try2 commit 9e753ba9b9b405e3902d9f08aec5f2ea58a0c317 upstream. Commit d595567dc4f0 (MD: fix invalid stored role for a disk) broke linear hotadd. Let's only fix the role for disks in raid1/10. Based on Guoqing's original patch. Reported-by: kernel test robot Cc: Gioh Kim Cc: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 4 ---- drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 4c1a6abed606..5599712d478e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1766,10 +1766,6 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } else set_bit(In_sync, &rdev->flags); rdev->raid_disk = role; - if (role >= mddev->raid_disks) { - rdev->saved_raid_disk = -1; - rdev->raid_disk = -1; - } break; } if (sb->devflags & WriteMostly1) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 78d830763704..205f86f1a6cb 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1725,6 +1725,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) */ if (rdev->saved_raid_disk >= 0 && rdev->saved_raid_disk >= first && + rdev->saved_raid_disk < conf->raid_disks && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) first = last = rdev->saved_raid_disk; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 927b60e9d3ca..e786546bf3b8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1775,6 +1775,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) first = last = rdev->raid_disk; if (rdev->saved_raid_disk >= first && + rdev->saved_raid_disk < conf->geo.raid_disks && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) mirror = rdev->saved_raid_disk; else From 2e390c487815669fb9bb35d7ea11883cc10a9b50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Nov 2018 11:15:18 -0800 Subject: [PATCH 0398/2608] Linux 4.14.81 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f4cad5e03561..2fe1424d61d2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 80 +SUBLEVEL = 81 EXTRAVERSION = NAME = Petit Gorille From 90c68f716d57886f4e6f3ff7c7c07014b4254745 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 13 Oct 2018 09:16:22 +0000 Subject: [PATCH 0399/2608] powerpc/traps: restore recoverability of machine_check interrupts [ Upstream commit daf00ae71dad8aa05965713c62558aeebf2df48e ] commit b96672dd840f ("powerpc: Machine check interrupt is a non- maskable interrupt") added a call to nmi_enter() at the beginning of machine check restart exception handler. Due to that, in_interrupt() always returns true regardless of the state before entering the exception, and die() panics even when the system was not already in interrupt. This patch calls nmi_exit() before calling die() in order to restore the interrupt state we had before calling nmi_enter() Fixes: b96672dd840f ("powerpc: Machine check interrupt is a non-maskable interrupt") Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/traps.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index ac2e5e56a9f0..a5f2b7593976 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -694,12 +694,17 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - die("Machine check", regs, SIGBUS); - /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) nmi_panic(regs, "Unrecoverable Machine check"); + if (!nested) + nmi_exit(); + + die("Machine check", regs, SIGBUS); + + return; + bail: if (!nested) nmi_exit(); From 2b9aed7cb2e2dd48092ce1950a0c4bdd792a19d3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 29 Aug 2018 21:56:56 +1000 Subject: [PATCH 0400/2608] powerpc/64/module: REL32 relocation range check [ Upstream commit b851ba02a6f3075f0f99c60c4bc30a4af80cf428 ] The recent module relocation overflow crash demonstrated that we have no range checking on REL32 relative relocations. This patch implements a basic check, the same kernel that previously oopsed and rebooted now continues with some of these errors when loading the module: module_64: x_tables: REL32 527703503449812 out of range! Possibly other relocations (ADDR32, REL16, TOC16, etc.) should also have overflow checks. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/module_64.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 2a1b1273a312..4d8f6291b766 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -656,7 +656,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_PPC64_REL32: /* 32 bits relative (used by relative exception tables) */ - *(u32 *)location = value - (unsigned long)location; + /* Convert value to relative */ + value -= (unsigned long)location; + if (value + 0x80000000 > 0xffffffff) { + pr_err("%s: REL32 %li out of range!\n", + me->name, (long int)value); + return -ENOEXEC; + } + *(u32 *)location = value; break; case R_PPC64_TOCSAVE: From 0b1f1204554e4ade5af9ea8d59ce6b59ef95e894 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 15 Aug 2018 21:29:45 +1000 Subject: [PATCH 0401/2608] powerpc/mm: Fix page table dump to work on Radix [ Upstream commit 0d923962ab69c27cca664a2d535e90ef655110ca ] When we're running on Book3S with the Radix MMU enabled the page table dump currently prints the wrong addresses because it uses the wrong start address. Fix it to use PAGE_OFFSET rather than KERN_VIRT_START. Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/dump_linuxpagetables.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index c9282d27b203..31c1c61afaa4 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -422,12 +422,13 @@ static void walk_pagetables(struct pg_state *st) unsigned int i; unsigned long addr; + addr = st->start_address; + /* * Traverse the linux pagetable structure and dump pages that are in * the hash pagetable. */ - for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { - addr = KERN_VIRT_START + i * PGDIR_SIZE; + for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { if (!pgd_none(*pgd) && !pgd_huge(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); @@ -476,9 +477,14 @@ static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { .seq = m, - .start_address = KERN_VIRT_START, .marker = address_markers, }; + + if (radix_enabled()) + st.start_address = PAGE_OFFSET; + else + st.start_address = KERN_VIRT_START; + /* Traverse kernel page tables */ walk_pagetables(&st); note_page(&st, 0, 0, 0); From 211981e7e186dd3d9119a4f9f90633a677a2d58b Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Wed, 12 Sep 2018 11:23:20 +1000 Subject: [PATCH 0402/2608] powerpc/eeh: Fix possible null deref in eeh_dump_dev_log() [ Upstream commit f9bc28aedfb5bbd572d2d365f3095c1becd7209b ] If an error occurs during an unplug operation, it's possible for eeh_dump_dev_log() to be called when edev->pdn is null, which currently leads to dereferencing a null pointer. Handle this by skipping the error log for those devices. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 116000b45531..45322b37669a 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -169,6 +169,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) int n = 0, l = 0; char buffer[128]; + if (!pdn) { + pr_warn("EEH: Note: No error log for absent device.\n"); + return 0; + } + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); From 8c5800cdb75c354ec0224f0fbab9e4c604b065c0 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Mon, 8 Oct 2018 10:39:17 +0800 Subject: [PATCH 0403/2608] tty: check name length in tty_find_polling_driver() [ Upstream commit 33a1a7be198657c8ca26ad406c4d2a89b7162bcc ] The issue is found by a fuzzing test. If tty_find_polling_driver() recevies an incorrect input such as ',,' or '0b', the len becomes 0 and strncmp() always return 0. In this case, a null p->ops->poll_init() is called and it causes a kernel panic. Fix this by checking name length against zero in tty_find_polling_driver(). $echo ,, > /sys/module/kgdboc/parameters/kgdboc [ 20.804451] WARNING: CPU: 1 PID: 104 at drivers/tty/serial/serial_core.c:457 uart_get_baud_rate+0xe8/0x190 [ 20.804917] Modules linked in: [ 20.805317] CPU: 1 PID: 104 Comm: sh Not tainted 4.19.0-rc7ajb #8 [ 20.805469] Hardware name: linux,dummy-virt (DT) [ 20.805732] pstate: 20000005 (nzCv daif -PAN -UAO) [ 20.805895] pc : uart_get_baud_rate+0xe8/0x190 [ 20.806042] lr : uart_get_baud_rate+0xc0/0x190 [ 20.806476] sp : ffffffc06acff940 [ 20.806676] x29: ffffffc06acff940 x28: 0000000000002580 [ 20.806977] x27: 0000000000009600 x26: 0000000000009600 [ 20.807231] x25: ffffffc06acffad0 x24: 00000000ffffeff0 [ 20.807576] x23: 0000000000000001 x22: 0000000000000000 [ 20.807807] x21: 0000000000000001 x20: 0000000000000000 [ 20.808049] x19: ffffffc06acffac8 x18: 0000000000000000 [ 20.808277] x17: 0000000000000000 x16: 0000000000000000 [ 20.808520] x15: ffffffffffffffff x14: ffffffff00000000 [ 20.808757] x13: ffffffffffffffff x12: 0000000000000001 [ 20.809011] x11: 0101010101010101 x10: ffffff880d59ff5f [ 20.809292] x9 : ffffff880d59ff5e x8 : ffffffc06acffaf3 [ 20.809549] x7 : 0000000000000000 x6 : ffffff880d59ff5f [ 20.809803] x5 : 0000000080008001 x4 : 0000000000000003 [ 20.810056] x3 : ffffff900853e6b4 x2 : dfffff9000000000 [ 20.810693] x1 : ffffffc06acffad0 x0 : 0000000000000cb0 [ 20.811005] Call trace: [ 20.811214] uart_get_baud_rate+0xe8/0x190 [ 20.811479] serial8250_do_set_termios+0xe0/0x6f4 [ 20.811719] serial8250_set_termios+0x48/0x54 [ 20.811928] uart_set_options+0x138/0x1bc [ 20.812129] uart_poll_init+0x114/0x16c [ 20.812330] tty_find_polling_driver+0x158/0x200 [ 20.812545] configure_kgdboc+0xbc/0x1bc [ 20.812745] param_set_kgdboc_var+0xb8/0x150 [ 20.812960] param_attr_store+0xbc/0x150 [ 20.813160] module_attr_store+0x40/0x58 [ 20.813364] sysfs_kf_write+0x8c/0xa8 [ 20.813563] kernfs_fop_write+0x154/0x290 [ 20.813764] vfs_write+0xf0/0x278 [ 20.813951] __arm64_sys_write+0x84/0xf4 [ 20.814400] el0_svc_common+0xf4/0x1dc [ 20.814616] el0_svc_handler+0x98/0xbc [ 20.814804] el0_svc+0x8/0xc [ 20.822005] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 20.826913] Mem abort info: [ 20.827103] ESR = 0x84000006 [ 20.827352] Exception class = IABT (current EL), IL = 16 bits [ 20.827655] SET = 0, FnV = 0 [ 20.827855] EA = 0, S1PTW = 0 [ 20.828135] user pgtable: 4k pages, 39-bit VAs, pgdp = (____ptrval____) [ 20.828484] [0000000000000000] pgd=00000000aadee003, pud=00000000aadee003, pmd=0000000000000000 [ 20.829195] Internal error: Oops: 84000006 [#1] SMP [ 20.829564] Modules linked in: [ 20.829890] CPU: 1 PID: 104 Comm: sh Tainted: G W 4.19.0-rc7ajb #8 [ 20.830545] Hardware name: linux,dummy-virt (DT) [ 20.830829] pstate: 60000085 (nZCv daIf -PAN -UAO) [ 20.831174] pc : (null) [ 20.831457] lr : serial8250_do_set_termios+0x358/0x6f4 [ 20.831727] sp : ffffffc06acff9b0 [ 20.831936] x29: ffffffc06acff9b0 x28: ffffff9008d7c000 [ 20.832267] x27: ffffff900969e16f x26: 0000000000000000 [ 20.832589] x25: ffffff900969dfb0 x24: 0000000000000000 [ 20.832906] x23: ffffffc06acffad0 x22: ffffff900969e160 [ 20.833232] x21: 0000000000000000 x20: ffffffc06acffac8 [ 20.833559] x19: ffffff900969df90 x18: 0000000000000000 [ 20.833878] x17: 0000000000000000 x16: 0000000000000000 [ 20.834491] x15: ffffffffffffffff x14: ffffffff00000000 [ 20.834821] x13: ffffffffffffffff x12: 0000000000000001 [ 20.835143] x11: 0101010101010101 x10: ffffff880d59ff5f [ 20.835467] x9 : ffffff880d59ff5e x8 : ffffffc06acffaf3 [ 20.835790] x7 : 0000000000000000 x6 : ffffff880d59ff5f [ 20.836111] x5 : c06419717c314100 x4 : 0000000000000007 [ 20.836419] x3 : 0000000000000000 x2 : 0000000000000000 [ 20.836732] x1 : 0000000000000001 x0 : ffffff900969df90 [ 20.837100] Process sh (pid: 104, stack limit = 0x(____ptrval____)) [ 20.837396] Call trace: [ 20.837566] (null) [ 20.837816] serial8250_set_termios+0x48/0x54 [ 20.838089] uart_set_options+0x138/0x1bc [ 20.838570] uart_poll_init+0x114/0x16c [ 20.838834] tty_find_polling_driver+0x158/0x200 [ 20.839119] configure_kgdboc+0xbc/0x1bc [ 20.839380] param_set_kgdboc_var+0xb8/0x150 [ 20.839658] param_attr_store+0xbc/0x150 [ 20.839920] module_attr_store+0x40/0x58 [ 20.840183] sysfs_kf_write+0x8c/0xa8 [ 20.840183] sysfs_kf_write+0x8c/0xa8 [ 20.840440] kernfs_fop_write+0x154/0x290 [ 20.840702] vfs_write+0xf0/0x278 [ 20.840942] __arm64_sys_write+0x84/0xf4 [ 20.841209] el0_svc_common+0xf4/0x1dc [ 20.841471] el0_svc_handler+0x98/0xbc [ 20.841713] el0_svc+0x8/0xc [ 20.842057] Code: bad PC value [ 20.842764] ---[ end trace a8835d7de79aaadf ]--- [ 20.843134] Kernel panic - not syncing: Fatal exception [ 20.843515] SMP: stopping secondary CPUs [ 20.844289] Kernel Offset: disabled [ 20.844634] CPU features: 0x0,21806002 [ 20.844857] Memory Limit: none [ 20.845172] ---[ end Kernel panic - not syncing: Fatal exception ]--- Signed-off-by: Miles Chen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 8d65b2f9ee80..83376caa571b 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -407,7 +407,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) mutex_lock(&tty_mutex); /* Search through the tty devices to look for a match */ list_for_each_entry(p, &tty_drivers, tty_drivers) { - if (strncmp(name, p->name, len) != 0) + if (!len || strncmp(name, p->name, len) != 0) continue; stp = str; if (*stp == ',') From 6c2449a04c43faf8b6ca52cb11617c77fd3ee4dc Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 10 Sep 2018 14:45:23 -0300 Subject: [PATCH 0404/2608] ARM: imx_v6_v7_defconfig: Select CONFIG_TMPFS_POSIX_ACL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 35d3cbe84544da74e39e1cec01374092467e3119 ] Andreas Müller reports: "Fixes: | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[220]: Failed to apply ACL on /dev/v4l-subdev0: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[224]: Failed to apply ACL on /dev/v4l-subdev1: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[215]: Failed to apply ACL on /dev/v4l-subdev10: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[228]: Failed to apply ACL on /dev/v4l-subdev2: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[232]: Failed to apply ACL on /dev/v4l-subdev5: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[217]: Failed to apply ACL on /dev/v4l-subdev11: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[214]: Failed to apply ACL on /dev/dri/card1: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[216]: Failed to apply ACL on /dev/v4l-subdev8: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[226]: Failed to apply ACL on /dev/v4l-subdev9: Operation not supported and nasty follow-ups: Starting weston from sddm as unpriviledged user fails with some hints on missing access rights." Select the CONFIG_TMPFS_POSIX_ACL option to fix these issues. Reported-by: Andreas Müller Signed-off-by: Fabio Estevam Acked-by: Otavio Salvador Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/imx_v6_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 32acac9ab81a..9c795ceedd5b 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -383,6 +383,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y +CONFIG_TMPFS_POSIX_ACL=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y From 3374b0b15ec7c25aa98274aa95dbdea6d6d2af88 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 1 Oct 2018 16:21:51 +1000 Subject: [PATCH 0405/2608] powerpc/nohash: fix undefined behaviour when testing page size support [ Upstream commit f5e284803a7206d43e26f9ffcae5de9626d95e37 ] When enumerating page size definitions to check hardware support, we construct a constant which is (1U << (def->shift - 10)). However, the array of page size definitions is only initalised for various MMU_PAGE_* constants, so it contains a number of 0-initialised elements with def->shift == 0. This means we end up shifting by a very large number, which gives the following UBSan splat: ================================================================================ UBSAN: Undefined behaviour in /home/dja/dev/linux/linux/arch/powerpc/mm/tlb_nohash.c:506:21 shift exponent 4294967286 is too large for 32-bit type 'unsigned int' CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.0-rc3-00045-ga604f927b012-dirty #6 Call Trace: [c00000000101bc20] [c000000000a13d54] .dump_stack+0xa8/0xec (unreliable) [c00000000101bcb0] [c0000000004f20a8] .ubsan_epilogue+0x18/0x64 [c00000000101bd30] [c0000000004f2b10] .__ubsan_handle_shift_out_of_bounds+0x110/0x1a4 [c00000000101be20] [c000000000d21760] .early_init_mmu+0x1b4/0x5a0 [c00000000101bf10] [c000000000d1ba28] .early_setup+0x100/0x130 [c00000000101bf90] [c000000000000528] start_here_multiplatform+0x68/0x80 ================================================================================ Fix this by first checking if the element exists (shift != 0) before constructing the constant. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/tlb_nohash.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index bfc4a0869609..2ae18e90e0ba 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -500,6 +500,9 @@ static void setup_page_sizes(void) for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { struct mmu_psize_def *def = &mmu_psize_defs[psize]; + if (!def->shift) + continue; + if (tlb1ps & (1U << (def->shift - 10))) { def->flags |= MMU_PAGE_SIZE_DIRECT; From 4cb592436db9ff3ff394e6d8d951545a15fc66e8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 13 Aug 2018 13:19:52 +0000 Subject: [PATCH 0406/2608] powerpc/mm: Don't report hugepage tables as memory leaks when using kmemleak [ Upstream commit 803d690e68f0c5230183f1a42c7d50a41d16e380 ] When a process allocates a hugepage, the following leak is reported by kmemleak. This is a false positive which is due to the pointer to the table being stored in the PGD as physical memory address and not virtual memory pointer. unreferenced object 0xc30f8200 (size 512): comm "mmap", pid 374, jiffies 4872494 (age 627.630s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] huge_pte_alloc+0xdc/0x1f8 [<9e0df1e1>] hugetlb_fault+0x560/0x8f8 [<7938ec6c>] follow_hugetlb_page+0x14c/0x44c [] __get_user_pages+0x1c4/0x3dc [] __mm_populate+0xac/0x140 [<3215421e>] vm_mmap_pgoff+0xb4/0xb8 [] ksys_mmap_pgoff+0xcc/0x1fc [<4fcd760f>] ret_from_syscall+0x0/0x38 See commit a984506c542e2 ("powerpc/mm: Don't report PUDs as memory leaks when using kmemleak") for detailed explanation. To fix that, this patch tells kmemleak to ignore the allocated hugepage table. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/hugetlbpage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 4c9e5f9c7a44..e2d929ddad7f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,8 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); kmem_cache_free(cachep, new); + } else { + kmemleak_ignore(new); } spin_unlock(&mm->page_table_lock); return 0; From 547d528ea395d104dcb31a18b2d51a285e0973aa Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 26 Sep 2018 12:11:27 +0300 Subject: [PATCH 0407/2608] drm/omap: fix memory barrier bug in DMM driver [ Upstream commit 538f66ba204944470a653a4cccc5f8befdf97c22 ] A DMM timeout "timed out waiting for done" has been observed on DRA7 devices. The timeout happens rarely, and only when the system is under heavy load. Debugging showed that the timeout can be made to happen much more frequently by optimizing the DMM driver, so that there's almost no code between writing the last DMM descriptors to RAM, and writing to DMM register which starts the DMM transaction. The current theory is that a wmb() does not properly ensure that the data written to RAM is observable by all the components in the system. This DMM timeout has caused interesting (and rare) bugs as the error handling was not functioning properly (the error handling has been fixed in previous commits): * If a DMM timeout happened when a GEM buffer was being pinned for display on the screen, a timeout error would be shown, but the driver would continue programming DSS HW with broken buffer, leading to SYNCLOST floods and possible crashes. * If a DMM timeout happened when other user (say, video decoder) was pinning a GEM buffer, a timeout would be shown but if the user handled the error properly, no other issues followed. * If a DMM timeout happened when a GEM buffer was being released, the driver does not even notice the error, leading to crashes or hang later. This patch adds wmb() and readl() calls after the last bit is written to RAM, which should ensure that the execution proceeds only after the data is actually in RAM, and thus observable by DMM. The read-back should not be needed. Further study is required to understand if DMM is somehow special case and read-back is ok, or if DRA7's memory barriers do not work correctly. Signed-off-by: Tomi Valkeinen Signed-off-by: Peter Ujfalusi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index df05fe53c399..32901c6fe3df 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -273,6 +273,17 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait) } txn->last_pat->next_pa = 0; + /* ensure that the written descriptors are visible to DMM */ + wmb(); + + /* + * NOTE: the wmb() above should be enough, but there seems to be a bug + * in OMAP's memory barrier implementation, which in some rare cases may + * cause the writes not to be observable after wmb(). + */ + + /* read back to ensure the data is in RAM */ + readl(&txn->last_pat->next_pa); /* write to PAT_DESCR to clear out any pending transaction */ dmm_write(dmm, 0x0, reg[PAT_DESCR][engine->id]); From 18490ea7607c1c136b385075219e65760a656f05 Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 22 Sep 2018 01:25:25 +0800 Subject: [PATCH 0408/2608] drm/hisilicon: hibmc: Do not carry error code in HiBMC framebuffer pointer [ Upstream commit 331d880b35a76b5de0eec8cbcecbf615d758a5f9 ] In hibmc_drm_fb_create(), when the call to hibmc_framebuffer_init() fails with error, do not store the error code in the HiBMC device frame-buffer pointer, as this will be later checked for non-zero value in hibmc_fbdev_destroy() when our intention is to check for a valid function pointer. This fixes the following crash: [ 9.699791] Unable to handle kernel NULL pointer dereference at virtual address 000000000000001a [ 9.708672] Mem abort info: [ 9.711489] ESR = 0x96000004 [ 9.714570] Exception class = DABT (current EL), IL = 32 bits [ 9.720551] SET = 0, FnV = 0 [ 9.723631] EA = 0, S1PTW = 0 [ 9.726799] Data abort info: [ 9.729702] ISV = 0, ISS = 0x00000004 [ 9.733573] CM = 0, WnR = 0 [ 9.736566] [000000000000001a] user address but active_mm is swapper [ 9.742987] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 9.748614] Modules linked in: [ 9.751694] CPU: 16 PID: 293 Comm: kworker/16:1 Tainted: G W 4.19.0-rc4-next-20180920-00001-g9b0012c #322 [ 9.762681] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT21 Nemo 2.0 RC0 04/18/2018 [ 9.771915] Workqueue: events work_for_cpu_fn [ 9.776312] pstate: 60000005 (nZCv daif -PAN -UAO) [ 9.781150] pc : drm_mode_object_put+0x0/0x20 [ 9.785547] lr : hibmc_fbdev_fini+0x40/0x58 [ 9.789767] sp : ffff00000af1bcf0 [ 9.793108] x29: ffff00000af1bcf0 x28: 0000000000000000 [ 9.798473] x27: 0000000000000000 x26: ffff000008f66630 [ 9.803838] x25: 0000000000000000 x24: ffff0000095abb98 [ 9.809203] x23: ffff8017db92fe00 x22: ffff8017d2b13000 [ 9.814568] x21: ffffffffffffffea x20: ffff8017d2f80018 [ 9.819933] x19: ffff8017d28a0018 x18: ffffffffffffffff [ 9.825297] x17: 0000000000000000 x16: 0000000000000000 [ 9.830662] x15: ffff0000092296c8 x14: ffff00008939970f [ 9.836026] x13: ffff00000939971d x12: ffff000009229940 [ 9.841391] x11: ffff0000085f8fc0 x10: ffff00000af1b9a0 [ 9.846756] x9 : 000000000000000d x8 : 6620657a696c6169 [ 9.852121] x7 : ffff8017d3340580 x6 : ffff8017d4168000 [ 9.857486] x5 : 0000000000000000 x4 : ffff8017db92fb20 [ 9.862850] x3 : 0000000000002690 x2 : ffff8017d3340480 [ 9.868214] x1 : 0000000000000028 x0 : 0000000000000002 [ 9.873580] Process kworker/16:1 (pid: 293, stack limit = 0x(____ptrval____)) [ 9.880788] Call trace: [ 9.883252] drm_mode_object_put+0x0/0x20 [ 9.887297] hibmc_unload+0x1c/0x80 [ 9.890815] hibmc_pci_probe+0x170/0x3c8 [ 9.894773] local_pci_probe+0x3c/0xb0 [ 9.898555] work_for_cpu_fn+0x18/0x28 [ 9.902337] process_one_work+0x1e0/0x318 [ 9.906382] worker_thread+0x228/0x450 [ 9.910164] kthread+0x128/0x130 [ 9.913418] ret_from_fork+0x10/0x18 [ 9.917024] Code: a94153f3 a8c27bfd d65f03c0 d503201f (f9400c01) [ 9.923180] ---[ end trace 2695ffa0af5be375 ]--- Fixes: d1667b86795a ("drm/hisilicon/hibmc: Add support for frame buffer") Signed-off-by: John Garry Reviewed-by: Xinliang Liu Signed-off-by: Xinliang Liu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c index b92595c477ef..8bd29075ae4e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c @@ -122,6 +122,7 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper, hi_fbdev->fb = hibmc_framebuffer_init(priv->dev, &mode_cmd, gobj); if (IS_ERR(hi_fbdev->fb)) { ret = PTR_ERR(hi_fbdev->fb); + hi_fbdev->fb = NULL; DRM_ERROR("failed to initialize framebuffer: %d\n", ret); goto out_release_fbi; } From 27f612940d5cf7b5d4bae316c4dd6dfead6a6ec6 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sun, 9 Sep 2018 12:02:32 -0400 Subject: [PATCH 0409/2608] media: pci: cx23885: handle adding to list failure [ Upstream commit c5d59528e24ad22500347b199d52b9368e686a42 ] altera_hw_filt_init() which calls append_internal() assumes that the node was successfully linked in while in fact it can silently fail. So the call-site needs to set return to -ENOMEM on append_internal() returning NULL and exit through the err path. Fixes: 349bcf02e361 ("[media] Altera FPGA based CI driver module") Signed-off-by: Nicholas Mc Guire Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/cx23885/altera-ci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/media/pci/cx23885/altera-ci.c b/drivers/media/pci/cx23885/altera-ci.c index 5c94e312cba3..f77254cc16bf 100644 --- a/drivers/media/pci/cx23885/altera-ci.c +++ b/drivers/media/pci/cx23885/altera-ci.c @@ -665,6 +665,10 @@ static int altera_hw_filt_init(struct altera_ci_config *config, int hw_filt_nr) } temp_int = append_internal(inter); + if (!temp_int) { + ret = -ENOMEM; + goto err; + } inter->filts_used = 1; inter->dev = config->dev; inter->fpga_rw = config->fpga_rw; @@ -699,6 +703,7 @@ err: __func__, ret); kfree(pid_filt); + kfree(inter); return ret; } @@ -733,6 +738,10 @@ int altera_ci_init(struct altera_ci_config *config, int ci_nr) } temp_int = append_internal(inter); + if (!temp_int) { + ret = -ENOMEM; + goto err; + } inter->cis_used = 1; inter->dev = config->dev; inter->fpga_rw = config->fpga_rw; @@ -801,6 +810,7 @@ err: ci_dbg_print("%s: Cannot initialize CI: Error %d.\n", __func__, ret); kfree(state); + kfree(inter); return ret; } From e56c482e4e7f4d39a851ebceb7652be59d248208 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 1 Aug 2018 10:18:04 -0400 Subject: [PATCH 0410/2608] media: coda: don't overwrite h.264 profile_idc on decoder instance [ Upstream commit 1f32061e843205f6fe8404d5100d5adcec334e75 ] On a decoder instance, after the profile has been parsed from the stream __v4l2_ctrl_s_ctrl() is called to notify userspace about changes in the read-only profile control. This ends up calling back into the CODA driver where a missing check on the s_ctrl caused the profile information that has just been parsed from the stream to be overwritten with the default baseline profile. Later on the driver fails to enable frame reordering, based on the wrong profile information. Fixes: 347de126d1da (media: coda: add read-only h.264 decoder profile/level controls) Signed-off-by: Lucas Stach Reviewed-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/coda/coda-common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 15eb5dc4dff9..99d138d3f87f 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -1686,7 +1686,8 @@ static int coda_s_ctrl(struct v4l2_ctrl *ctrl) break; case V4L2_CID_MPEG_VIDEO_H264_PROFILE: /* TODO: switch between baseline and constrained baseline */ - ctx->params.h264_profile_idc = 66; + if (ctx->inst_type == CODA_INST_ENCODER) + ctx->params.h264_profile_idc = 66; break; case V4L2_CID_MPEG_VIDEO_H264_LEVEL: /* nothing to do, this is set by the encoder */ From 806be82cd255646c8fa872529447be1289c12c6c Mon Sep 17 00:00:00 2001 From: Dengcheng Zhu Date: Tue, 11 Sep 2018 14:49:20 -0700 Subject: [PATCH 0411/2608] MIPS: kexec: Mark CPU offline before disabling local IRQ [ Upstream commit dc57aaf95a516f70e2d527d8287a0332c481a226 ] After changing CPU online status, it will not be sent any IPIs such as in __flush_cache_all() on software coherency systems. Do this before disabling local IRQ. Signed-off-by: Dengcheng Zhu Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20571/ Cc: pburton@wavecomp.com Cc: ralf@linux-mips.org Cc: linux-mips@linux-mips.org Cc: rachel.mozes@intel.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/crash.c | 3 +++ arch/mips/kernel/machine_kexec.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index d455363d51c3..4c07a43a3242 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -36,6 +36,9 @@ static void crash_shutdown_secondary(void *passed_regs) if (!cpu_online(cpu)) return; + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + local_irq_disable(); if (!cpumask_test_cpu(cpu, &cpus_in_crash)) crash_save_cpu(regs, cpu); diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c index 8b574bcd39ba..4b3726e4fe3a 100644 --- a/arch/mips/kernel/machine_kexec.c +++ b/arch/mips/kernel/machine_kexec.c @@ -118,6 +118,9 @@ machine_kexec(struct kimage *image) *ptr = (unsigned long) phys_to_virt(*ptr); } + /* Mark offline BEFORE disabling local irq. */ + set_cpu_online(smp_processor_id(), false); + /* * we do not want to be bothered. */ From f386e1e50e35ea51c6bd977eb4808c2f429a54df Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 14 Sep 2018 13:36:47 +0930 Subject: [PATCH 0412/2608] powerpc/boot: Ensure _zimage_start is a weak symbol [ Upstream commit ee9d21b3b3583712029a0db65a4b7c081d08d3b3 ] When building with clang crt0's _zimage_start is not marked weak, which breaks the build when linking the kernel image: $ objdump -t arch/powerpc/boot/crt0.o |grep _zimage_start$ 0000000000000058 g .text 0000000000000000 _zimage_start ld: arch/powerpc/boot/wrapper.a(crt0.o): in function '_zimage_start': (.text+0x58): multiple definition of '_zimage_start'; arch/powerpc/boot/pseries-head.o:(.text+0x0): first defined here Clang requires the .weak directive to appear after the symbol is declared. The binutils manual says: This directive sets the weak attribute on the comma separated list of symbol names. If the symbols do not already exist, they will be created. So it appears this is different with clang. The only reference I could see for this was an OpenBSD mailing list post[1]. Changing it to be after the declaration fixes building with Clang, and still works with GCC. $ objdump -t arch/powerpc/boot/crt0.o |grep _zimage_start$ 0000000000000058 w .text 0000000000000000 _zimage_start Reported to clang as https://bugs.llvm.org/show_bug.cgi?id=38921 [1] https://groups.google.com/forum/#!topic/fa.openbsd.tech/PAgKKen2YCY Signed-off-by: Joel Stanley Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/crt0.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index dcf2f15e6797..32dfe6d083f3 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -47,8 +47,10 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .weak _zimage_start .globl _zimage_start + /* Clang appears to require the .weak directive to be after the symbol + * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ + .weak _zimage_start _zimage_start: .globl _zimage_start_lib _zimage_start_lib: From 983721397fe0a6b2a5937da74b43d4bc0bbb13fe Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 17 Aug 2018 14:25:01 +1000 Subject: [PATCH 0413/2608] powerpc/memtrace: Remove memory in chunks [ Upstream commit 3f7daf3d7582dc6628ac40a9045dd1bbd80c5f35 ] When hot-removing memory release_mem_region_adjustable() splits iomem resources if they are not the exact size of the memory being hot-deleted. Adding this memory back to the kernel adds a new resource. Eg a node has memory 0x0 - 0xfffffffff. Hot-removing 1GB from 0xf40000000 results in the single resource 0x0-0xfffffffff being split into two resources: 0x0-0xf3fffffff and 0xf80000000-0xfffffffff. When we hot-add the memory back we now have three resources: 0x0-0xf3fffffff, 0xf40000000-0xf7fffffff, and 0xf80000000-0xfffffffff. This is an issue if we try to remove some memory that overlaps resources. Eg when trying to remove 2GB at address 0xf40000000, release_mem_region_adjustable() fails as it expects the chunk of memory to be within the boundaries of a single resource. We then get the warning: "Unable to release resource" and attempting to use memtrace again gives us this error: "bash: echo: write error: Resource temporarily unavailable" This patch makes memtrace remove memory in chunks that are always the same size from an address that is always equal to end_of_memory - n*size, for some n. So hotremoving and hotadding memory of different sizes will now not attempt to remove memory that spans multiple resources. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/memtrace.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index fc222a0c2ac4..c9a6d4f3403c 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -119,17 +119,15 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, change_memblock_state); - lock_device_hotplug(); - remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); - unlock_device_hotplug(); return true; } static u64 memtrace_alloc_node(u32 nid, u64 size) { - u64 start_pfn, end_pfn, nr_pages; + u64 start_pfn, end_pfn, nr_pages, pfn; u64 base_pfn; + u64 bytes = memory_block_size_bytes(); if (!NODE_DATA(nid) || !node_spanned_pages(nid)) return 0; @@ -142,8 +140,21 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) end_pfn = round_down(end_pfn - nr_pages, nr_pages); for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { - if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) + if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { + /* + * Remove memory in memory block size chunks so that + * iomem resources are always split to the same size and + * we never try to remove memory that spans two iomem + * resources. + */ + lock_device_hotplug(); + end_pfn = base_pfn + nr_pages; + for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { + remove_memory(nid, pfn << PAGE_SHIFT, bytes); + } + unlock_device_hotplug(); return base_pfn << PAGE_SHIFT; + } } return 0; From f9cb913a7959321bf491b824e5f207a2520cd412 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 15 Sep 2018 14:01:12 +0800 Subject: [PATCH 0414/2608] MIPS/PCI: Call pcie_bus_configure_settings() to set MPS/MRRS [ Upstream commit 2794f688b2c336e0da85e9f91fed33febbd9f54a ] Call pcie_bus_configure_settings() on MIPS, like for other platforms. The function pcie_bus_configure_settings() makes sure the MPS (Max Payload Size) across the bus is uniform and provides the ability to tune the MRSS (Max Read Request Size) and MPS (Max Payload Size) to higher performance values. Some devices will not operate properly if these aren't set correctly because the firmware doesn't always do it. Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20649/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/pci/pci-legacy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 0c65c38e05d6..1ae6bc414e2b 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -127,8 +127,12 @@ static void pcibios_scanbus(struct pci_controller *hose) if (pci_has_flag(PCI_PROBE_ONLY)) { pci_bus_claim_resources(bus); } else { + struct pci_bus *child; + pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } pci_bus_add_devices(bus); } From 4d0df50d740ecfd8ca7831c739e5c98c7466a94a Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Wed, 12 Sep 2018 15:31:55 +0100 Subject: [PATCH 0415/2608] sc16is7xx: Fix for multi-channel stall [ Upstream commit 8344498721059754e09d30fe255a12dab8fb03ef ] The SC16IS752 is a dual-channel device. The two channels are largely independent, but the IRQ signals are wired together as an open-drain, active low signal which will be driven low while either of the channels requires attention, which can be for significant periods of time until operations complete and the interrupt can be acknowledged. In that respect it is should be treated as a true level-sensitive IRQ. The kernel, however, needs to be able to exit interrupt context in order to use I2C or SPI to access the device registers (which may involve sleeping). Therefore the interrupt needs to be masked out or paused in some way. The usual way to manage sleeping from within an interrupt handler is to use a threaded interrupt handler - a regular interrupt routine does the minimum amount of work needed to triage the interrupt before waking the interrupt service thread. If the threaded IRQ is marked as IRQF_ONESHOT the kernel will automatically mask out the interrupt until the thread runs to completion. The sc16is7xx driver used to use a threaded IRQ, but a patch switched to using a kthread_worker in order to set realtime priorities on the handler thread and for other optimisations. The end result is non-threaded IRQ that schedules some work then returns IRQ_HANDLED, making the kernel think that all IRQ processing has completed. The work-around to prevent a constant stream of interrupts is to mark the interrupt as edge-sensitive rather than level-sensitive, but interpreting an active-low source as a falling-edge source requires care to prevent a total cessation of interrupts. Whereas an edge-triggering source will generate a new edge for every interrupt condition a level-triggering source will keep the signal at the interrupting level until it no longer requires attention; in other words, the host won't see another edge until all interrupt conditions are cleared. It is therefore vital that the interrupt handler does not exit with an outstanding interrupt condition, otherwise the kernel will not receive another interrupt unless some other operation causes the interrupt state on the device to be cleared. The existing sc16is7xx driver has a very simple interrupt "thread" (kthread_work job) that processes interrupts on each channel in turn until there are no more. If both channels are active and the first channel starts interrupting while the handler for the second channel is running then it will not be detected and an IRQ stall ensues. This could be handled easily if there was a shared IRQ status register, or a convenient way to determine if the IRQ had been deasserted for any length of time, but both appear to be lacking. Avoid this problem (or at least make it much less likely to happen) by reducing the granularity of per-channel interrupt processing to one condition per iteration, only exiting the overall loop when both channels are no longer interrupting. Signed-off-by: Phil Elwell Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index ca54ce074a5f..a79f18edf2bd 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -662,7 +662,7 @@ static void sc16is7xx_handle_tx(struct uart_port *port) uart_write_wakeup(port); } -static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) +static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) { struct uart_port *port = &s->p[portno].port; @@ -671,7 +671,7 @@ static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); if (iir & SC16IS7XX_IIR_NO_INT_BIT) - break; + return false; iir &= SC16IS7XX_IIR_ID_MASK; @@ -693,16 +693,23 @@ static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) port->line, iir); break; } - } while (1); + } while (0); + return true; } static void sc16is7xx_ist(struct kthread_work *ws) { struct sc16is7xx_port *s = to_sc16is7xx_port(ws, irq_work); - int i; - for (i = 0; i < s->devtype->nr_uart; ++i) - sc16is7xx_port_irq(s, i); + while (1) { + bool keep_polling = false; + int i; + + for (i = 0; i < s->devtype->nr_uart; ++i) + keep_polling |= sc16is7xx_port_irq(s, i); + if (!keep_polling) + break; + } } static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) From 3e59ed2e314373c6c91599b027e95b607f5cb584 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Thu, 28 Jun 2018 12:20:33 -0400 Subject: [PATCH 0416/2608] media: tvp5150: fix width alignment during set_selection() [ Upstream commit bd24db04101f45a9c1d874fe21b0c7eab7bcadec ] The driver ignored the width alignment which exists due to the UYVY colorspace format. Fix the width alignment and make use of the the provided v4l2 helper function to set the width, height and all alignments in one. Fixes: 963ddc63e20d ("[media] media: tvp5150: Add cropping support") Signed-off-by: Marco Felsch Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 4d3e97f97c76..b41f7fafb731 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -900,9 +900,6 @@ static int tvp5150_set_selection(struct v4l2_subdev *sd, /* tvp5150 has some special limits */ rect.left = clamp(rect.left, 0, TVP5150_MAX_CROP_LEFT); - rect.width = clamp_t(unsigned int, rect.width, - TVP5150_H_MAX - TVP5150_MAX_CROP_LEFT - rect.left, - TVP5150_H_MAX - rect.left); rect.top = clamp(rect.top, 0, TVP5150_MAX_CROP_TOP); /* Calculate height based on current standard */ @@ -916,9 +913,16 @@ static int tvp5150_set_selection(struct v4l2_subdev *sd, else hmax = TVP5150_V_MAX_OTHERS; - rect.height = clamp_t(unsigned int, rect.height, + /* + * alignments: + * - width = 2 due to UYVY colorspace + * - height, image = no special alignment + */ + v4l_bound_align_image(&rect.width, + TVP5150_H_MAX - TVP5150_MAX_CROP_LEFT - rect.left, + TVP5150_H_MAX - rect.left, 1, &rect.height, hmax - TVP5150_MAX_CROP_TOP - rect.top, - hmax - rect.top); + hmax - rect.top, 0, 0); tvp5150_write(sd, TVP5150_VERT_BLANKING_START, rect.top); tvp5150_write(sd, TVP5150_VERT_BLANKING_STOP, From d3835bb8fa6758b4f38da3741e206192a6340f90 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 31 Jul 2018 17:55:57 -0300 Subject: [PATCH 0417/2608] powerpc/selftests: Wait all threads to join [ Upstream commit 693b31b2fc1636f0aa7af53136d3b49f6ad9ff39 ] Test tm-tmspr might exit before all threads stop executing, because it just waits for the very last thread to join before proceeding/exiting. This patch makes sure that all threads that were created will join before proceeding/exiting. This patch also guarantees that the amount of threads being created is equal to thread_num. Signed-off-by: Breno Leitao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/tm/tm-tmspr.c | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c index 2bda81c7bf23..df1d7d4b1c89 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -98,7 +98,7 @@ void texasr(void *in) int test_tmspr() { - pthread_t thread; + pthread_t *thread; int thread_num; unsigned long i; @@ -107,21 +107,28 @@ int test_tmspr() /* To cause some context switching */ thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN); - /* Test TFIAR and TFHAR */ - for (i = 0 ; i < thread_num ; i += 2){ - if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i)) - return EXIT_FAILURE; - } - if (pthread_join(thread, NULL) != 0) + thread = malloc(thread_num * sizeof(pthread_t)); + if (thread == NULL) return EXIT_FAILURE; - /* Test TEXASR */ - for (i = 0 ; i < thread_num ; i++){ - if (pthread_create(&thread, NULL, (void*)texasr, (void *)i)) + /* Test TFIAR and TFHAR */ + for (i = 0; i < thread_num; i += 2) { + if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar, + (void *)i)) return EXIT_FAILURE; } - if (pthread_join(thread, NULL) != 0) - return EXIT_FAILURE; + /* Test TEXASR */ + for (i = 1; i < thread_num; i += 2) { + if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i)) + return EXIT_FAILURE; + } + + for (i = 0; i < thread_num; i++) { + if (pthread_join(thread[i], NULL) != 0) + return EXIT_FAILURE; + } + + free(thread); if (passed) return 0; From 442b54290c782f22a848941064625cad3e263f39 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 13 Sep 2018 11:44:09 +0300 Subject: [PATCH 0418/2608] staging:iio:ad7606: fix voltage scales [ Upstream commit 4ee033301c898dd0835d035d0e0eb768a3d35da1 ] Fixes commit 17be2a2905a6ec9aa27cd59521495e2f490d2af0 ("staging: iio: ad7606: replace range/range_available with corresponding scale"). The AD7606 devices don't have a 2.5V voltage range, they have 5V & 10V voltage range, which is selectable via the `gpio_range` descriptor. The scales also seem to have been miscomputed, because when they were applied to the raw values, the results differ from the expected values. After checking the ADC transfer function in the datasheet, these were re-computed. Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/adc/ad7606.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/staging/iio/adc/ad7606.c b/drivers/staging/iio/adc/ad7606.c index 18f5f139117e..0ff458fbee5e 100644 --- a/drivers/staging/iio/adc/ad7606.c +++ b/drivers/staging/iio/adc/ad7606.c @@ -26,9 +26,12 @@ #include "ad7606.h" -/* Scales are computed as 2.5/2**16 and 5/2**16 respectively */ +/* + * Scales are computed as 5000/32768 and 10000/32768 respectively, + * so that when applied to the raw values they provide mV values + */ static const unsigned int scale_avail[2][2] = { - {0, 38147}, {0, 76294} + {0, 152588}, {0, 305176} }; static int ad7606_reset(struct ad7606_state *st) From 491fc097e3d49f777d2e45ec3ae6341cff986f44 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Sat, 8 Sep 2018 01:18:43 +0900 Subject: [PATCH 0419/2608] 9p locks: fix glock.client_id leak in do_lock [ Upstream commit b4dc44b3cac9e8327e0655f530ed0c46f2e6214c ] the 9p client code overwrites our glock.client_id pointing to a static buffer by an allocated string holding the network provided value which we do not care about; free and reset the value as appropriate. This is almost identical to the leak in v9fs_file_getlock() fixed by Al Viro in commit ce85dd58ad5a6 ("9p: we are leaking glock.client_id in v9fs_file_getlock()"), which was returned as an error by a coverity false positive -- while we are here attempt to make the code slightly more robust to future change of the net/9p/client code and hopefully more clear to coverity that there is no problem. Link: http://lkml.kernel.org/r/1536339057-21974-5-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 03c9e325bfbc..3a2f37ad1f89 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -204,6 +204,14 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) break; if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0) break; + /* + * p9_client_lock_dotl overwrites flock.client_id with the + * server message, free and reuse the client name + */ + if (flock.client_id != fid->clnt->name) { + kfree(flock.client_id); + flock.client_id = fid->clnt->name; + } } /* map 9p status to VFS status */ @@ -235,6 +243,8 @@ out_unlock: locks_lock_file_wait(filp, fl); fl->fl_type = fl_type; } + if (flock.client_id != fid->clnt->name) + kfree(flock.client_id); out: return res; } @@ -269,7 +279,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) res = p9_client_getlock_dotl(fid, &glock); if (res < 0) - return res; + goto out; /* map 9p lock type to os lock type */ switch (glock.type) { case P9_LOCK_TYPE_RDLCK: @@ -290,7 +300,9 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) fl->fl_end = glock.start + glock.length - 1; fl->fl_pid = -glock.proc_id; } - kfree(glock.client_id); +out: + if (glock.client_id != fid->clnt->name) + kfree(glock.client_id); return res; } From 18280c1260154343e01accb1d52a0dd321cfc828 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 28 Aug 2018 07:32:35 +0900 Subject: [PATCH 0420/2608] 9p: clear dangling pointers in p9stat_free [ Upstream commit 62e3941776fea8678bb8120607039410b1b61a65 ] p9stat_free is more of a cleanup function than a 'free' function as it only frees the content of the struct; there are chances of use-after-free if it is improperly used (e.g. p9stat_free called twice as it used to be possible to) Clearing dangling pointers makes the function idempotent and safer to use. Link: http://lkml.kernel.org/r/1535410108-20650-2-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Reported-by: syzbot+d4252148d198410b864f@syzkaller.appspotmail.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/9p/protocol.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 16e10680518c..9743837aebc6 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -46,10 +46,15 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); + stbuf->name = NULL; kfree(stbuf->uid); + stbuf->uid = NULL; kfree(stbuf->gid); + stbuf->gid = NULL; kfree(stbuf->muid); + stbuf->muid = NULL; kfree(stbuf->extension); + stbuf->extension = NULL; } EXPORT_SYMBOL(p9stat_free); From a8c254d8e96032d5bb235cb2e777203d9acda09d Mon Sep 17 00:00:00 2001 From: Young_X Date: Wed, 3 Oct 2018 12:54:29 +0000 Subject: [PATCH 0421/2608] cdrom: fix improper type cast, which can leat to information leak. commit e4f3aa2e1e67bb48dfbaaf1cad59013d5a5bc276 upstream. There is another cast from unsigned long to int which causes a bounds check to fail with specially crafted input. The value is then used as an index in the slot array in cdrom_slot_status(). This issue is similar to CVE-2018-16658 and CVE-2018-10940. Signed-off-by: Young_X Signed-off-by: Jens Axboe Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 8cfa10ab7abc..930b49606a8c 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2441,7 +2441,7 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi, return -ENOSYS; if (arg != CDSL_CURRENT && arg != CDSL_NONE) { - if ((int)arg >= cdi->capacity) + if (arg >= cdi->capacity) return -EINVAL; } From 0afa17be1a78bf35085bffb7d0e04b96b766f3ba Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 10 Oct 2018 19:10:06 +0300 Subject: [PATCH 0422/2608] ovl: fix error handling in ovl_verify_set_fh() commit babf4770be0adc69e6d2de150f4040f175e24beb upstream. We hit a BUG on kfree of an ERR_PTR()... Reported-by: syzbot+ff03fe05c717b82502d0@syzkaller.appspotmail.com Fixes: 8b88a2e64036 ("ovl: verify upper root dir matches lower root dir") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index d9468de3c951..8442f9839c90 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -368,8 +368,10 @@ int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, fh = ovl_encode_fh(origin, is_upper); err = PTR_ERR(fh); - if (IS_ERR(fh)) + if (IS_ERR(fh)) { + fh = NULL; goto fail; + } err = ovl_verify_origin_fh(dentry, fh); if (set && err == -ENODATA) From c60d5af75baaa459aa3619a0487637315de4f83c Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Fri, 31 Aug 2018 11:24:27 -0700 Subject: [PATCH 0423/2608] scsi: qla2xxx: Fix incorrect port speed being set for FC adapters commit 4c1458df9635c7e3ced155f594d2e7dfd7254e21 upstream. Fixes: 6246b8a1d26c7c ("[SCSI] qla2xxx: Enhancements to support ISP83xx.") Fixes: 1bb395485160d2 ("qla2xxx: Correct iiDMA-update calling conventions.") Cc: Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_mbx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 2d909e12e23a..929ec087b8eb 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3682,10 +3682,7 @@ qla2x00_set_idma_speed(scsi_qla_host_t *vha, uint16_t loop_id, mcp->mb[0] = MBC_PORT_PARAMS; mcp->mb[1] = loop_id; mcp->mb[2] = BIT_0; - if (IS_CNA_CAPABLE(vha->hw)) - mcp->mb[3] = port_speed & (BIT_5|BIT_4|BIT_3|BIT_2|BIT_1|BIT_0); - else - mcp->mb[3] = port_speed & (BIT_2|BIT_1|BIT_0); + mcp->mb[3] = port_speed & (BIT_5|BIT_4|BIT_3|BIT_2|BIT_1|BIT_0); mcp->mb[9] = vha->vp_idx; mcp->out_mb = MBX_9|MBX_3|MBX_2|MBX_1|MBX_0; mcp->in_mb = MBX_3|MBX_1|MBX_0; From 89fdc7f8646eba683709c3ecd1b7fa2d60e3f06a Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 31 Aug 2018 11:24:26 -0700 Subject: [PATCH 0424/2608] scsi: qla2xxx: Fix process response queue for ISP26XX and above commit b86ac8fd4b2f6ec2f9ca9194c56eac12d620096f upstream. This patch improves performance for 16G and above adapter by removing additional call to process_response_queue(). [mkp: typo] Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 2 -- drivers/scsi/qla2xxx/qla_iocb.c | 17 ----------------- 2 files changed, 19 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 0e19f6bc24ff..de40202b9ac7 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -7591,7 +7591,6 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) } icb->firmware_options_2 &= cpu_to_le32( ~(BIT_3 | BIT_2 | BIT_1 | BIT_0)); - vha->flags.process_response_queue = 0; if (ha->zio_mode != QLA_ZIO_DISABLED) { ha->zio_mode = QLA_ZIO_MODE_6; @@ -7603,7 +7602,6 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) icb->firmware_options_2 |= cpu_to_le32( (uint32_t)ha->zio_mode); icb->interrupt_delay_timer = cpu_to_le16(ha->zio_timer); - vha->flags.process_response_queue = 1; } /* enable RIDA Format2 */ diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 8d579bf0fc81..85cb4e30f742 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -1524,12 +1524,6 @@ qla24xx_start_scsi(srb_t *sp) /* Set chip new ring index. */ WRT_REG_DWORD(req->req_q_in, req->ring_index); - RD_REG_DWORD_RELAXED(&ha->iobase->isp24.hccr); - - /* Manage unprocessed RIO/ZIO commands in response queue. */ - if (vha->flags.process_response_queue && - rsp->ring_ptr->signature != RESPONSE_PROCESSED) - qla24xx_process_response_queue(vha, rsp); spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_SUCCESS; @@ -1723,12 +1717,6 @@ qla24xx_dif_start_scsi(srb_t *sp) /* Set chip new ring index. */ WRT_REG_DWORD(req->req_q_in, req->ring_index); - RD_REG_DWORD_RELAXED(&ha->iobase->isp24.hccr); - - /* Manage unprocessed RIO/ZIO commands in response queue. */ - if (vha->flags.process_response_queue && - rsp->ring_ptr->signature != RESPONSE_PROCESSED) - qla24xx_process_response_queue(vha, rsp); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -1878,11 +1866,6 @@ qla2xxx_start_scsi_mq(srb_t *sp) /* Set chip new ring index. */ WRT_REG_DWORD(req->req_q_in, req->ring_index); - /* Manage unprocessed RIO/ZIO commands in response queue. */ - if (vha->flags.process_response_queue && - rsp->ring_ptr->signature != RESPONSE_PROCESSED) - qla24xx_process_response_queue(vha, rsp); - spin_unlock_irqrestore(&qpair->qp_lock, flags); return QLA_SUCCESS; From 4b6f0ec528d6c56e7394dde2bb72ba2af35305ac Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 11 Sep 2018 10:18:24 -0700 Subject: [PATCH 0425/2608] scsi: qla2xxx: Remove stale debug trace message from tcm_qla2xxx commit 7c388f91ec1a59b0ed815b07b90536e2d57e1e1f upstream. Remove stale debug trace. Fixes: 1eb42f965ced ("qla2xxx: Make trace flags more readable") Cc: stable@vger.kernel.org #4.10 Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 3f82ea1b72dc..9465acd18df0 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -693,10 +693,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg_cnt = 0; cmd->offset = 0; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - if (cmd->trc_flags & TRC_XMIT_STATUS) { - pr_crit("Multiple calls for status = %p.\n", cmd); - dump_stack(); - } cmd->trc_flags |= TRC_XMIT_STATUS; if (se_cmd->data_direction == DMA_FROM_DEVICE) { From 21339e8cf0024bf6068a118d985f28417afc1d66 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 11 Sep 2018 10:18:21 -0700 Subject: [PATCH 0426/2608] scsi: qla2xxx: shutdown chip if reset fail commit 1e4ac5d6fe0a4af17e4b6251b884485832bf75a3 upstream. If chip unable to fully initialize, use full shutdown sequence to clear out any stale FW state. Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring") Cc: stable@vger.kernel.org #4.10 Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index de40202b9ac7..5c7d2628232d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -6077,7 +6077,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) * The next call disables the board * completely. */ - ha->isp_ops->reset_adapter(vha); + qla2x00_abort_isp_cleanup(vha); vha->flags.online = 0; clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags); From 0a7a9ed0ce1d1e669b50d5763168d4cdddba58b2 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 26 Sep 2018 22:05:14 -0700 Subject: [PATCH 0427/2608] scsi: qla2xxx: Fix re-using LoopID when handle is in use commit 5c6400536481d9ef44ef94e7bf2c7b8e81534db7 upstream. This patch fixes issue where driver clears NPort ID map instead of marking handle in use. Once driver clears NPort ID from the database, it can reuse the same NPort ID resulting in a PLOGI failure. [mkp: fixed Himanshu's SoB] Fixes: a084fd68e1d2 ("scsi: qla2xxx: Fix re-login for Nport Handle in use") Cc: Signed-of-by: Quinn Tran Reviewed-by: Ewan D. Milne Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 18 ++++-------------- drivers/scsi/qla2xxx/qla_target.c | 3 ++- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5c7d2628232d..041497c8ecd8 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1511,25 +1511,15 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) cid.b.rsvd_1 = 0; ql_dbg(ql_dbg_disc, vha, 0x20ec, - "%s %d %8phC LoopID 0x%x in use post gnl\n", + "%s %d %8phC lid %#x in use with pid %06x post gnl\n", __func__, __LINE__, ea->fcport->port_name, - ea->fcport->loop_id); + ea->fcport->loop_id, cid.b24); - if (IS_SW_RESV_ADDR(cid)) { - set_bit(ea->fcport->loop_id, vha->hw->loop_id_map); - ea->fcport->loop_id = FC_NO_LOOP_ID; - } else { - qla2x00_clear_loop_id(ea->fcport); - } + set_bit(ea->fcport->loop_id, vha->hw->loop_id_map); + ea->fcport->loop_id = FC_NO_LOOP_ID; qla24xx_post_gnl_work(vha, ea->fcport); break; case MBS_PORT_ID_USED: - ql_dbg(ql_dbg_disc, vha, 0x20ed, - "%s %d %8phC NPortId %02x%02x%02x inuse post gidpn\n", - __func__, __LINE__, ea->fcport->port_name, - ea->fcport->d_id.b.domain, ea->fcport->d_id.b.area, - ea->fcport->d_id.b.al_pa); - lid = ea->iop[1] & 0xffff; qlt_find_sess_invalidate_other(vha, wwn_to_u64(ea->fcport->port_name), diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index d6fe08de59a0..87e04c4a4982 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1203,7 +1203,8 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess, qla24xx_chk_fcp_state(sess); ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, - "Scheduling sess %p for deletion\n", sess); + "Scheduling sess %p for deletion %8phC\n", + sess, sess->port_name); INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn); queue_work(sess->vha->hw->wq, &sess->del_work); From d94b3a2375cbbd55e6961c7b0dd1f8a75ebc8e10 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 25 Sep 2018 12:28:55 +0300 Subject: [PATCH 0428/2608] fuse: Fix use-after-free in fuse_dev_do_read() commit bc78abbd55dd28e2287ec6d6502b842321a17c87 upstream. We may pick freed req in this way: [cpu0] [cpu1] fuse_dev_do_read() fuse_dev_do_write() list_move_tail(&req->list, ...); ... spin_unlock(&fpq->lock); ... ... request_end(fc, req); ... fuse_put_request(fc, req); if (test_bit(FR_INTERRUPTED, ...)) queue_interrupt(fiq, req); Fix that by keeping req alive until we finish all manipulations. Reported-by: syzbot+4e975615ca01f2277bdd@syzkaller.appspotmail.com Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ee8105af4001..9a1ca92f97fb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1309,12 +1309,14 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, goto out_end; } list_move_tail(&req->list, &fpq->processing); + __fuse_get_request(req); spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); if (test_bit(FR_INTERRUPTED, &req->flags)) queue_interrupt(fiq, req); + fuse_put_request(fc, req); return reqsize; From ab962e91008a93af5da403c652745883d19a6adf Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 25 Sep 2018 12:52:42 +0300 Subject: [PATCH 0429/2608] fuse: Fix use-after-free in fuse_dev_do_write() commit d2d2d4fb1f54eff0f3faa9762d84f6446a4bc5d0 upstream. After we found req in request_find() and released the lock, everything may happen with the req in parallel: cpu0 cpu1 fuse_dev_do_write() fuse_dev_do_write() req = request_find(fpq, ...) ... spin_unlock(&fpq->lock) ... ... req = request_find(fpq, oh.unique) ... spin_unlock(&fpq->lock) queue_interrupt(&fc->iq, req); ... ... ... ... ... request_end(fc, req); fuse_put_request(fc, req); ... queue_interrupt(&fc->iq, req); Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9a1ca92f97fb..48a88f94d5d8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1874,16 +1874,20 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, /* Is it an interrupt reply? */ if (req->intr_unique == oh.unique) { + __fuse_get_request(req); spin_unlock(&fpq->lock); err = -EINVAL; - if (nbytes != sizeof(struct fuse_out_header)) + if (nbytes != sizeof(struct fuse_out_header)) { + fuse_put_request(fc, req); goto err_finish; + } if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) queue_interrupt(&fc->iq, req); + fuse_put_request(fc, req); fuse_copy_finish(cs); return nbytes; From f6f21a2b70c6023c0b6ba103da62e214e02179bd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Sep 2018 16:43:22 +0200 Subject: [PATCH 0430/2608] fuse: fix blocked_waitq wakeup commit 908a572b80f6e9577b45e81b3dfe2e22111286b8 upstream. Using waitqueue_active() is racy. Make sure we issue a wake_up() unconditionally after storing into fc->blocked. After that it's okay to optimize with waitqueue_active() since the first wake up provides the necessary barrier for all waiters, not the just the woken one. Signed-off-by: Miklos Szeredi Fixes: 3c18ef8117f0 ("fuse: optimize wake_up") Cc: # v3.10 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 48a88f94d5d8..32537cfd29bb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -384,12 +384,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) if (test_bit(FR_BACKGROUND, &req->flags)) { spin_lock(&fc->lock); clear_bit(FR_BACKGROUND, &req->flags); - if (fc->num_background == fc->max_background) + if (fc->num_background == fc->max_background) { fc->blocked = 0; - - /* Wake up next waiter, if any */ - if (!fc->blocked && waitqueue_active(&fc->blocked_waitq)) wake_up(&fc->blocked_waitq); + } else if (!fc->blocked) { + /* + * Wake up next waiter, if any. It's okay to use + * waitqueue_active(), as we've already synced up + * fc->blocked with waiters with the wake_up() call + * above. + */ + if (waitqueue_active(&fc->blocked_waitq)) + wake_up(&fc->blocked_waitq); + } if (fc->num_background == fc->congestion_threshold && fc->sb) { clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); From 78da72ee42d82cfd001e863510eeb270bdd08143 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Sep 2018 16:43:22 +0200 Subject: [PATCH 0431/2608] fuse: set FR_SENT while locked commit 4c316f2f3ff315cb48efb7435621e5bfb81df96d upstream. Otherwise fuse_dev_do_write() could come in and finish off the request, and the set_bit(FR_SENT, ...) could trigger the WARN_ON(test_bit(FR_SENT, ...)) in request_end(). Signed-off-by: Miklos Szeredi Reported-by: syzbot+ef054c4d3f64cd7f7cec@syzkaller.appspotmai Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 32537cfd29bb..a8b114ae931f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1317,8 +1317,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, } list_move_tail(&req->list, &fpq->processing); __fuse_get_request(req); - spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); + spin_unlock(&fpq->lock); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); if (test_bit(FR_INTERRUPTED, &req->flags)) From 3c5cf7980bed91cc0f776eb9902b8b6adbe72edc Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 18 Oct 2018 09:45:49 +0300 Subject: [PATCH 0432/2608] ovl: fix recursive oi->lock in ovl_link() commit 6cd078702f2f33cb6b19a682de3e9184112f1a46 upstream. linking a non-copied-up file into a non-copied-up parent results in a nested call to mutex_lock_interruptible(&oi->lock). Fix this by copying up target parent before ovl_nlink_start(), same as done in ovl_rename(). ~/unionmount-testsuite$ ./run --ov -s ~/unionmount-testsuite$ ln /mnt/a/foo100 /mnt/a/dir100/ WARNING: possible recursive locking detected -------------------------------------------- ln/1545 is trying to acquire lock: 00000000bcce7c4c (&ovl_i_lock_key[depth]){+.+.}, at: ovl_copy_up_start+0x28/0x7d but task is already holding lock: 0000000026d73d5b (&ovl_i_lock_key[depth]){+.+.}, at: ovl_nlink_start+0x3c/0xc1 [SzM: this seems to be a false positive, but doing the copy-up first is harmless and removes the lockdep splat] Reported-by: syzbot+3ef5c0d1a5cb0b21e6be@syzkaller.appspotmail.com Fixes: 5f8415d6b87e ("ovl: persistent overlay inode nlink for...") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi [amir: backport to v4.18] Signed-off-by: Amir Goldstein Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index cc961a3bd3bd..0568898393f2 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -595,6 +595,11 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) goto out_drop_write; + err = ovl_copy_up(new->d_parent); + if (err) + goto out_drop_write; + + err = ovl_nlink_start(old, &locked); if (err) goto out_drop_write; From 639fa868ceeb70c59bec3bc54c828cc5566cca23 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 5 Sep 2018 17:33:08 +0800 Subject: [PATCH 0433/2608] MIPS: Loongson-3: Fix CPU UART irq delivery problem [ Upstream commit d06f8a2f1befb5a3d0aa660ab1c05e9b744456ea ] Masking/unmasking the CPU UART irq in CP0_Status (and redirecting it to other CPUs) may cause interrupts be lost, especially in multi-package machines (Package-0's UART irq cannot be delivered to others). So make mask_loongson_irq() and unmask_loongson_irq() be no-ops. The original problem (UART IRQ may deliver to any core) is also because of masking/unmasking the CPU UART irq in CP0_Status. So it is safe to remove all of the stuff. Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20433/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Signed-off-by: Sasha Levin --- arch/mips/loongson64/loongson-3/irq.c | 43 ++------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index cbeb20f9fc95..2e115ab66a00 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c @@ -102,45 +102,8 @@ static struct irqaction cascade_irqaction = { .name = "cascade", }; -static inline void mask_loongson_irq(struct irq_data *d) -{ - clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE)); - irq_disable_hazard(); - - /* Workaround: UART IRQ may deliver to any core */ - if (d->irq == LOONGSON_UART_IRQ) { - int cpu = smp_processor_id(); - int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node; - int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node; - u64 intenclr_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_INTENCLR); - u64 introuter_lpc_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_LPC); - - *(volatile u32 *)intenclr_addr = 1 << 10; - *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<irq == LOONGSON_UART_IRQ) { - int cpu = smp_processor_id(); - int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node; - int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node; - u64 intenset_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_INTENSET); - u64 introuter_lpc_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_LPC); - - *(volatile u32 *)intenset_addr = 1 << 10; - *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<irq - MIPS_CPU_IRQ_BASE)); - irq_enable_hazard(); -} +static inline void mask_loongson_irq(struct irq_data *d) { } +static inline void unmask_loongson_irq(struct irq_data *d) { } /* For MIPS IRQs which shared by all cores */ static struct irq_chip loongson_irq_chip = { @@ -183,7 +146,7 @@ void __init mach_init_irq(void) chip->irq_set_affinity = plat_set_irq_affinity; irq_set_chip_and_handler(LOONGSON_UART_IRQ, - &loongson_irq_chip, handle_level_irq); + &loongson_irq_chip, handle_percpu_irq); /* setup HT1 irq */ setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); From 14563f42672632609f637c88cfe7411d6a06ee64 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 5 Sep 2018 17:33:09 +0800 Subject: [PATCH 0434/2608] MIPS: Loongson-3: Fix BRIDGE irq delivery problem [ Upstream commit 360fe725f8849aaddc53475fef5d4a0c439b05ae ] After commit e509bd7da149dc349160 ("genirq: Allow migration of chained interrupts by installing default action") Loongson-3 fails at here: setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); This is because both chained_action and cascade_irqaction don't have IRQF_SHARED flag. This will cause Loongson-3 resume fails because HPET timer interrupt can't be delivered during S3. So we set the irqchip of the chained irq to loongson_irq_chip which doesn't disable the chained irq in CP0.Status. Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20434/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Signed-off-by: Sasha Levin --- arch/mips/include/asm/mach-loongson64/irq.h | 2 +- arch/mips/loongson64/loongson-3/irq.c | 13 +++---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index 3644b68c0ccc..be9f727a9328 100644 --- a/arch/mips/include/asm/mach-loongson64/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -10,7 +10,7 @@ #define MIPS_CPU_IRQ_BASE 56 #define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 2) /* UART */ -#define LOONGSON_HT1_IRQ (MIPS_CPU_IRQ_BASE + 3) /* HT1 */ +#define LOONGSON_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 3) /* CASCADE */ #define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* CPU Timer */ #define LOONGSON_HT1_CFG_BASE loongson_sysconf.ht_control_base diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index 2e115ab66a00..5605061f5f98 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c @@ -96,12 +96,6 @@ void mach_irq_dispatch(unsigned int pending) } } -static struct irqaction cascade_irqaction = { - .handler = no_action, - .flags = IRQF_NO_SUSPEND, - .name = "cascade", -}; - static inline void mask_loongson_irq(struct irq_data *d) { } static inline void unmask_loongson_irq(struct irq_data *d) { } @@ -147,11 +141,10 @@ void __init mach_init_irq(void) irq_set_chip_and_handler(LOONGSON_UART_IRQ, &loongson_irq_chip, handle_percpu_irq); + irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ, + &loongson_irq_chip, handle_percpu_irq); - /* setup HT1 irq */ - setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); - - set_c0_status(STATUSF_IP2 | STATUSF_IP6); + set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6); } #ifdef CONFIG_HOTPLUG_CPU From 1592c520d42724546c376d9e1e7aacd2f5de9c64 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 29 Oct 2018 18:30:13 -0700 Subject: [PATCH 0435/2608] xtensa: add NOTES section to the linker script commit 4119ba211bc4f1bf638f41e50b7a0f329f58aa16 upstream. This section collects all source .note.* sections together in the vmlinux image. Without it .note.Linux section may be placed at address 0, while the rest of the kernel is at its normal address, resulting in a huge vmlinux.bin image that may not be linked into the xtensa Image.elf. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/boot/Makefile | 2 +- arch/xtensa/kernel/vmlinux.lds.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index 53e4178711e6..8c20a7965bda 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -34,7 +34,7 @@ boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y)) \ $(addprefix $(obj)/,$(host-progs)) $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS) -OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary +OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 162c77e53ca8..3e04845a15e6 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -146,6 +146,7 @@ SECTIONS .fixup : { *(.fixup) } EXCEPTION_TABLE(16) + NOTES /* Data section */ _sdata = .; From f221f7b447f34778647bef9db70eed71648d4f28 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 4 Nov 2018 01:46:00 -0700 Subject: [PATCH 0436/2608] xtensa: make sure bFLT stack is 16 byte aligned commit 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 upstream. Xtensa ABI requires stack alignment to be at least 16. In noMMU configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at least 16. This fixes the following runtime error in noMMU configuration, caused by interaction between insufficiently aligned stack and alloca function, that results in corruption of on-stack variable in the libc function glob: Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65) - should not happen EXCCAUSE is 15 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/processor.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 5b0027d4ecc0..a39cd81b741a 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -24,7 +24,11 @@ # error Linux requires the Xtensa Windowed Registers Option. #endif -#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH +/* Xtensa ABI requires stack alignment to be at least 16 */ + +#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) + +#define ARCH_SLAB_MINALIGN STACK_ALIGN /* * User space process size: 1 GB. From fa68fdf0a20137a416f5a5cce72aed098faa6858 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 13 Nov 2018 23:46:42 -0800 Subject: [PATCH 0437/2608] xtensa: fix boot parameters address translation commit 40dc948f234b73497c3278875eb08a01d5854d3f upstream. The bootloader may pass physical address of the boot parameters structure to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in the arch/xtensa/kernel/head.S is supposed to map that physical address to the virtual address in the configured virtual memory layout. This code haven't been updated when additional 256+256 and 512+512 memory layouts were introduced and it may produce wrong addresses when used with these layouts. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/head.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 23ce62e60435..27c8e07ace43 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -88,9 +88,12 @@ _SetupMMU: initialize_mmu #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY rsr a2, excsave1 - movi a3, 0x08000000 + movi a3, XCHAL_KSEG_PADDR + bltu a2, a3, 1f + sub a2, a2, a3 + movi a3, XCHAL_KSEG_SIZE bgeu a2, a3, 1f - movi a3, 0xd0000000 + movi a3, XCHAL_KSEG_CACHED_VADDR add a2, a2, a3 wsr a2, excsave1 1: From 5d33c77ba0fa216009728da96d33e0199b58e9ad Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 15 Jun 2018 16:42:54 +0200 Subject: [PATCH 0438/2608] um: Drop own definition of PTRACE_SYSEMU/_SINGLESTEP commit 0676b957c24bfb6e495449ba7b7e72c5b5d79233 upstream. 32bit UML used to define PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP own its own because many years ago not all libcs had these request codes in their UAPI. These days PTRACE_SYSEMU/_SINGLESTEP is well known and part of glibc and our own define becomes problematic. With change c48831d0eebf ("linux/x86: sync sys/ptrace.h with Linux 4.14 [BZ #22433]") glibc turned PTRACE_SYSEMU/_SINGLESTEP into a enum and UML failed to build. Let's drop our define and rely on the fact that every libc has PTRACE_SYSEMU/_SINGLESTEP. Cc: Cc: Ritesh Raj Sarraf Reported-and-tested-by: Ritesh Raj Sarraf Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/x86/um/shared/sysdep/ptrace_32.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h index b94a108de1dc..ae00d22bce02 100644 --- a/arch/x86/um/shared/sysdep/ptrace_32.h +++ b/arch/x86/um/shared/sysdep/ptrace_32.h @@ -10,20 +10,10 @@ static inline void update_debugregs(int seq) {} -/* syscall emulation path in ptrace */ - -#ifndef PTRACE_SYSEMU -#define PTRACE_SYSEMU 31 -#endif - void set_using_sysemu(int value); int get_using_sysemu(void); extern int sysemu_supported; -#ifndef PTRACE_SYSEMU_SINGLESTEP -#define PTRACE_SYSEMU_SINGLESTEP 32 -#endif - #define UPT_SYSCALL_ARG1(r) UPT_BX(r) #define UPT_SYSCALL_ARG2(r) UPT_CX(r) #define UPT_SYSCALL_ARG3(r) UPT_DX(r) From f074414aff064f7167756713ebdd8622592fe350 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 29 Aug 2018 21:20:10 +0200 Subject: [PATCH 0439/2608] clk: s2mps11: Fix matching when built as module and DT node contains compatible commit 8985167ecf57f97061599a155bb9652c84ea4913 upstream. When driver is built as module and DT node contains clocks compatible (e.g. "samsung,s2mps11-clk"), the module will not be autoloaded because module aliases won't match. The modalias from uevent: of:NclocksTCsamsung,s2mps11-clk The modalias from driver: platform:s2mps11-clk The devices are instantiated by parent's MFD. However both Device Tree bindings and parent define the compatible for clocks devices. In case of module matching this DT compatible will be used. The issue will not happen if this is a built-in (no need for module matching) or when clocks DT node does not contain compatible (not correct from bindings perspective but working for driver). Note when backporting to stable kernels: adjust the list of device ID entries. Cc: Fixes: 53c31b3437a6 ("mfd: sec-core: Add of_compatible strings for clock MFD cells") Signed-off-by: Krzysztof Kozlowski Acked-by: Stephen Boyd Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-s2mps11.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index fbaa84a33c46..14071a57c926 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -245,6 +245,36 @@ static const struct platform_device_id s2mps11_clk_id[] = { }; MODULE_DEVICE_TABLE(platform, s2mps11_clk_id); +#ifdef CONFIG_OF +/* + * Device is instantiated through parent MFD device and device matching is done + * through platform_device_id. + * + * However if device's DT node contains proper clock compatible and driver is + * built as a module, then the *module* matching will be done trough DT aliases. + * This requires of_device_id table. In the same time this will not change the + * actual *device* matching so do not add .of_match_table. + */ +static const struct of_device_id s2mps11_dt_match[] = { + { + .compatible = "samsung,s2mps11-clk", + .data = (void *)S2MPS11X, + }, { + .compatible = "samsung,s2mps13-clk", + .data = (void *)S2MPS13X, + }, { + .compatible = "samsung,s2mps14-clk", + .data = (void *)S2MPS14X, + }, { + .compatible = "samsung,s5m8767-clk", + .data = (void *)S5M8767X, + }, { + /* Sentinel */ + }, +}; +MODULE_DEVICE_TABLE(of, s2mps11_dt_match); +#endif + static struct platform_driver s2mps11_clk_driver = { .driver = { .name = "s2mps11-clk", From abe960b761eb6b3b32b099c80535b3d3943db040 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Wed, 10 Oct 2018 15:54:54 +0200 Subject: [PATCH 0440/2608] clk: at91: Fix division by zero in PLL recalc_rate() commit 0f5cb0e6225cae2f029944cb8c74617aab6ddd49 upstream. Commit a982e45dc150 ("clk: at91: PLL recalc_rate() now using cached MUL and DIV values") removed a check that prevents a division by zero. This now causes a stacktrace when booting the kernel on a at91 platform if the PLL DIV register contains zero. This commit reintroduces this check. Fixes: a982e45dc150 ("clk: at91: PLL recalc_rate() now using cached...") Cc: Signed-off-by: Ronald Wahl Acked-by: Ludovic Desroches Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/at91/clk-pll.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c index 72b6091eb7b9..dc7fbc796cb6 100644 --- a/drivers/clk/at91/clk-pll.c +++ b/drivers/clk/at91/clk-pll.c @@ -133,6 +133,9 @@ static unsigned long clk_pll_recalc_rate(struct clk_hw *hw, { struct clk_pll *pll = to_clk_pll(hw); + if (!pll->div || !pll->mul) + return 0; + return (parent_rate / pll->div) * (pll->mul + 1); } From 864aede99f55cfad8be37ae24a02bd92bc5606ad Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 16 Oct 2018 15:41:44 +0200 Subject: [PATCH 0441/2608] clk: rockchip: Fix static checker warning in rockchip_ddrclk_get_parent call commit 665636b2940d0897c4130253467f5e8c42eea392 upstream. Fixes the signedness bug returning '(-22)' on the return type by removing the sanity checker in rockchip_ddrclk_get_parent(). The function should return and unsigned value only and it's safe to remove the sanity checker as the core functions that call get_parent like clk_core_get_parent_by_index already ensures the validity of the clk index returned (index >= core->num_parents). Fixes: a4f182bf81f18 ("clk: rockchip: add new clock-type for the ddrclk") Cc: stable@vger.kernel.org Signed-off-by: Enric Balletbo i Serra Reviewed-by: Stephen Boyd Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-ddr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/clk/rockchip/clk-ddr.c b/drivers/clk/rockchip/clk-ddr.c index e8075359366b..ebce5260068b 100644 --- a/drivers/clk/rockchip/clk-ddr.c +++ b/drivers/clk/rockchip/clk-ddr.c @@ -80,16 +80,12 @@ static long rockchip_ddrclk_sip_round_rate(struct clk_hw *hw, static u8 rockchip_ddrclk_get_parent(struct clk_hw *hw) { struct rockchip_ddrclk *ddrclk = to_rockchip_ddrclk_hw(hw); - int num_parents = clk_hw_get_num_parents(hw); u32 val; val = clk_readl(ddrclk->reg_base + ddrclk->mux_offset) >> ddrclk->mux_shift; val &= GENMASK(ddrclk->mux_width - 1, 0); - if (val >= num_parents) - return -EINVAL; - return val; } From d2aaeb9a8e05442129694d8f206685f01644ba75 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Thu, 24 May 2018 17:23:41 +1200 Subject: [PATCH 0442/2608] clk: mvebu: use correct bit for 98DX3236 NAND commit 00c5a926af12a9f0236928dab3dc9faf621406a1 upstream. The correct fieldbit value for the NAND PLL reload trigger is 27. Fixes: commit e120c17a70e5 ("clk: mvebu: support for 98DX3236 SoC") Signed-off-by: Chris Packham Signed-off-by: Stephen Boyd Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/clk/mvebu/clk-corediv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/mvebu/clk-corediv.c b/drivers/clk/mvebu/clk-corediv.c index 8491979f4096..68f05c53d40e 100644 --- a/drivers/clk/mvebu/clk-corediv.c +++ b/drivers/clk/mvebu/clk-corediv.c @@ -72,7 +72,7 @@ static const struct clk_corediv_desc mvebu_corediv_desc[] = { }; static const struct clk_corediv_desc mv98dx3236_corediv_desc[] = { - { .mask = 0x0f, .offset = 6, .fieldbit = 26 }, /* NAND clock */ + { .mask = 0x0f, .offset = 6, .fieldbit = 27 }, /* NAND clock */ }; #define to_corediv_clk(p) container_of(p, struct clk_corediv, hw) From 65dd3e59e96f0e196526f0634c1b4a39b42e0911 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 4 Oct 2018 17:29:03 -0400 Subject: [PATCH 0443/2608] media: ov7670: make "xclk" clock optional commit 786fa584eda86d6598db3b87c61dc81f68808d11 upstream. When the "xclk" clock was added, it was made mandatory. This broke the driver on an OLPC plaform which doesn't know such clock. Make it optional. Tested on a OLPC XO-1 laptop. Fixes: 0a024d634cee ("[media] ov7670: get xclk") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Lubomir Rintel Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ov7670.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c index e88549f0e704..39ff73a6a807 100644 --- a/drivers/media/i2c/ov7670.c +++ b/drivers/media/i2c/ov7670.c @@ -1612,23 +1612,29 @@ static int ov7670_probe(struct i2c_client *client, info->pclk_hb_disable = true; } - info->clk = devm_clk_get(&client->dev, "xclk"); - if (IS_ERR(info->clk)) - return PTR_ERR(info->clk); - ret = clk_prepare_enable(info->clk); - if (ret) - return ret; + info->clk = devm_clk_get(&client->dev, "xclk"); /* optional */ + if (IS_ERR(info->clk)) { + ret = PTR_ERR(info->clk); + if (ret == -ENOENT) + info->clk = NULL; + else + return ret; + } + if (info->clk) { + ret = clk_prepare_enable(info->clk); + if (ret) + return ret; + info->clock_speed = clk_get_rate(info->clk) / 1000000; + if (info->clock_speed < 10 || info->clock_speed > 48) { + ret = -EINVAL; + goto clk_disable; + } + } ret = ov7670_init_gpio(client, info); if (ret) goto clk_disable; - info->clock_speed = clk_get_rate(info->clk) / 1000000; - if (info->clock_speed < 10 || info->clock_speed > 48) { - ret = -EINVAL; - goto clk_disable; - } - /* Make sure it's an ov7670 */ ret = ov7670_detect(sd); if (ret) { From fc07f543a6d39399ba425c5bc588920fbdf65f39 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 26 Sep 2018 18:03:16 +0200 Subject: [PATCH 0444/2608] libceph: bump CEPH_MSG_MAX_DATA_LEN commit 94e6992bb560be8bffb47f287194adf070b57695 upstream. If the read is large enough, we end up spinning in the messenger: libceph: osd0 192.168.122.1:6801 io error libceph: osd0 192.168.122.1:6801 io error libceph: osd0 192.168.122.1:6801 io error This is a receive side limit, so only reads were affected. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/libceph.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c2ec44cf5098..d3b04f9589a9 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -81,7 +81,13 @@ struct ceph_options { #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) -#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) + +/* + * Handle the largest possible rbd object in one message. + * There is no limit on the size of cephfs objects, but it has to obey + * rsize and wsize mount options anyway. + */ +#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024) #define CEPH_AUTH_NAME_DEFAULT "guest" From 32ffde79e88f5a943d89b04ca3821ac8fb1ce7e5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 27 Sep 2018 21:16:05 +0800 Subject: [PATCH 0445/2608] Revert "ceph: fix dentry leak in splice_dentry()" commit efe328230dc01aa0b1269aad0b5fae73eea4677a upstream. This reverts commit 8b8f53af1ed9df88a4c0fbfdf3db58f62060edf3. splice_dentry() is used by three places. For two places, req->r_dentry is passed to splice_dentry(). In the case of error, req->r_dentry does not get updated. So splice_dentry() should not drop reference. Cc: stable@vger.kernel.org # 4.18+ Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d5124ed35154..a1492bdc6d03 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1087,8 +1087,12 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) if (IS_ERR(realdn)) { pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", PTR_ERR(realdn), dn, in, ceph_vinop(in)); - dput(dn); - dn = realdn; /* note realdn contains the error */ + dn = realdn; + /* + * Caller should release 'dn' in the case of error. + * If 'req->r_dentry' is passed to this function, + * caller should leave 'req->r_dentry' untouched. + */ goto out; } else if (realdn) { dout("dn %p (%d) spliced with %p (%d) " From 3b29cbb592bd712428607fee1ab214d42348719f Mon Sep 17 00:00:00 2001 From: Allen Wild Date: Tue, 26 Sep 2017 19:37:44 +0200 Subject: [PATCH 0446/2608] thermal: enable broadcom menu for arm64 bcm2835 commit fec3624f0bcdb6b20ef9ccf9d9d55d0d75d776f8 upstream. Moving the bcm2835 thermal driver to the broadcom directory prevented it from getting enabled for arm64 builds, since the broadcom directory is only available when 32-bit specific ARCH_BCM is set. Fix this by enabling the Broadcom menu for ARCH_BCM or ARCH_BCM2835. Fixes: 6892cf07e733 ("thermal: bcm2835: move to the broadcom subdirectory") Reviewed-by: Eric Anholt Signed-off-by: Allen Wild Signed-off-by: Stefan Wahren Signed-off-by: Eduardo Valentin Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 07002df4f83a..e3f0d1fd1720 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -408,7 +408,7 @@ config MTK_THERMAL controller present in Mediatek SoCs menu "Broadcom thermal drivers" -depends on ARCH_BCM || COMPILE_TEST +depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST source "drivers/thermal/broadcom/Kconfig" endmenu From 07ea136219e0bbb653bb15247f1c1838d635f3bb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 8 Oct 2018 12:57:34 +0200 Subject: [PATCH 0447/2608] mach64: fix display corruption on big endian machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3c6c6a7878d00a3ac997a779c5b9861ff25dfcc8 upstream. The code for manual bit triple is not endian-clean. It builds the variable "hostdword" using byte accesses, therefore we must read the variable with "le32_to_cpu". The patch also enables (hardware or software) bit triple only if the image is monochrome (image->depth). If we want to blit full-color image, we shouldn't use the triple code. Signed-off-by: Mikulas Patocka Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/aty/mach64_accel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/aty/mach64_accel.c b/drivers/video/fbdev/aty/mach64_accel.c index 2541a0e0de76..7c53a2380cc8 100644 --- a/drivers/video/fbdev/aty/mach64_accel.c +++ b/drivers/video/fbdev/aty/mach64_accel.c @@ -345,7 +345,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit * this hwaccelerated triple has an issue with not aligned data */ - if (M64_HAS(HW_TRIPLE) && image->width % 8 == 0) + if (image->depth == 1 && M64_HAS(HW_TRIPLE) && image->width % 8 == 0) pix_width |= DP_HOST_TRIPLE_EN; } @@ -382,7 +382,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) src_bytes = (((image->width * image->depth) + 7) / 8) * image->height; /* manual triple each pixel */ - if (info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { + if (image->depth == 1 && info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { int inbit, outbit, mult24, byte_id_in_dword, width; u8 *pbitmapin = (u8*)image->data, *pbitmapout; u32 hostdword; @@ -415,7 +415,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) } } wait_for_fifo(1, par); - aty_st_le32(HOST_DATA0, hostdword, par); + aty_st_le32(HOST_DATA0, le32_to_cpu(hostdword), par); } } else { u32 *pbitmap, dwords = (src_bytes + 3) / 4; From 859eac9470c8c9493b8254f05e378f8a7ccaf642 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 8 Oct 2018 12:57:35 +0200 Subject: [PATCH 0448/2608] mach64: fix image corruption due to reading accelerator registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c09bcc91bb94ed91f1391bffcbe294963d605732 upstream. Reading the registers without waiting for engine idle returns unpredictable values. These unpredictable values result in display corruption - if atyfb_imageblit reads the content of DP_PIX_WIDTH with the bit DP_HOST_TRIPLE_EN set (from previous invocation), the driver would never ever clear the bit, resulting in display corruption. We don't want to wait for idle because it would degrade performance, so this patch modifies the driver so that it never reads accelerator registers. HOST_CNTL doesn't have to be read, we can just write it with HOST_BYTE_ALIGN because no other part of the driver cares if HOST_BYTE_ALIGN is set. DP_PIX_WIDTH is written in the functions atyfb_copyarea and atyfb_fillrect with the default value and in atyfb_imageblit with the value set according to the source image data. Signed-off-by: Mikulas Patocka Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/aty/mach64_accel.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/video/fbdev/aty/mach64_accel.c b/drivers/video/fbdev/aty/mach64_accel.c index 7c53a2380cc8..3ad46255f990 100644 --- a/drivers/video/fbdev/aty/mach64_accel.c +++ b/drivers/video/fbdev/aty/mach64_accel.c @@ -127,7 +127,7 @@ void aty_init_engine(struct atyfb_par *par, struct fb_info *info) /* set host attributes */ wait_for_fifo(13, par); - aty_st_le32(HOST_CNTL, 0, par); + aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); /* set pattern attributes */ aty_st_le32(PAT_REG0, 0, par); @@ -233,7 +233,8 @@ void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) rotation = rotation24bpp(dx, direction); } - wait_for_fifo(4, par); + wait_for_fifo(5, par); + aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); aty_st_le32(DP_SRC, FRGD_SRC_BLIT, par); aty_st_le32(SRC_Y_X, (sx << 16) | sy, par); aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | area->height, par); @@ -269,7 +270,8 @@ void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT); } - wait_for_fifo(3, par); + wait_for_fifo(4, par); + aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); aty_st_le32(DP_FRGD_CLR, color, par); aty_st_le32(DP_SRC, BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE, @@ -284,7 +286,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) { struct atyfb_par *par = (struct atyfb_par *) info->par; u32 src_bytes, dx = image->dx, dy = image->dy, width = image->width; - u32 pix_width_save, pix_width, host_cntl, rotation = 0, src, mix; + u32 pix_width, rotation = 0, src, mix; if (par->asleep) return; @@ -296,8 +298,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) return; } - pix_width = pix_width_save = aty_ld_le32(DP_PIX_WIDTH, par); - host_cntl = aty_ld_le32(HOST_CNTL, par) | HOST_BYTE_ALIGN; + pix_width = par->crtc.dp_pix_width; switch (image->depth) { case 1: @@ -370,12 +371,11 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) mix = FRGD_MIX_D_XOR_S | BKGD_MIX_D; } - wait_for_fifo(6, par); - aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, par); + wait_for_fifo(5, par); aty_st_le32(DP_PIX_WIDTH, pix_width, par); aty_st_le32(DP_MIX, mix, par); aty_st_le32(DP_SRC, src, par); - aty_st_le32(HOST_CNTL, host_cntl, par); + aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); aty_st_le32(DST_CNTL, DST_Y_TOP_TO_BOTTOM | DST_X_LEFT_TO_RIGHT | rotation, par); draw_rect(dx, dy, width, image->height, par); @@ -424,8 +424,4 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par); } } - - /* restore pix_width */ - wait_for_fifo(1, par); - aty_st_le32(DP_PIX_WIDTH, pix_width_save, par); } From b3d3180689f1c433354ed01aa405ffee810df5f9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 25 Jul 2018 19:47:19 -0500 Subject: [PATCH 0449/2608] reset: hisilicon: fix potential NULL pointer dereference commit e9a2310fb689151166df7fd9971093362d34bd79 upstream. There is a potential execution path in which function platform_get_resource() returns NULL. If this happens, we will end up having a NULL pointer dereference. Fix this by replacing devm_ioremap with devm_ioremap_resource, which has the NULL check and the memory region request. This code was detected with the help of Coccinelle. Cc: stable@vger.kernel.org Fixes: 97b7129cd2af ("reset: hisilicon: change the definition of hisi_reset_init") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/hisilicon/reset.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/clk/hisilicon/reset.c b/drivers/clk/hisilicon/reset.c index 2a5015c736ce..43e82fa64422 100644 --- a/drivers/clk/hisilicon/reset.c +++ b/drivers/clk/hisilicon/reset.c @@ -109,9 +109,8 @@ struct hisi_reset_controller *hisi_reset_init(struct platform_device *pdev) return NULL; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rstc->membase = devm_ioremap(&pdev->dev, - res->start, resource_size(res)); - if (!rstc->membase) + rstc->membase = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(rstc->membase)) return NULL; spin_lock_init(&rstc->lock); From 56bce0e5d5cca4377879fd983d454418f6a87263 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 22 Aug 2018 13:21:53 -0600 Subject: [PATCH 0450/2608] vhost/scsi: truncate T10 PI iov_iter to prot_bytes commit 4542d623c7134bc1738f8a68ccb6dd546f1c264f upstream. Commands with protection information included were not truncating the protection iov_iter to the number of protection bytes in the command. This resulted in vhost_scsi mis-calculating the size of the protection SGL in vhost_scsi_calc_sgls(), and including both the protection and data SG entries in the protection SGL. Fixes: 09b13fa8c1a1 ("vhost/scsi: Add ANY_LAYOUT support in vhost_scsi_handle_vq") Signed-off-by: Greg Edwards Signed-off-by: Michael S. Tsirkin Fixes: 09b13fa8c1a1093e9458549ac8bb203a7c65c62a Cc: stable@vger.kernel.org Reviewed-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e47c5bc3ddca..35ebf06d9ecb 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -993,7 +993,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); } /* - * Set prot_iter to data_iter, and advance past any + * Set prot_iter to data_iter and truncate it to + * prot_bytes, and advance data_iter past any * preceeding prot_bytes that may be present. * * Also fix up the exp_data_len to reflect only the @@ -1002,6 +1003,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (prot_bytes) { exp_data_len -= prot_bytes; prot_iter = data_iter; + iov_iter_truncate(&prot_iter, prot_bytes); iov_iter_advance(&data_iter, prot_bytes); } tag = vhost64_to_cpu(vq, v_req_pi.tag); From 2cbff556f2e153af48564b125711417b7e6dc6ad Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 6 Nov 2018 00:51:21 -0800 Subject: [PATCH 0451/2608] scsi: qla2xxx: Initialize port speed to avoid setting lower speed commit f635e48e866ee1a47d2d42ce012fdcc07bf55853 upstream. This patch initializes port speed so that firmware does not set lower operating speed. Setting lower speed in firmware impacts WRITE perfomance. Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Tested-by: Laurence Oberman Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 041497c8ecd8..aef1e1a55535 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4236,6 +4236,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) fcport->loop_id = FC_NO_LOOP_ID; qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); fcport->supported_classes = FC_COS_UNSPECIFIED; + fcport->fp_speed = PORT_SPEED_UNKNOWN; fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, From a061395786ef57f96830ae4791778d1916146cd0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 14 Nov 2018 16:25:51 +0800 Subject: [PATCH 0452/2608] SCSI: fix queue cleanup race before queue initialization is done commit 8dc765d438f1e42b3e8227b3b09fad7d73f4ec9a upstream. c2856ae2f315d ("blk-mq: quiesce queue before freeing queue") has already fixed this race, however the implied synchronize_rcu() in blk_mq_quiesce_queue() can slow down LUN probe a lot, so caused performance regression. Then 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") tried to quiesce queue for avoiding unnecessary synchronize_rcu() only when queue initialization is done, because it is usual to see lots of inexistent LUNs which need to be probed. However, turns out it isn't safe to quiesce queue only when queue initialization is done. Because when one SCSI command is completed, the user of sending command can be waken up immediately, then the scsi device may be removed, meantime the run queue in scsi_end_request() is still in-progress, so kernel panic can be caused. In Red Hat QE lab, there are several reports about this kind of kernel panic triggered during kernel booting. This patch tries to address the issue by grabing one queue usage counter during freeing one request and the following run queue. Fixes: 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") Cc: Andrew Jones Cc: Bart Van Assche Cc: linux-scsi@vger.kernel.org Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: James E.J. Bottomley Cc: stable Cc: jianchao.wang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 5 ++--- drivers/scsi/scsi_lib.c | 8 ++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 6aa2bc4e9652..0b14aebfd1a8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -671,9 +671,8 @@ void blk_cleanup_queue(struct request_queue *q) * dispatch may still be in-progress since we dispatch requests * from more than one contexts. * - * No need to quiesce queue if it isn't initialized yet since - * blk_freeze_queue() should be enough for cases of passthrough - * request. + * We rely on driver to deal with the race in case that queue + * initialization isn't done. */ if (q->mq_ops && blk_queue_init_done(q)) blk_mq_quiesce_queue(q); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index bfd8f12d4e9a..7f505c027ce7 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -683,6 +683,12 @@ static bool scsi_end_request(struct request *req, blk_status_t error, */ scsi_mq_uninit_cmd(cmd); + /* + * queue is still alive, so grab the ref for preventing it + * from being cleaned up during running queue. + */ + percpu_ref_get(&q->q_usage_counter); + __blk_mq_end_request(req, error); if (scsi_target(sdev)->single_lun || @@ -690,6 +696,8 @@ static bool scsi_end_request(struct request *req, blk_status_t error, kblockd_schedule_work(&sdev->requeue_work); else blk_mq_run_hw_queues(q, true); + + percpu_ref_put(&q->q_usage_counter); } else { unsigned long flags; From d172f2527081d0bb28a32f16343fd2ad84008c9f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 31 Oct 2018 08:41:34 +0000 Subject: [PATCH 0453/2608] soc: ti: QMSS: Fix usage of irq_set_affinity_hint commit 832ad0e3da4510fd17f98804abe512ea9a747035 upstream. The Keystone QMSS driver is pretty damaged, in the sense that it does things like this: irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); where cpu_map is a local variable. As we leave the function, this will point to nowhere-land, and things will end-up badly. Instead, let's use a proper cpumask that gets allocated, giving the driver a chance to actually work with things like irqbalance as well as have a hypothetical 64bit future. Cc: stable@vger.kernel.org Acked-by: Santosh Shilimkar Signed-off-by: Marc Zyngier Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- drivers/soc/ti/knav_qmss.h | 4 ++-- drivers/soc/ti/knav_qmss_acc.c | 10 +++++----- drivers/soc/ti/knav_qmss_queue.c | 22 +++++++++++++++------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h index 905b974d1bdc..4686192e719e 100644 --- a/drivers/soc/ti/knav_qmss.h +++ b/drivers/soc/ti/knav_qmss.h @@ -321,8 +321,8 @@ struct knav_range_ops { }; struct knav_irq_info { - int irq; - u32 cpu_map; + int irq; + struct cpumask *cpu_mask; }; struct knav_range_info { diff --git a/drivers/soc/ti/knav_qmss_acc.c b/drivers/soc/ti/knav_qmss_acc.c index 3d7225f4e77f..672aebe1e378 100644 --- a/drivers/soc/ti/knav_qmss_acc.c +++ b/drivers/soc/ti/knav_qmss_acc.c @@ -205,18 +205,18 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range, { struct knav_device *kdev = range->kdev; struct knav_acc_channel *acc; - unsigned long cpu_map; + struct cpumask *cpu_mask; int ret = 0, irq; u32 old, new; if (range->flags & RANGE_MULTI_QUEUE) { acc = range->acc; irq = range->irqs[0].irq; - cpu_map = range->irqs[0].cpu_map; + cpu_mask = range->irqs[0].cpu_mask; } else { acc = range->acc + queue; irq = range->irqs[queue].irq; - cpu_map = range->irqs[queue].cpu_map; + cpu_mask = range->irqs[queue].cpu_mask; } old = acc->open_mask; @@ -239,8 +239,8 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range, acc->name, acc->name); ret = request_irq(irq, knav_acc_int_handler, 0, acc->name, range); - if (!ret && cpu_map) { - ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); + if (!ret && cpu_mask) { + ret = irq_set_affinity_hint(irq, cpu_mask); if (ret) { dev_warn(range->kdev->dev, "Failed to set IRQ affinity\n"); diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c index 39225de9d7f1..9879ca5f8c5f 100644 --- a/drivers/soc/ti/knav_qmss_queue.c +++ b/drivers/soc/ti/knav_qmss_queue.c @@ -102,19 +102,17 @@ static int knav_queue_setup_irq(struct knav_range_info *range, struct knav_queue_inst *inst) { unsigned queue = inst->id - range->queue_base; - unsigned long cpu_map; int ret = 0, irq; if (range->flags & RANGE_HAS_IRQ) { irq = range->irqs[queue].irq; - cpu_map = range->irqs[queue].cpu_map; ret = request_irq(irq, knav_queue_int_handler, 0, inst->irq_name, inst); if (ret) return ret; disable_irq(irq); - if (cpu_map) { - ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); + if (range->irqs[queue].cpu_mask) { + ret = irq_set_affinity_hint(irq, range->irqs[queue].cpu_mask); if (ret) { dev_warn(range->kdev->dev, "Failed to set IRQ affinity\n"); @@ -1222,9 +1220,19 @@ static int knav_setup_queue_range(struct knav_device *kdev, range->num_irqs++; - if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) - range->irqs[i].cpu_map = - (oirq.args[2] & 0x0000ff00) >> 8; + if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) { + unsigned long mask; + int bit; + + range->irqs[i].cpu_mask = devm_kzalloc(dev, + cpumask_size(), GFP_KERNEL); + if (!range->irqs[i].cpu_mask) + return -ENOMEM; + + mask = (oirq.args[2] & 0x0000ff00) >> 8; + for_each_set_bit(bit, &mask, BITS_PER_LONG) + cpumask_set_cpu(bit, range->irqs[i].cpu_mask); + } } range->num_irqs = min(range->num_irqs, range->num_queues); From ce8bf4cfe4afbd95d7d2db4ffcff9fa6f530f993 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Fri, 2 Nov 2018 15:48:15 -0700 Subject: [PATCH 0454/2608] ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry commit 29aa30167a0a2e6045a0d6d2e89d8168132333d5 upstream. Somehow, file system metadata was corrupted, which causes ocfs2_check_dir_entry() to fail in function ocfs2_dir_foreach_blk_el(). According to the original design intention, if above happens we should skip the problematic block and continue to retrieve dir entry. But there is obviouse misuse of brelse around related code. After failure of ocfs2_check_dir_entry(), current code just moves to next position and uses the problematic buffer head again and again during which the problematic buffer head is released for multiple times. I suppose, this a serious issue which is long-lived in ocfs2. This may cause other file systems which is also used in a the same host insane. So we should also consider about bakcporting this patch into linux -stable. Link: http://lkml.kernel.org/r/HK2PR06MB045211675B43EED794E597B6D56E0@HK2PR06MB0452.apcprd06.prod.outlook.com Signed-off-by: Changwei Ge Suggested-by: Changkuo Shi Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index febe6312ceff..3c26f2dfedf1 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1896,8 +1896,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, /* On error, skip the f_pos to the next block. */ ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; - brelse(bh); - continue; + break; } if (le64_to_cpu(de->inode)) { unsigned char d_type = DT_UNKNOWN; From 1d0ad8ac6d1c4d8d513700096e1786e23afcfc26 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 16 Nov 2018 15:08:25 -0800 Subject: [PATCH 0455/2608] ocfs2: free up write context when direct IO failed commit 5040f8df56fb90c7919f1c9b0b6e54c843437456 upstream. The write context should also be freed even when direct IO failed. Otherwise a memory leak is introduced and entries remain in oi->ip_unwritten_list causing the following BUG later in unlink path: ERROR: bug expression: !list_empty(&oi->ip_unwritten_list) ERROR: Clear inode of 215043, inode has unwritten extents ... Call Trace: ? __set_current_blocked+0x42/0x68 ocfs2_evict_inode+0x91/0x6a0 [ocfs2] ? bit_waitqueue+0x40/0x33 evict+0xdb/0x1af iput+0x1a2/0x1f7 do_unlinkat+0x194/0x28f SyS_unlinkat+0x1b/0x2f do_syscall_64+0x79/0x1ae entry_SYSCALL_64_after_hwframe+0x151/0x0 This patch also logs, with frequency limit, direct IO failures. Link: http://lkml.kernel.org/r/20181102170632.25921-1-wen.gang.wang@oracle.com Signed-off-by: Wengang Wang Reviewed-by: Junxiao Bi Reviewed-by: Changwei Ge Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/aops.c | 12 ++++++++++-- fs/ocfs2/cluster/masklog.h | 9 +++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d1516327b787..99550f4bd159 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2404,8 +2404,16 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); - if (bytes > 0 && private) - ret = ocfs2_dio_end_io_write(inode, private, offset, bytes); + if (bytes <= 0) + mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld", + (long long)bytes); + if (private) { + if (bytes > 0) + ret = ocfs2_dio_end_io_write(inode, private, offset, + bytes); + else + ocfs2_dio_free_write_ctx(inode, private); + } ocfs2_iocb_clear_rw_locked(iocb); diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 308ea0eb35fd..a396096a5099 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -178,6 +178,15 @@ do { \ ##__VA_ARGS__); \ } while (0) +#define mlog_ratelimited(mask, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + mlog(mask, fmt, ##__VA_ARGS__); \ +} while (0) + #define mlog_errno(st) ({ \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ From e6e4f052a20670e324d7cebd68460228f00086e5 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 2 Nov 2018 15:47:59 -0700 Subject: [PATCH 0456/2608] mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings commit ac5b2c18911ffe95c08d69273917f90212cf5659 upstream. THP allocation might be really disruptive when allocated on NUMA system with the local node full or hard to reclaim. Stefan has posted an allocation stall report on 4.12 based SLES kernel which suggests the same issue: kvm: page allocation stalls for 194572ms, order:9, mode:0x4740ca(__GFP_HIGHMEM|__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|__GFP_MOVABLE|__GFP_DIRECT_RECLAIM), nodemask=(null) kvm cpuset=/ mems_allowed=0-1 CPU: 10 PID: 84752 Comm: kvm Tainted: G W 4.12.0+98-ph 0000001 SLE15 (unreleased) Hardware name: Supermicro SYS-1029P-WTRT/X11DDW-NT, BIOS 2.0 12/05/2017 Call Trace: dump_stack+0x5c/0x84 warn_alloc+0xe0/0x180 __alloc_pages_slowpath+0x820/0xc90 __alloc_pages_nodemask+0x1cc/0x210 alloc_pages_vma+0x1e5/0x280 do_huge_pmd_wp_page+0x83f/0xf00 __handle_mm_fault+0x93d/0x1060 handle_mm_fault+0xc6/0x1b0 __do_page_fault+0x230/0x430 do_page_fault+0x2a/0x70 page_fault+0x7b/0x80 [...] Mem-Info: active_anon:126315487 inactive_anon:1612476 isolated_anon:5 active_file:60183 inactive_file:245285 isolated_file:0 unevictable:15657 dirty:286 writeback:1 unstable:0 slab_reclaimable:75543 slab_unreclaimable:2509111 mapped:81814 shmem:31764 pagetables:370616 bounce:0 free:32294031 free_pcp:6233 free_cma:0 Node 0 active_anon:254680388kB inactive_anon:1112760kB active_file:240648kB inactive_file:981168kB unevictable:13368kB isolated(anon):0kB isolated(file):0kB mapped:280240kB dirty:1144kB writeback:0kB shmem:95832kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 81225728kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no Node 1 active_anon:250583072kB inactive_anon:5337144kB active_file:84kB inactive_file:0kB unevictable:49260kB isolated(anon):20kB isolated(file):0kB mapped:47016kB dirty:0kB writeback:4kB shmem:31224kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 31897600kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no The defrag mode is "madvise" and from the above report it is clear that the THP has been allocated for MADV_HUGEPAGA vma. Andrea has identified that the main source of the problem is __GFP_THISNODE usage: : The problem is that direct compaction combined with the NUMA : __GFP_THISNODE logic in mempolicy.c is telling reclaim to swap very : hard the local node, instead of failing the allocation if there's no : THP available in the local node. : : Such logic was ok until __GFP_THISNODE was added to the THP allocation : path even with MPOL_DEFAULT. : : The idea behind the __GFP_THISNODE addition, is that it is better to : provide local memory in PAGE_SIZE units than to use remote NUMA THP : backed memory. That largely depends on the remote latency though, on : threadrippers for example the overhead is relatively low in my : experience. : : The combination of __GFP_THISNODE and __GFP_DIRECT_RECLAIM results in : extremely slow qemu startup with vfio, if the VM is larger than the : size of one host NUMA node. This is because it will try very hard to : unsuccessfully swapout get_user_pages pinned pages as result of the : __GFP_THISNODE being set, instead of falling back to PAGE_SIZE : allocations and instead of trying to allocate THP on other nodes (it : would be even worse without vfio type1 GUP pins of course, except it'd : be swapping heavily instead). Fix this by removing __GFP_THISNODE for THP requests which are requesting the direct reclaim. This effectivelly reverts 5265047ac301 on the grounds that the zone/node reclaim was known to be disruptive due to premature reclaim when there was memory free. While it made sense at the time for HPC workloads without NUMA awareness on rare machines, it was ultimately harmful in the majority of cases. The existing behaviour is similar, if not as widespare as it applies to a corner case but crucially, it cannot be tuned around like zone_reclaim_mode can. The default behaviour should always be to cause the least harm for the common case. If there are specialised use cases out there that want zone_reclaim_mode in specific cases, then it can be built on top. Longterm we should consider a memory policy which allows for the node reclaim like behavior for the specific memory ranges which would allow a [1] http://lkml.kernel.org/r/20180820032204.9591-1-aarcange@redhat.com Mel said: : Both patches look correct to me but I'm responding to this one because : it's the fix. The change makes sense and moves further away from the : severe stalling behaviour we used to see with both THP and zone reclaim : mode. : : I put together a basic experiment with usemem configured to reference a : buffer multiple times that is 80% the size of main memory on a 2-socket : box with symmetric node sizes and defrag set to "always". The defrag : setting is not the default but it would be functionally similar to : accessing a buffer with madvise(MADV_HUGEPAGE). Usemem is configured to : reference the buffer multiple times and while it's not an interesting : workload, it would be expected to complete reasonably quickly as it fits : within memory. The results were; : : usemem : vanilla noreclaim-v1 : Amean Elapsd-1 42.78 ( 0.00%) 26.87 ( 37.18%) : Amean Elapsd-3 27.55 ( 0.00%) 7.44 ( 73.00%) : Amean Elapsd-4 5.72 ( 0.00%) 5.69 ( 0.45%) : : This shows the elapsed time in seconds for 1 thread, 3 threads and 4 : threads referencing buffers 80% the size of memory. With the patches : applied, it's 37.18% faster for the single thread and 73% faster with two : threads. Note that 4 threads showing little difference does not indicate : the problem is related to thread counts. It's simply the case that 4 : threads gets spread so their workload mostly fits in one node. : : The overall view from /proc/vmstats is more startling : : 4.19.0-rc1 4.19.0-rc1 : vanillanoreclaim-v1r1 : Minor Faults 35593425 708164 : Major Faults 484088 36 : Swap Ins 3772837 0 : Swap Outs 3932295 0 : : Massive amounts of swap in/out without the patch : : Direct pages scanned 6013214 0 : Kswapd pages scanned 0 0 : Kswapd pages reclaimed 0 0 : Direct pages reclaimed 4033009 0 : : Lots of reclaim activity without the patch : : Kswapd efficiency 100% 100% : Kswapd velocity 0.000 0.000 : Direct efficiency 67% 100% : Direct velocity 11191.956 0.000 : : Mostly from direct reclaim context as you'd expect without the patch. : : Page writes by reclaim 3932314.000 0.000 : Page writes file 19 0 : Page writes anon 3932295 0 : Page reclaim immediate 42336 0 : : Writes from reclaim context is never good but the patch eliminates it. : : We should never have default behaviour to thrash the system for such a : basic workload. If zone reclaim mode behaviour is ever desired but on a : single task instead of a global basis then the sensible option is to build : a mempolicy that enforces that behaviour. This was a severe regression compared to previous kernels that made important workloads unusable and it starts when __GFP_THISNODE was added to THP allocations under MADV_HUGEPAGE. It is not a significant risk to go to the previous behavior before __GFP_THISNODE was added, it worked like that for years. This was simply an optimization to some lucky workloads that can fit in a single node, but it ended up breaking the VM for others that can't possibly fit in a single node, so going back is safe. [mhocko@suse.com: rewrote the changelog based on the one from Andrea] Link: http://lkml.kernel.org/r/20180925120326.24392-2-mhocko@kernel.org Fixes: 5265047ac301 ("mm, thp: really limit transparent hugepage allocation to local node") Signed-off-by: Andrea Arcangeli Signed-off-by: Michal Hocko Reported-by: Stefan Priebe Debugged-by: Andrea Arcangeli Reported-by: Alex Williamson Reviewed-by: Mel Gorman Tested-by: Mel Gorman Cc: Zi Yan Cc: Vlastimil Babka Cc: David Rientjes Cc: "Kirill A. Shutemov" Cc: [4.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ecbda7f5d494..1b93535d875f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2012,8 +2012,36 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, nmask = policy_nodemask(gfp, pol); if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); - page = __alloc_pages_node(hpage_node, - gfp | __GFP_THISNODE, order); + /* + * We cannot invoke reclaim if __GFP_THISNODE + * is set. Invoking reclaim with + * __GFP_THISNODE set, would cause THP + * allocations to trigger heavy swapping + * despite there may be tons of free memory + * (including potentially plenty of THP + * already available in the buddy) on all the + * other NUMA nodes. + * + * At most we could invoke compaction when + * __GFP_THISNODE is set (but we would need to + * refrain from invoking reclaim even if + * compaction returned COMPACT_SKIPPED because + * there wasn't not enough memory to succeed + * compaction). For now just avoid + * __GFP_THISNODE instead of limiting the + * allocation path to a strict and single + * compaction invocation. + * + * Supposedly if direct reclaim was enabled by + * the caller, the app prefers THP regardless + * of the node it comes from so this would be + * more desiderable behavior than only + * providing THP originated from the local + * node in such case. + */ + if (!(gfp & __GFP_DIRECT_RECLAIM)) + gfp |= __GFP_THISNODE; + page = __alloc_pages_node(hpage_node, gfp, order); goto out; } } From 54ab59528026cc6140be55fd918eedbdf2ee9677 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Thu, 25 Oct 2018 12:15:43 -0700 Subject: [PATCH 0457/2608] netfilter: conntrack: fix calculation of next bucket number in early_drop commit f393808dc64149ccd0e5a8427505ba2974a59854 upstream. If there's no entry to drop in bucket that corresponds to the hash, early_drop() should look for it in other buckets. But since it increments hash instead of bucket number, it actually looks in the same bucket 8 times: hsize is 16k by default (14 bits) and hash is 32-bit value, so reciprocal_scale(hash, hsize) returns the same value for hash..hash+7 in most cases. Fix it by increasing bucket number instead of hash and rename _hash to bucket to avoid future confusion. Fixes: 3e86638e9a0b ("netfilter: conntrack: consider ct netns in early_drop logic") Cc: # v4.7+ Signed-off-by: Vasily Khoruzhick Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a268acc48af0..b793b55d1488 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -932,19 +932,22 @@ static unsigned int early_drop_list(struct net *net, return drops; } -static noinline int early_drop(struct net *net, unsigned int _hash) +static noinline int early_drop(struct net *net, unsigned int hash) { - unsigned int i; + unsigned int i, bucket; for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; - unsigned int hash, hsize, drops; + unsigned int hsize, drops; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hsize); - hash = reciprocal_scale(_hash++, hsize); + if (!i) + bucket = reciprocal_scale(hash, hsize); + else + bucket = (bucket + 1) % hsize; - drops = early_drop_list(net, &ct_hash[hash]); + drops = early_drop_list(net, &ct_hash[bucket]); rcu_read_unlock(); if (drops) { From 2d155427ffb2057fd7ece3e735b204b393fd62cf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 5 Nov 2018 14:54:56 +0100 Subject: [PATCH 0458/2608] ARM: 8809/1: proc-v7: fix Thumb annotation of cpu_v7_hvc_switch_mm commit 6282e916f774e37845c65d1eae9f8c649004f033 upstream. Due to what appears to be a copy/paste error, the opening ENTRY() of cpu_v7_hvc_switch_mm() lacks a matching ENDPROC(), and instead, the one for cpu_v7_smc_switch_mm() is duplicated. Given that it is ENDPROC() that emits the Thumb annotation, the cpu_v7_hvc_switch_mm() routine will be called in ARM mode on a Thumb2 kernel, resulting in the following splat: Internal error: Oops - undefined instruction: 0 [#1] SMP THUMB2 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-rc1-00030-g4d28ad89189d-dirty #488 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 PC is at cpu_v7_hvc_switch_mm+0x12/0x18 LR is at flush_old_exec+0x31b/0x570 pc : [] lr : [] psr: 00000013 sp : ee899e50 ip : 00000000 fp : 00000001 r10: eda28f34 r9 : eda31800 r8 : c12470e0 r7 : eda1fc00 r6 : eda53000 r5 : 00000000 r4 : ee88c000 r3 : c0316eec r2 : 00000001 r1 : eda53000 r0 : 6da6c000 Flags: nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Note the 'ISA ARM' in the last line. Fix this by using the correct name in ENDPROC(). Cc: Fixes: 10115105cb3a ("ARM: spectre-v2: add firmware based hardening") Reviewed-by: Dave Martin Acked-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 12468d9378d8..d8d90cf65b39 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -112,7 +112,7 @@ ENTRY(cpu_v7_hvc_switch_mm) hvc #0 ldmfd sp!, {r0 - r3} b cpu_v7_switch_mm -ENDPROC(cpu_v7_smc_switch_mm) +ENDPROC(cpu_v7_hvc_switch_mm) #endif ENTRY(cpu_v7_iciallu_switch_mm) mov r3, #0 From fbd703540e22c5f8820a0bffbee4801fb4067e6b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Oct 2018 13:06:16 +0200 Subject: [PATCH 0459/2608] mtd: docg3: don't set conflicting BCH_CONST_PARAMS option commit be2e1c9dcf76886a83fb1c433a316e26d4ca2550 upstream. I noticed during the creation of another bugfix that the BCH_CONST_PARAMS option that is set by DOCG3 breaks setting variable parameters for any other users of the BCH library code. The only other user we have today is the MTD_NAND software BCH implementation (most flash controllers use hardware BCH these days and are not affected). I considered removing BCH_CONST_PARAMS entirely because of the inherent conflict, but according to the description in lib/bch.c there is a significant performance benefit in keeping it. To avoid the immediate problem of the conflict between MTD_NAND_BCH and DOCG3, this only sets the constant parameters if MTD_NAND_BCH is disabled, which should fix the problem for all cases that are affected. This should also work for all stable kernels. Note that there is only one machine that actually seems to use the DOCG3 driver (arch/arm/mach-pxa/mioa701.c), so most users should have the driver disabled, but it almost certainly shows up if we wanted to test random kernels on machines that use software BCH in MTD. Fixes: d13d19ece39f ("mtd: docg3: add ECC correction code") Cc: stable@vger.kernel.org Cc: Robert Jarzmik Signed-off-by: Arnd Bergmann Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/devices/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index 6def5445e03e..64731e3221ef 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -206,7 +206,7 @@ comment "Disk-On-Chip Device Drivers" config MTD_DOCG3 tristate "M-Systems Disk-On-Chip G3" select BCH - select BCH_CONST_PARAMS + select BCH_CONST_PARAMS if !MTD_NAND_BCH select BITREVERSE ---help--- This provides an MTD device driver for the M-Systems DiskOnChip From 9c6f231e8ad5947b757c66bf0a8f5c10ec83ccc2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Nov 2018 18:17:03 +0800 Subject: [PATCH 0460/2608] of, numa: Validate some distance map rules commit 89c38422e072bb453e3045b8f1b962a344c3edea upstream. Currently the NUMA distance map parsing does not validate the distance table for the distance-matrix rules 1-2 in [1]. However the arch NUMA code may enforce some of these rules, but not all. Such is the case for the arm64 port, which does not enforce the rule that the distance between separates nodes cannot equal LOCAL_DISTANCE. The patch adds the following rules validation: - distance of node to self equals LOCAL_DISTANCE - distance of separate nodes > LOCAL_DISTANCE This change avoids a yet-unresolved crash reported in [2]. A note on dealing with symmetrical distances between nodes: Validating symmetrical distances between nodes is difficult. If it were mandated in the bindings that every distance must be recorded in the table, then it would be easy. However, it isn't. In addition to this, it is also possible to record [b, a] distance only (and not [a, b]). So, when processing the table for [b, a], we cannot assert that current distance of [a, b] != [b, a] as invalid, as [a, b] distance may not be present in the table and current distance would be default at REMOTE_DISTANCE. As such, we maintain the policy that we overwrite distance [a, b] = [b, a] for b > a. This policy is different to kernel ACPI SLIT validation, which allows non-symmetrical distances (ACPI spec SLIT rules allow it). However, the distance debug message is dropped as it may be misleading (for a distance which is later overwritten). Some final notes on semantics: - It is implied that it is the responsibility of the arch NUMA code to reset the NUMA distance map for an error in distance map parsing. - It is the responsibility of the FW NUMA topology parsing (whether OF or ACPI) to enforce NUMA distance rules, and not arch NUMA code. [1] Documents/devicetree/bindings/numa.txt [2] https://www.spinics.net/lists/arm-kernel/msg683304.html Cc: stable@vger.kernel.org # 4.7 Signed-off-by: John Garry Acked-by: Will Deacon Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/of_numa.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 2db1f7a04baf..0b29ee9ee8c3 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -126,9 +126,14 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map) distance = of_read_number(matrix, 1); matrix++; + if ((nodea == nodeb && distance != LOCAL_DISTANCE) || + (nodea != nodeb && distance <= LOCAL_DISTANCE)) { + pr_err("Invalid distance[node%d -> node%d] = %d\n", + nodea, nodeb, distance); + return -EINVAL; + } + numa_set_distance(nodea, nodeb, distance); - pr_debug("distance[node%d -> node%d] = %d\n", - nodea, nodeb, distance); /* Set default distance of node B->A same as A->B */ if (nodeb > nodea) From c46d34365a1983225abee5fbe283c2852c17ac8e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 9 Nov 2018 15:22:07 -0500 Subject: [PATCH 0461/2608] x86/cpu/vmware: Do not trace vmware_sched_clock() commit 15035388439f892017d38b05214d3cda6578af64 upstream. When running function tracing on a Linux guest running on VMware Workstation, the guest would crash. This is due to tracing of the sched_clock internal call of the VMware vmware_sched_clock(), which causes an infinite recursion within the tracing code (clock calls must not be traced). Make vmware_sched_clock() not traced by ftrace. Fixes: 80e9a4f21fd7c ("x86/vmware: Add paravirt sched clock") Reported-by: GwanYeong Kim Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Borislav Petkov CC: Alok Kataria CC: GwanYeong Kim CC: "H. Peter Anvin" CC: Ingo Molnar Cc: stable@vger.kernel.org CC: Thomas Gleixner CC: virtualization@lists.linux-foundation.org CC: x86-ml Link: http://lkml.kernel.org/r/20181109152207.4d3e7d70@gandalf.local.home Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/vmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 8e005329648b..d805202c63cd 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s) } early_param("no-vmw-sched-clock", setup_vmw_sched_clock); -static unsigned long long vmware_sched_clock(void) +static unsigned long long notrace vmware_sched_clock(void) { unsigned long long ns; From de51aafd80bedac4ce10072510203d67eb6ea3b4 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Nov 2018 03:48:57 +0000 Subject: [PATCH 0462/2608] x86/hyper-v: Enable PIT shutdown quirk commit 1de72c706488b7be664a601cf3843bd01e327e58 upstream. Hyper-V emulation of the PIT has a quirk such that the normal PIT shutdown path doesn't work, because clearing the counter register restarts the timer. Disable the counter clearing on PIT shutdown. Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Cc: "gregkh@linuxfoundation.org" Cc: "devel@linuxdriverproject.org" Cc: "daniel.lezcano@linaro.org" Cc: "virtualization@lists.linux-foundation.org" Cc: "jgross@suse.com" Cc: "akataria@vmware.com" Cc: "olaf@aepfle.de" Cc: "apw@canonical.com" Cc: vkuznets Cc: "jasowang@redhat.com" Cc: "marcelo.cerri@canonical.com" Cc: KY Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1541303219-11142-3-git-send-email-mikelley@microsoft.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mshyperv.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 85eb5fc180c8..c0201b11e9e2 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -243,6 +244,16 @@ static void __init ms_hyperv_init_platform(void) if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; + /* + * Hyper-V VMs have a PIT emulation quirk such that zeroing the + * counter register during PIT shutdown restarts the PIT. So it + * continues to interrupt @18.2 HZ. Setting i8253_clear_counter + * to false tells pit_shutdown() not to zero the counter so that + * the PIT really is shutdown. Generation 2 VMs don't have a PIT, + * and setting this value has no effect. + */ + i8253_clear_counter_on_shutdown = false; + #if IS_ENABLED(CONFIG_HYPERV) /* * Setup the hook to get control post apic initialization. From d85114a0a131e839070728f2d7bc253a9bfa8eb8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 22 Oct 2018 09:19:04 -0700 Subject: [PATCH 0463/2608] termios, tty/tty_baudrate.c: fix buffer overrun commit 991a25194097006ec1e0d2e0814ff920e59e3465 upstream. On architectures with CBAUDEX == 0 (Alpha and PowerPC), the code in tty_baudrate.c does not do any limit checking on the tty_baudrate[] array, and in fact a buffer overrun is possible on both architectures. Add a limit check to prevent that situation. This will be followed by a much bigger cleanup/simplification patch. Signed-off-by: H. Peter Anvin (Intel) Requested-by: Cc: Johan Hovold Cc: Jiri Slaby Cc: Al Viro Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Eugene Syromiatnikov Cc: Alan Cox Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_baudrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c index ebc797fc1afd..42e5683147d5 100644 --- a/drivers/tty/tty_baudrate.c +++ b/drivers/tty/tty_baudrate.c @@ -76,7 +76,7 @@ speed_t tty_termios_baud_rate(struct ktermios *termios) else cbaud += 15; } - return baud_table[cbaud]; + return cbaud >= n_baud_table ? 0 : baud_table[cbaud]; } EXPORT_SYMBOL(tty_termios_baud_rate); @@ -112,7 +112,7 @@ speed_t tty_termios_input_baud_rate(struct ktermios *termios) else cbaud += 15; } - return baud_table[cbaud]; + return cbaud >= n_baud_table ? 0 : baud_table[cbaud]; #else return tty_termios_baud_rate(termios); #endif From 4fa1a679ed0140e84e8d4a25d0c3edd8b02b9976 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 22 Oct 2018 09:19:05 -0700 Subject: [PATCH 0464/2608] arch/alpha, termios: implement BOTHER, IBSHIFT and termios2 commit d0ffb805b729322626639336986bc83fc2e60871 upstream. Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags using arbitrary flags. Because BOTHER is not defined, the general Linux code doesn't allow setting arbitrary baud rates, and because CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037. Resolve both problems by #defining BOTHER to 037 on Alpha. However, userspace still needs to know if setting BOTHER is actually safe given legacy kernels (does anyone actually care about that on Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even though they use the same structure. Define struct termios2 just for compatibility; it is the exact same structure as struct termios. In a future patchset, this will be cleaned up so the uapi headers are usable from libc. Signed-off-by: H. Peter Anvin (Intel) Cc: Jiri Slaby Cc: Al Viro Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Eugene Syromiatnikov Cc: Cc: Cc: Johan Hovold Cc: Alan Cox Cc: Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/termios.h | 8 +++++++- arch/alpha/include/uapi/asm/ioctls.h | 5 +++++ arch/alpha/include/uapi/asm/termbits.h | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h index 6a8c53dec57e..b7c77bb1bfd2 100644 --- a/arch/alpha/include/asm/termios.h +++ b/arch/alpha/include/asm/termios.h @@ -73,9 +73,15 @@ }) #define user_termios_to_kernel_termios(k, u) \ - copy_from_user(k, u, sizeof(struct termios)) + copy_from_user(k, u, sizeof(struct termios2)) #define kernel_termios_to_user_termios(u, k) \ + copy_to_user(u, k, sizeof(struct termios2)) + +#define user_termios_to_kernel_termios_1(k, u) \ + copy_from_user(k, u, sizeof(struct termios)) + +#define kernel_termios_to_user_termios_1(u, k) \ copy_to_user(u, k, sizeof(struct termios)) #endif /* _ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h index 3729d92d3fa8..dc8c20ac7191 100644 --- a/arch/alpha/include/uapi/asm/ioctls.h +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -32,6 +32,11 @@ #define TCXONC _IO('t', 30) #define TCFLSH _IO('t', 31) +#define TCGETS2 _IOR('T', 42, struct termios2) +#define TCSETS2 _IOW('T', 43, struct termios2) +#define TCSETSW2 _IOW('T', 44, struct termios2) +#define TCSETSF2 _IOW('T', 45, struct termios2) + #define TIOCSWINSZ _IOW('t', 103, struct winsize) #define TIOCGWINSZ _IOR('t', 104, struct winsize) #define TIOCSTART _IO('t', 110) /* start output, like ^Q */ diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h index 05e0398a83a6..53b3ecff2f50 100644 --- a/arch/alpha/include/uapi/asm/termbits.h +++ b/arch/alpha/include/uapi/asm/termbits.h @@ -26,6 +26,19 @@ struct termios { speed_t c_ospeed; /* output speed */ }; +/* Alpha has identical termios and termios2 */ + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + /* Alpha has matching termios and ktermios */ struct ktermios { @@ -148,6 +161,7 @@ struct ktermios { #define B3000000 00034 #define B3500000 00035 #define B4000000 00036 +#define BOTHER 00037 #define CSIZE 00001400 #define CS5 00000000 @@ -165,6 +179,9 @@ struct ktermios { #define CMSPAR 010000000000 /* mark or space (stick) parity */ #define CRTSCTS 020000000000 /* flow control */ +#define CIBAUD 07600000 +#define IBSHIFT 16 + /* c_lflag bits */ #define ISIG 0x00000080 #define ICANON 0x00000100 From ca35750b5fb7bd3c4233a77b0b2de6f6a9c6cbdc Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 6 Jun 2018 21:42:32 +0200 Subject: [PATCH 0465/2608] watchdog/core: Add missing prototypes for weak functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 81bd415c91eb966118d773dddf254aebf3022411 upstream. The split out of the hard lockup detector exposed two new weak functions, but no prototypes for them, which triggers the build warning: kernel/watchdog.c:109:12: warning: no previous prototype for ‘watchdog_nmi_enable’ [-Wmissing-prototypes] kernel/watchdog.c:115:13: warning: no previous prototype for ‘watchdog_nmi_disable’ [-Wmissing-prototypes] Add the prototypes. Fixes: 73ce0511c436 ("kernel/watchdog.c: move hardlockup detector to separate file") Signed-off-by: Mathieu Malaterre Signed-off-by: Thomas Gleixner Cc: Babu Moger Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180606194232.17653-1-malat@debian.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index b8d868d23e79..50d143995338 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -113,6 +113,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); +int watchdog_nmi_enable(unsigned int cpu); +void watchdog_nmi_disable(unsigned int cpu); /** * touch_nmi_watchdog - restart NMI watchdog timeout. From 84e7fe7bd079dcc81622e56912f002287cda8b32 Mon Sep 17 00:00:00 2001 From: Lu Fengqi Date: Wed, 24 Oct 2018 20:24:03 +0800 Subject: [PATCH 0466/2608] btrfs: fix pinned underflow after transaction aborted commit fcd5e74288f7d36991b1f0fb96b8c57079645e38 upstream. When running generic/475, we may get the following warning in dmesg: [ 6902.102154] WARNING: CPU: 3 PID: 18013 at fs/btrfs/extent-tree.c:9776 btrfs_free_block_groups+0x2af/0x3b0 [btrfs] [ 6902.109160] CPU: 3 PID: 18013 Comm: umount Tainted: G W O 4.19.0-rc8+ #8 [ 6902.110971] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 6902.112857] RIP: 0010:btrfs_free_block_groups+0x2af/0x3b0 [btrfs] [ 6902.118921] RSP: 0018:ffffc9000459bdb0 EFLAGS: 00010286 [ 6902.120315] RAX: ffff880175050bb0 RBX: ffff8801124a8000 RCX: 0000000000170007 [ 6902.121969] RDX: 0000000000000002 RSI: 0000000000170007 RDI: ffffffff8125fb74 [ 6902.123716] RBP: ffff880175055d10 R08: 0000000000000000 R09: 0000000000000000 [ 6902.125417] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880175055d88 [ 6902.127129] R13: ffff880175050bb0 R14: 0000000000000000 R15: dead000000000100 [ 6902.129060] FS: 00007f4507223780(0000) GS:ffff88017ba00000(0000) knlGS:0000000000000000 [ 6902.130996] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6902.132558] CR2: 00005623599cac78 CR3: 000000014b700001 CR4: 00000000003606e0 [ 6902.134270] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6902.135981] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 6902.137836] Call Trace: [ 6902.138939] close_ctree+0x171/0x330 [btrfs] [ 6902.140181] ? kthread_stop+0x146/0x1f0 [ 6902.141277] generic_shutdown_super+0x6c/0x100 [ 6902.142517] kill_anon_super+0x14/0x30 [ 6902.143554] btrfs_kill_super+0x13/0x100 [btrfs] [ 6902.144790] deactivate_locked_super+0x2f/0x70 [ 6902.146014] cleanup_mnt+0x3b/0x70 [ 6902.147020] task_work_run+0x9e/0xd0 [ 6902.148036] do_syscall_64+0x470/0x600 [ 6902.149142] ? trace_hardirqs_off_thunk+0x1a/0x1c [ 6902.150375] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 6902.151640] RIP: 0033:0x7f45077a6a7b [ 6902.157324] RSP: 002b:00007ffd589f3e68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 6902.159187] RAX: 0000000000000000 RBX: 000055e8eec732b0 RCX: 00007f45077a6a7b [ 6902.160834] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000055e8eec73490 [ 6902.162526] RBP: 0000000000000000 R08: 000055e8eec734b0 R09: 00007ffd589f26c0 [ 6902.164141] R10: 0000000000000000 R11: 0000000000000246 R12: 000055e8eec73490 [ 6902.165815] R13: 00007f4507ac61a4 R14: 0000000000000000 R15: 00007ffd589f40d8 [ 6902.167553] irq event stamp: 0 [ 6902.168998] hardirqs last enabled at (0): [<0000000000000000>] (null) [ 6902.170731] hardirqs last disabled at (0): [] copy_process.part.55+0x3b0/0x1f00 [ 6902.172773] softirqs last enabled at (0): [] copy_process.part.55+0x3b0/0x1f00 [ 6902.174671] softirqs last disabled at (0): [<0000000000000000>] (null) [ 6902.176407] ---[ end trace 463138c2986b275c ]--- [ 6902.177636] BTRFS info (device dm-3): space_info 4 has 273465344 free, is not full [ 6902.179453] BTRFS info (device dm-3): space_info total=276824064, used=4685824, pinned=18446744073708158976, reserved=0, may_use=0, readonly=65536 In the above line there's "pinned=18446744073708158976" which is an unsigned u64 value of -1392640, an obvious underflow. When transaction_kthread is running cleanup_transaction(), another fsstress is running btrfs_commit_transaction(). The btrfs_finish_extent_commit() may get the same range as btrfs_destroy_pinned_extent() got, which causes the pinned underflow. Fixes: d4b450cd4b33 ("Btrfs: fix race between transaction commit and empty block group removal") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Lu Fengqi Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5cf1bbe9754c..0e67cee73c53 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4428,13 +4428,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, unpin = pinned_extents; again: while (1) { + /* + * The btrfs_finish_extent_commit() may get the same range as + * ours between find_first_extent_bit and clear_extent_dirty. + * Hence, hold the unused_bg_unpin_mutex to avoid double unpin + * the same extent range. + */ + mutex_lock(&fs_info->unused_bg_unpin_mutex); ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY, NULL); - if (ret) + if (ret) { + mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; + } clear_extent_dirty(unpin, start, end); btrfs_error_unpin_extent_range(fs_info, start, end); + mutex_unlock(&fs_info->unused_bg_unpin_mutex); cond_resched(); } From 6dcd34f106ec113db47dddf9a70f0af7d81af468 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Tue, 30 Oct 2018 18:04:04 +0800 Subject: [PATCH 0467/2608] Btrfs: fix cur_offset in the error case for nocow commit 506481b20e818db40b6198815904ecd2d6daee64 upstream. When the cow_file_range fails, the related resources are unlocked according to the range [start..end), so the unlock cannot be repeated in run_delalloc_nocow. In some cases (e.g. cur_offset <= end && cow_start != -1), cur_offset is not updated correctly, so move the cur_offset update before cow_file_range. kernel BUG at mm/page-writeback.c:2663! Internal error: Oops - BUG: 0 [#1] SMP CPU: 3 PID: 31525 Comm: kworker/u8:7 Tainted: P O Hardware name: Realtek_RTD1296 (DT) Workqueue: writeback wb_workfn (flush-btrfs-1) task: ffffffc076db3380 ti: ffffffc02e9ac000 task.ti: ffffffc02e9ac000 PC is at clear_page_dirty_for_io+0x1bc/0x1e8 LR is at clear_page_dirty_for_io+0x14/0x1e8 pc : [] lr : [] pstate: 40000145 sp : ffffffc02e9af4f0 Process kworker/u8:7 (pid: 31525, stack limit = 0xffffffc02e9ac020) Call trace: [] clear_page_dirty_for_io+0x1bc/0x1e8 [] extent_clear_unlock_delalloc+0x1e4/0x210 [btrfs] [] run_delalloc_nocow+0x3b8/0x948 [btrfs] [] run_delalloc_range+0x250/0x3a8 [btrfs] [] writepage_delalloc.isra.21+0xbc/0x1d8 [btrfs] [] __extent_writepage+0xe8/0x248 [btrfs] [] extent_write_cache_pages.isra.17+0x164/0x378 [btrfs] [] extent_writepages+0x48/0x68 [btrfs] [] btrfs_writepages+0x20/0x30 [btrfs] [] do_writepages+0x30/0x88 [] __writeback_single_inode+0x34/0x198 [] writeback_sb_inodes+0x184/0x3c0 [] __writeback_inodes_wb+0x6c/0xc0 [] wb_writeback+0x1b8/0x1c0 [] wb_workfn+0x150/0x250 [] process_one_work+0x1dc/0x388 [] worker_thread+0x130/0x500 [] kthread+0x10c/0x110 [] ret_from_fork+0x10/0x40 Code: d503201f a9025bb5 a90363b7 f90023b9 (d4210000) CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Robbie Ko Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 90568a21fa77..1c340d6c8568 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1565,12 +1565,11 @@ out_check: } btrfs_release_path(path); - if (cur_offset <= end && cow_start == (u64)-1) { + if (cur_offset <= end && cow_start == (u64)-1) cow_start = cur_offset; - cur_offset = end; - } if (cow_start != (u64)-1) { + cur_offset = end; ret = cow_file_range(inode, locked_page, cow_start, end, end, page_started, nr_written, 1, NULL); if (ret) From c7a082fb631a3282667b60ad36bfbeea45322cdd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 5 Nov 2018 11:14:05 +0000 Subject: [PATCH 0468/2608] Btrfs: fix infinite loop on inode eviction after deduplication of eof block commit 11023d3f5fdf89bba5e1142127701ca6e6014587 upstream. If we attempt to deduplicate the last block of a file A into the middle of a file B, and file A's size is not a multiple of the block size, we end rounding the deduplication length to 0 bytes, to avoid the data corruption issue fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files"). However a length of zero will cause the insertion of an extent state with a start value greater (by 1) then the end value, leading to a corrupt extent state that will trigger a warning and cause chaos such as an infinite loop during inode eviction. Example trace: [96049.833585] ------------[ cut here ]------------ [96049.833714] WARNING: CPU: 0 PID: 24448 at fs/btrfs/extent_io.c:436 insert_state+0x101/0x120 [btrfs] [96049.833767] CPU: 0 PID: 24448 Comm: xfs_io Not tainted 4.19.0-rc7-btrfs-next-39 #1 [96049.833768] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [96049.833780] RIP: 0010:insert_state+0x101/0x120 [btrfs] [96049.833783] RSP: 0018:ffffafd2c3707af0 EFLAGS: 00010282 [96049.833785] RAX: 0000000000000000 RBX: 000000000004dfff RCX: 0000000000000006 [96049.833786] RDX: 0000000000000007 RSI: ffff99045c143230 RDI: ffff99047b2168a0 [96049.833787] RBP: ffff990457851cd0 R08: 0000000000000001 R09: 0000000000000000 [96049.833787] R10: ffffafd2c3707ab8 R11: 0000000000000000 R12: ffff9903b93b12c8 [96049.833788] R13: 000000000004e000 R14: ffffafd2c3707b80 R15: ffffafd2c3707b78 [96049.833790] FS: 00007f5c14e7d700(0000) GS:ffff99047b200000(0000) knlGS:0000000000000000 [96049.833791] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [96049.833792] CR2: 00007f5c146abff8 CR3: 0000000115f4c004 CR4: 00000000003606f0 [96049.833795] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [96049.833796] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [96049.833796] Call Trace: [96049.833809] __set_extent_bit+0x46c/0x6a0 [btrfs] [96049.833823] lock_extent_bits+0x6b/0x210 [btrfs] [96049.833831] ? _raw_spin_unlock+0x24/0x30 [96049.833841] ? test_range_bit+0xdf/0x130 [btrfs] [96049.833853] lock_extent_range+0x8e/0x150 [btrfs] [96049.833864] btrfs_double_extent_lock+0x78/0xb0 [btrfs] [96049.833875] btrfs_extent_same_range+0x14e/0x550 [btrfs] [96049.833885] ? rcu_read_lock_sched_held+0x3f/0x70 [96049.833890] ? __kmalloc_node+0x2b0/0x2f0 [96049.833899] ? btrfs_dedupe_file_range+0x19a/0x280 [btrfs] [96049.833909] btrfs_dedupe_file_range+0x270/0x280 [btrfs] [96049.833916] vfs_dedupe_file_range_one+0xd9/0xe0 [96049.833919] vfs_dedupe_file_range+0x131/0x1b0 [96049.833924] do_vfs_ioctl+0x272/0x6e0 [96049.833927] ? __fget+0x113/0x200 [96049.833931] ksys_ioctl+0x70/0x80 [96049.833933] __x64_sys_ioctl+0x16/0x20 [96049.833937] do_syscall_64+0x60/0x1b0 [96049.833939] entry_SYSCALL_64_after_hwframe+0x49/0xbe [96049.833941] RIP: 0033:0x7f5c1478ddd7 [96049.833943] RSP: 002b:00007ffe15b196a8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [96049.833945] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5c1478ddd7 [96049.833946] RDX: 00005625ece322d0 RSI: 00000000c0189436 RDI: 0000000000000004 [96049.833947] RBP: 0000000000000000 R08: 00007f5c14a46f48 R09: 0000000000000040 [96049.833948] R10: 0000000000000541 R11: 0000000000000202 R12: 0000000000000000 [96049.833949] R13: 0000000000000000 R14: 0000000000000004 R15: 00005625ece322d0 [96049.833954] irq event stamp: 6196 [96049.833956] hardirqs last enabled at (6195): [] console_unlock+0x503/0x640 [96049.833958] hardirqs last disabled at (6196): [] trace_hardirqs_off_thunk+0x1a/0x1c [96049.833959] softirqs last enabled at (6114): [] __do_softirq+0x370/0x421 [96049.833964] softirqs last disabled at (6095): [] irq_exit+0xcd/0xe0 [96049.833965] ---[ end trace db7b05f01b7fa10c ]--- [96049.935816] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.935822] irq event stamp: 6584 [96049.935823] hardirqs last enabled at (6583): [] console_unlock+0x503/0x640 [96049.935825] hardirqs last disabled at (6584): [] trace_hardirqs_off_thunk+0x1a/0x1c [96049.935827] softirqs last enabled at (6328): [] __do_softirq+0x370/0x421 [96049.935828] softirqs last disabled at (6313): [] irq_exit+0xcd/0xe0 [96049.935829] ---[ end trace db7b05f01b7fa123 ]--- [96049.935840] ------------[ cut here ]------------ [96049.936065] WARNING: CPU: 1 PID: 24463 at fs/btrfs/extent_io.c:436 insert_state+0x101/0x120 [btrfs] [96049.936107] CPU: 1 PID: 24463 Comm: umount Tainted: G W 4.19.0-rc7-btrfs-next-39 #1 [96049.936108] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [96049.936117] RIP: 0010:insert_state+0x101/0x120 [btrfs] [96049.936119] RSP: 0018:ffffafd2c3637bc0 EFLAGS: 00010282 [96049.936120] RAX: 0000000000000000 RBX: 000000000004dfff RCX: 0000000000000006 [96049.936121] RDX: 0000000000000007 RSI: ffff990445cf88e0 RDI: ffff99047b2968a0 [96049.936122] RBP: ffff990457851cd0 R08: 0000000000000001 R09: 0000000000000000 [96049.936123] R10: ffffafd2c3637b88 R11: 0000000000000000 R12: ffff9904574301e8 [96049.936124] R13: 000000000004e000 R14: ffffafd2c3637c50 R15: ffffafd2c3637c48 [96049.936125] FS: 00007fe4b87e72c0(0000) GS:ffff99047b280000(0000) knlGS:0000000000000000 [96049.936126] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [96049.936128] CR2: 00005562e52618d8 CR3: 00000001151c8005 CR4: 00000000003606e0 [96049.936129] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [96049.936131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [96049.936131] Call Trace: [96049.936141] __set_extent_bit+0x46c/0x6a0 [btrfs] [96049.936154] lock_extent_bits+0x6b/0x210 [btrfs] [96049.936167] btrfs_evict_inode+0x1e1/0x5a0 [btrfs] [96049.936172] evict+0xbf/0x1c0 [96049.936174] dispose_list+0x51/0x80 [96049.936176] evict_inodes+0x193/0x1c0 [96049.936180] generic_shutdown_super+0x3f/0x110 [96049.936182] kill_anon_super+0xe/0x30 [96049.936189] btrfs_kill_super+0x13/0x100 [btrfs] [96049.936191] deactivate_locked_super+0x3a/0x70 [96049.936193] cleanup_mnt+0x3b/0x80 [96049.936195] task_work_run+0x93/0xc0 [96049.936198] exit_to_usermode_loop+0xfa/0x100 [96049.936201] do_syscall_64+0x17f/0x1b0 [96049.936202] entry_SYSCALL_64_after_hwframe+0x49/0xbe [96049.936204] RIP: 0033:0x7fe4b80cfb37 [96049.936206] RSP: 002b:00007ffff092b688 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [96049.936207] RAX: 0000000000000000 RBX: 00005562e5259060 RCX: 00007fe4b80cfb37 [96049.936208] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00005562e525faa0 [96049.936209] RBP: 00005562e525faa0 R08: 00005562e525f770 R09: 0000000000000015 [96049.936210] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fe4b85d1e64 [96049.936211] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.936211] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.936216] irq event stamp: 6616 [96049.936219] hardirqs last enabled at (6615): [] console_unlock+0x503/0x640 [96049.936219] hardirqs last disabled at (6616): [] trace_hardirqs_off_thunk+0x1a/0x1c [96049.936222] softirqs last enabled at (6328): [] __do_softirq+0x370/0x421 [96049.936222] softirqs last disabled at (6313): [] irq_exit+0xcd/0xe0 [96049.936223] ---[ end trace db7b05f01b7fa124 ]--- The second stack trace, from inode eviction, is repeated forever due to the infinite loop during eviction. This is the same type of problem fixed way back in 2015 by commit 113e8283869b ("Btrfs: fix inode eviction infinite loop after extent_same ioctl") and commit ccccf3d67294 ("Btrfs: fix inode eviction infinite loop after cloning into it"). So fix this by returning immediately if the deduplication range length gets rounded down to 0 bytes, as there is nothing that needs to be done in such case. Example reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xe6 0 100" /mnt/foo $ xfs_io -f -c "pwrite -S 0xe6 0 1M" /mnt/bar # Unmount the filesystem and mount it again so that we start without any # extent state records when we ask for the deduplication. $ umount /mnt $ mount /dev/sdb /mnt $ xfs_io -c "dedupe /mnt/foo 0 500K 100" /mnt/bar # This unmount triggers the infinite loop. $ umount /mnt A test case for fstests will follow soon. Fixes: de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files") CC: # 4.19+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9333e4cda68d..4cdfc6f8e6a9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3178,6 +3178,8 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize; len = round_down(i_size_read(src), sz) - loff; + if (len == 0) + return 0; olen = len; } } From d62eead029806345c60becda77169d5e01009510 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 5 Nov 2018 11:14:17 +0000 Subject: [PATCH 0469/2608] Btrfs: fix data corruption due to cloning of eof block commit ac765f83f1397646c11092a032d4f62c3d478b81 upstream. We currently allow cloning a range from a file which includes the last block of the file even if the file's size is not aligned to the block size. This is fine and useful when the destination file has the same size, but when it does not and the range ends somewhere in the middle of the destination file, it leads to corruption because the bytes between the EOF and the end of the block have undefined data (when there is support for discard/trimming they have a value of 0x00). Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ export foo_size=$((256 * 1024 + 100)) $ xfs_io -f -c "pwrite -S 0x3c 0 $foo_size" /mnt/foo $ xfs_io -f -c "pwrite -S 0xb5 0 1M" /mnt/bar $ xfs_io -c "reflink /mnt/foo 0 512K $foo_size" /mnt/bar $ od -A d -t x1 /mnt/bar 0000000 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 * 0524288 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c * 0786528 3c 3c 3c 3c 00 00 00 00 00 00 00 00 00 00 00 00 0786544 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0790528 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 * 1048576 The bytes in the range from 786532 (512Kb + 256Kb + 100 bytes) to 790527 (512Kb + 256Kb + 4Kb - 1) got corrupted, having now a value of 0x00 instead of 0xb5. This is similar to the problem we had for deduplication that got recently fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files"). Fix this by not allowing such operations to be performed and return the errno -EINVAL to user space. This is what XFS is doing as well at the VFS level. This change however now makes us return -EINVAL instead of -EOPNOTSUPP for cases where the source range maps to an inline extent and the destination range's end is smaller then the destination file's size, since the detection of inline extents is done during the actual process of dropping file extent items (at __btrfs_drop_extents()). Returning the -EINVAL error is done early on and solely based on the input parameters (offsets and length) and destination file's size. This makes us consistent with XFS and anyone else supporting cloning since this case is now checked at a higher level in the VFS and is where the -EINVAL will be returned from starting with kernel 4.20 (the VFS changed was introduced in 4.20-rc1 by commit 07d19dc9fbe9 ("vfs: avoid problematic remapping requests into partial EOF block"). So this change is more geared towards stable kernels, as it's unlikely the new VFS checks get removed intentionally. A test case for fstests follows soon, as well as an update to filter existing tests that expect -EOPNOTSUPP to accept -EINVAL as well. CC: # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4cdfc6f8e6a9..cddd63b9103f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3909,9 +3909,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, goto out_unlock; if (len == 0) olen = len = src->i_size - off; - /* if we extend to eof, continue to block boundary */ - if (off + len == src->i_size) + /* + * If we extend to eof, continue to block boundary if and only if the + * destination end offset matches the destination file's size, otherwise + * we would be corrupting data by placing the eof block into the middle + * of a file. + */ + if (off + len == src->i_size) { + if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size) + goto out_unlock; len = ALIGN(src->i_size, bs) - off; + } if (len == 0) { ret = 0; From 79d80b876396693e5b6d7d345963b83a2f917816 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Nov 2018 03:48:54 +0000 Subject: [PATCH 0470/2608] clockevents/drivers/i8253: Add support for PIT shutdown quirk commit 35b69a420bfb56b7b74cb635ea903db05e357bec upstream. Add support for platforms where pit_shutdown() doesn't work because of a quirk in the PIT emulation. On these platforms setting the counter register to zero causes the PIT to start running again, negating the shutdown. Provide a global variable that controls whether the counter register is zero'ed, which platform specific code can override. Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Cc: "gregkh@linuxfoundation.org" Cc: "devel@linuxdriverproject.org" Cc: "daniel.lezcano@linaro.org" Cc: "virtualization@lists.linux-foundation.org" Cc: "jgross@suse.com" Cc: "akataria@vmware.com" Cc: "olaf@aepfle.de" Cc: "apw@canonical.com" Cc: vkuznets Cc: "jasowang@redhat.com" Cc: "marcelo.cerri@canonical.com" Cc: KY Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1541303219-11142-2-git-send-email-mikelley@microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/i8253.c | 14 ++++++++++++-- include/linux/i8253.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index 9c38895542f4..d4350bb10b83 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -20,6 +20,13 @@ DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); +/* + * Handle PIT quirk in pit_shutdown() where zeroing the counter register + * restarts the PIT, negating the shutdown. On platforms with the quirk, + * platform specific code can set this to false. + */ +bool i8253_clear_counter_on_shutdown __ro_after_init = true; + #ifdef CONFIG_CLKSRC_I8253 /* * Since the PIT overflows every tick, its not very useful @@ -109,8 +116,11 @@ static int pit_shutdown(struct clock_event_device *evt) raw_spin_lock(&i8253_lock); outb_p(0x30, PIT_MODE); - outb_p(0, PIT_CH0); - outb_p(0, PIT_CH0); + + if (i8253_clear_counter_on_shutdown) { + outb_p(0, PIT_CH0); + outb_p(0, PIT_CH0); + } raw_spin_unlock(&i8253_lock); return 0; diff --git a/include/linux/i8253.h b/include/linux/i8253.h index e6bb36a97519..8336b2f6f834 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,6 +21,7 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; +extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); From 9910b2578039587832804469610f6742c2d06148 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 17:11:19 -0400 Subject: [PATCH 0471/2608] ext4: add missing brelse() update_backups()'s error path commit ea0abbb648452cdb6e1734b702b6330a7448fcf8 upstream. Fixes: ac27a0ec112a ("ext4: initial copy of files from ext3") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.19 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e344e606c054..bf58402da7ac 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1097,8 +1097,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, backup_block, backup_block - ext4_group_first_block_no(sb, group)); BUFFER_TRACE(bh, "get_write_access"); - if ((err = ext4_journal_get_write_access(handle, bh))) + if ((err = ext4_journal_get_write_access(handle, bh))) { + brelse(bh); break; + } lock_buffer(bh); memcpy(bh->b_data, data, size); if (rest) From feddbc0114dfc5f602ac471e400f36d1e9c6a4b6 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:22:10 -0400 Subject: [PATCH 0472/2608] ext4: add missing brelse() in set_flexbg_block_bitmap()'s error path commit cea5794122125bf67559906a0762186cf417099c upstream. Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...") Cc: stable@kernel.org # 3.3 Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bf58402da7ac..46098de7222e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -444,16 +444,18 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); - if (err) + if (err) { + brelse(bh); return err; + } ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, block - start, count2); ext4_set_bits(bh->b_data, block - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (unlikely(err)) return err; - brelse(bh); } return 0; From 64176ffd7ffcd22dd748cf800fd3c1df8d18a137 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:50:08 -0400 Subject: [PATCH 0473/2608] ext4: add missing brelse() add_new_gdb_meta_bg()'s error path commit 61a9c11e5e7a0dab5381afa5d9d4dd5ebf18f7a0 upstream. Fixes: 01f795f9e0d6 ("ext4: add online resizing support for meta_bg ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 46098de7222e..e689438ffb57 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -901,6 +901,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb, sizeof(struct buffer_head *), GFP_NOFS); if (!n_group_desc) { + brelse(gdb_bh); err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); @@ -916,8 +917,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb, kvfree(o_group_desc); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); - if (unlikely(err)) - brelse(gdb_bh); return err; } From 4e7e558e8bfa8017a04a9735fc2ed65cf3b1ea3c Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:13:17 -0400 Subject: [PATCH 0474/2608] ext4: avoid potential extra brelse in setup_new_flex_group_blocks() commit 9e4028935cca3f9ef9b6a90df9da6f1f94853536 upstream. Currently bh is set to NULL only during first iteration of for cycle, then this pointer is not cleared after end of using. Therefore rollback after errors can lead to extra brelse(bh) call, decrements bh counter and later trigger an unexpected warning in __brelse() Patch moves brelse() calls in body of cycle to exclude requirement of brelse() call in rollback. Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.3+ Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e689438ffb57..46565a6f8137 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -592,7 +592,6 @@ handle_bb: bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); - bh = NULL; goto out; } overhead = ext4_group_overhead_blocks(sb, group); @@ -604,9 +603,9 @@ handle_bb: ext4_mark_bitmap_end(group_data[i].blocks_count, sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); handle_ib: if (bg_flags[i] & EXT4_BG_INODE_UNINIT) @@ -621,18 +620,16 @@ handle_ib: bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); - bh = NULL; goto out; } ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); } - bh = NULL; /* Mark group tables in block bitmap */ for (j = 0; j < GROUP_TABLE_COUNT; j++) { @@ -663,7 +660,6 @@ handle_ib: } out: - brelse(bh); err2 = ext4_journal_stop(handle); if (err2 && !err) err = err2; From dd2775337188cc97bcc8ef75f876fdf55a4a6501 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 9 Nov 2018 11:34:40 -0500 Subject: [PATCH 0475/2608] ext4: missing !bh check in ext4_xattr_inode_write() commit eb6984fa4ce2837dcb1f66720a600f31b0bb3739 upstream. According to Ted Ts'o ext4_getblk() called in ext4_xattr_inode_write() should not return bh = NULL The only time that bh could be NULL, then, would be in the case of something really going wrong; a programming error elsewhere (perhaps a wild pointer dereference) or I/O error causing on-disk file system corruption (although that would be highly unlikely given that we had *just* allocated the blocks and so the metadata blocks in question probably would still be in the cache). Fixes: e50e5129f384 ("ext4: xattr-in-inode support") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.13 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 9bc50eef6127..34975e1aa7d2 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1387,6 +1387,12 @@ retry: bh = ext4_getblk(handle, ea_inode, block, 0); if (IS_ERR(bh)) return PTR_ERR(bh); + if (!bh) { + WARN_ON_ONCE(1); + EXT4_ERROR_INODE(ea_inode, + "ext4_getblk() return bh = NULL"); + return -EFSCORRUPTED; + } ret = ext4_journal_get_write_access(handle, bh); if (ret) goto out; From 3caa7b620c0f9e2047468447ab26684daae1913f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 16:20:40 -0500 Subject: [PATCH 0476/2608] ext4: fix possible inode leak in the retry loop of ext4_resize_fs() commit db6aee62406d9fbb53315fcddd81f1dc271d49fa upstream. Fixes: 1c6bd7173d66 ("ext4: convert file system to meta_bg if needed ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 46565a6f8137..075738830b87 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2028,6 +2028,10 @@ retry: n_blocks_count_retry = 0; free_flex_gd(flex_gd); flex_gd = NULL; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } goto retry; } From da497e7a38bb774a1ed6f6264b52704c9330e1fd Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 16:49:50 -0500 Subject: [PATCH 0477/2608] ext4: avoid buffer leak on shutdown in ext4_mark_iloc_dirty() commit a6758309a005060b8297a538a457c88699cb2520 upstream. ext4_mark_iloc_dirty() callers expect that it releases iloc->bh even if it returns an error. Fixes: 0db1ff222d40 ("ext4: add shutdown bit and check for it") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.11 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9808df52ceca..bd2bf83b1a1f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5671,9 +5671,10 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { + put_bh(iloc->bh); return -EIO; - + } if (IS_I_VERSION(inode)) inode_inc_iversion(inode); From cffd3297173a7397da8171cd2ef20ae8eec8cee8 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 17:01:36 -0500 Subject: [PATCH 0478/2608] ext4: avoid buffer leak in ext4_orphan_add() after prior errors commit feaf264ce7f8d54582e2f66eb82dd9dd124c94f3 upstream. Fixes: d745a8c20c1f ("ext4: reduce contention on s_orphan_lock") Fixes: 6e3617e579e0 ("ext4: Handle non empty on-disk orphan link") Cc: Dmitry Monakhov Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.34 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index eb0d8ee39827..cfa0dc2d97cd 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2820,7 +2820,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) list_del_init(&EXT4_I(inode)->i_orphan); mutex_unlock(&sbi->s_orphan_lock); } - } + } else + brelse(iloc.bh); + jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); From 65b1ce496c490e74b88f6fafc84a3eed756807a4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 16:16:01 -0500 Subject: [PATCH 0479/2608] ext4: fix missing cleanup if ext4_alloc_flex_bg_array() fails while resizing commit f348e2241fb73515d65b5d77dd9c174128a7fbf2 upstream. Fixes: 117fff10d7f1 ("ext4: grow the s_flex_groups array as needed ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 075738830b87..c2cbd21bf023 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1992,7 +1992,7 @@ retry: err = ext4_alloc_flex_bg_array(sb, n_group + 1); if (err) - return err; + goto out; err = ext4_mb_alloc_groupinfo(sb, n_group + 1); if (err) From bf04dace8318d6ae20a11c9e7692f36d9e387c23 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Nov 2018 17:18:17 -0500 Subject: [PATCH 0480/2608] ext4: avoid possible double brelse() in add_new_gdb() on error path commit 4f32c38b4662312dd3c5f113d8bdd459887fb773 upstream. Fixes: b40971426a83 ("ext4: add error checking to calls to ...") Reported-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.38 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c2cbd21bf023..deebb8842c82 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -846,6 +846,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); + iloc.bh = NULL; goto exit_inode; } brelse(dind); From 1124e5a89bd285468563df6723ceebab754735f6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 7 Nov 2018 10:32:53 -0500 Subject: [PATCH 0481/2608] ext4: fix possible leak of sbi->s_group_desc_leak in error path commit 9e463084cdb22e0b56b2dfbc50461020409a5fd3 upstream. Fixes: bfe0a5f47ada ("ext4: add more mount time checks of the superblock") Reported-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.18 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 46ad267ef6d6..d788b963e7ac 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4012,6 +4012,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_groups_count = blocks_count; sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + ret = -EINVAL; + goto failed_mount; + } db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { @@ -4031,14 +4039,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; goto failed_mount; } - if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != - le32_to_cpu(es->s_inodes_count)) { - ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", - le32_to_cpu(es->s_inodes_count), - ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); - ret = -EINVAL; - goto failed_mount; - } bgl_lock_init(sbi->s_blockgroup_lock); From 181224f970c445bc3de2b3b42e30f2e2a294e30f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 10:56:28 -0500 Subject: [PATCH 0482/2608] ext4: fix possible leak of s_journal_flag_rwsem in error path commit af18e35bfd01e6d65a5e3ef84ffe8b252d1628c5 upstream. Fixes: c8585c6fcaf2 ("ext4: fix races between changing inode journal ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d788b963e7ac..7fd64f5f70f0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4442,6 +4442,7 @@ failed_mount6: percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_free_rwsem(&sbi->s_journal_flag_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); From 70ca35b44acea71dff052bb45e9c806a991d8bca Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:01:33 -0500 Subject: [PATCH 0483/2608] ext4: fix buffer leak in ext4_xattr_get_block() on error path commit ecaaf408478b6fb4d9986f9b6652f3824e374f4c upstream. Fixes: dec214d00e0d ("ext4: xattr inode deduplication") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.13 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 34975e1aa7d2..3e6f326c3480 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2281,8 +2281,10 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode) if (!bh) return ERR_PTR(-EIO); error = ext4_xattr_check_block(inode, bh); - if (error) + if (error) { + brelse(bh); return ERR_PTR(error); + } return bh; } From 8113972d21440db9d098d363ff00cca96d9ce1ab Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:07:01 -0500 Subject: [PATCH 0484/2608] ext4: release bs.bh before re-using in ext4_xattr_block_find() commit 45ae932d246f721e6584430017176cbcadfde610 upstream. bs.bh was taken in previous ext4_xattr_block_find() call, it should be released before re-using Fixes: 7e01c8e5420b ("ext3/4: fix uninitialized bs in ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.26 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3e6f326c3480..49b3d9b038e0 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2404,6 +2404,8 @@ retry_inode: error = ext4_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { if (EXT4_I(inode)->i_file_acl && !bs.s.base) { + brelse(bs.bh); + bs.bh = NULL; error = ext4_xattr_block_find(inode, &i, &bs); if (error) goto cleanup; From 4096fc0912778196f04d66fef5189a897dba0131 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:10:21 -0500 Subject: [PATCH 0485/2608] ext4: fix buffer leak in ext4_xattr_move_to_block() on error path commit 6bdc9977fcdedf47118d2caf7270a19f4b6d8a8f upstream. Fixes: 3f2571c1f91f ("ext4: factor out xattr moving") Fixes: 6dd4ee7cab7e ("ext4: Expand extra_inodes space per ...") Reviewed-by: Jan Kara Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.23 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 49b3d9b038e0..da46b5162937 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2626,6 +2626,8 @@ out: kfree(buffer); if (is) brelse(is->iloc.bh); + if (bs) + brelse(bs->bh); kfree(is); kfree(bs); From be7e29ce83f689887c198eeb45b2a81ba6497ea4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:14:35 -0500 Subject: [PATCH 0486/2608] ext4: fix buffer leak in ext4_expand_extra_isize_ea() on error path commit 53692ec074d00589c2cf1d6d17ca76ad0adce6ec upstream. Fixes: de05ca852679 ("ext4: move call to ext4_error() into ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.17 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index da46b5162937..a5923a1d0ff4 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2707,7 +2707,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle) { struct ext4_xattr_ibody_header *header; - struct buffer_head *bh; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); static unsigned int mnt_count; size_t min_offs; @@ -2748,13 +2747,17 @@ retry: * EA block can hold new_extra_isize bytes. */ if (EXT4_I(inode)->i_file_acl) { + struct buffer_head *bh; + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); error = -EIO; if (!bh) goto cleanup; error = ext4_xattr_check_block(inode, bh); - if (error) + if (error) { + brelse(bh); goto cleanup; + } base = BHDR(bh); end = bh->b_data + bh->b_size; min_offs = end - base; From 68b908750ea4cb285aef4f58ed89056e3b0039db Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 22:36:23 -0500 Subject: [PATCH 0487/2608] ext4: fix buffer leak in __ext4_read_dirblock() on error path commit de59fae0043f07de5d25e02ca360f7d57bfa5866 upstream. Fixes: dc6982ff4db1 ("ext4: refactor code to read directory blocks ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.9 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cfa0dc2d97cd..4e301b0cdfb5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -125,6 +125,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (!is_dx_block && type == INDEX) { ext4_error_inode(inode, func, line, block, "directory leaf block found instead of index block"); + brelse(bh); return ERR_PTR(-EFSCORRUPTED); } if (!ext4_has_metadata_csum(inode->i_sb) || From 5e64ee8788f585d3e7c144fc232efa45eb61a202 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Oct 2018 10:21:38 -0500 Subject: [PATCH 0488/2608] mount: Retest MNT_LOCKED in do_umount commit 25d202ed820ee347edec0bf3bf553544556bf64b upstream. It was recently pointed out that the one instance of testing MNT_LOCKED outside of the namespace_sem is in ksys_umount. Fix that by adding a test inside of do_umount with namespace_sem and the mount_lock held. As it helps to fail fails the existing test is maintained with an additional comment pointing out that it may be racy because the locks are not held. Cc: stable@vger.kernel.org Reported-by: Al Viro Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 9dc146e7b5e0..97d3907ca177 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1625,8 +1625,13 @@ static int do_umount(struct mount *mnt, int flags) namespace_lock(); lock_mount_hash(); - event++; + /* Recheck MNT_LOCKED with the locks held */ + retval = -EINVAL; + if (mnt->mnt.mnt_flags & MNT_LOCKED) + goto out; + + event++; if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); @@ -1640,6 +1645,7 @@ static int do_umount(struct mount *mnt, int flags) retval = 0; } } +out: unlock_mount_hash(); namespace_unlock(); return retval; @@ -1730,7 +1736,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; - if (mnt->mnt.mnt_flags & MNT_LOCKED) + if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ goto dput_and_out; retval = -EPERM; if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) From c4585861f8f8055e02fd42b81259a0cb81e32f71 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Oct 2018 09:04:18 -0500 Subject: [PATCH 0489/2608] mount: Don't allow copying MNT_UNBINDABLE|MNT_LOCKED mounts commit df7342b240185d58d3d9665c0bbf0a0f5570ec29 upstream. Jonathan Calmels from NVIDIA reported that he's able to bypass the mount visibility security check in place in the Linux kernel by using a combination of the unbindable property along with the private mount propagation option to allow a unprivileged user to see a path which was purposefully hidden by the root user. Reproducer: # Hide a path to all users using a tmpfs root@castiana:~# mount -t tmpfs tmpfs /sys/devices/ root@castiana:~# # As an unprivileged user, unshare user namespace and mount namespace stgraber@castiana:~$ unshare -U -m -r # Confirm the path is still not accessible root@castiana:~# ls /sys/devices/ # Make /sys recursively unbindable and private root@castiana:~# mount --make-runbindable /sys root@castiana:~# mount --make-private /sys # Recursively bind-mount the rest of /sys over to /mnnt root@castiana:~# mount --rbind /sys/ /mnt # Access our hidden /sys/device as an unprivileged user root@castiana:~# ls /mnt/devices/ breakpoint cpu cstate_core cstate_pkg i915 intel_pt isa kprobe LNXSYSTM:00 msr pci0000:00 platform pnp0 power software system tracepoint uncore_arb uncore_cbox_0 uncore_cbox_1 uprobe virtual Solve this by teaching copy_tree to fail if a mount turns out to be both unbindable and locked. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Jonathan Calmels Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 97d3907ca177..86273dab89da 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1814,8 +1814,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, for (s = r; s; s = next_mnt(s, r)) { if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { - s = skip_mnt_tree(s); - continue; + if (s->mnt.mnt_flags & MNT_LOCKED) { + /* Both unbindable and locked. */ + q = ERR_PTR(-EPERM); + goto out; + } else { + s = skip_mnt_tree(s); + continue; + } } if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(s->mnt.mnt_root)) { From f3f529741e54be10d162797275fe432cebe51b96 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Oct 2018 12:05:11 -0500 Subject: [PATCH 0490/2608] mount: Prevent MNT_DETACH from disconnecting locked mounts commit 9c8e0a1b683525464a2abe9fb4b54404a50ed2b4 upstream. Timothy Baldwin wrote: > As per mount_namespaces(7) unprivileged users should not be able to look under mount points: > > Mounts that come as a single unit from more privileged mount are locked > together and may not be separated in a less privileged mount namespace. > > However they can: > > 1. Create a mount namespace. > 2. In the mount namespace open a file descriptor to the parent of a mount point. > 3. Destroy the mount namespace. > 4. Use the file descriptor to look under the mount point. > > I have reproduced this with Linux 4.16.18 and Linux 4.18-rc8. > > The setup: > > $ sudo sysctl kernel.unprivileged_userns_clone=1 > kernel.unprivileged_userns_clone = 1 > $ mkdir -p A/B/Secret > $ sudo mount -t tmpfs hide A/B > > > "Secret" is indeed hidden as expected: > > $ ls -lR A > A: > total 0 > drwxrwxrwt 2 root root 40 Feb 12 21:08 B > > A/B: > total 0 > > > The attack revealing "Secret": > > $ unshare -Umr sh -c "exec unshare -m ls -lR /proc/self/fd/4/ 4 /proc/self/fd/4/: > total 0 > drwxr-xr-x 3 root root 60 Feb 12 21:08 B > > /proc/self/fd/4/B: > total 0 > drwxr-xr-x 2 root root 40 Feb 12 21:08 Secret > > /proc/self/fd/4/B/Secret: > total 0 I tracked this down to put_mnt_ns running passing UMOUNT_SYNC and disconnecting all of the mounts in a mount namespace. Fix this by factoring drop_mounts out of drop_collected_mounts and passing 0 instead of UMOUNT_SYNC. There are two possible behavior differences that result from this. - No longer setting UMOUNT_SYNC will no longer set MNT_SYNC_UMOUNT on the vfsmounts being unmounted. This effects the lazy rcu walk by kicking the walk out of rcu mode and forcing it to be a non-lazy walk. - No longer disconnecting locked mounts will keep some mounts around longer as they stay because the are locked to other mounts. There are only two users of drop_collected mounts: audit_tree.c and put_mnt_ns. In audit_tree.c the mounts are private and there are no rcu lazy walks only calls to iterate_mounts. So the changes should have no effect except for a small timing effect as the connected mounts are disconnected. In put_mnt_ns there may be references from process outside the mount namespace to the mounts. So the mounts remaining connected will be the bug fix that is needed. That rcu walks are allowed to continue appears not to be a problem especially as the rcu walk change was about an implementation detail not about semantics. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Timothy Baldwin Tested-by: Timothy Baldwin Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 86273dab89da..e9c13eedd739 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1874,7 +1874,7 @@ void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); - umount_tree(real_mount(mnt), UMOUNT_SYNC); + umount_tree(real_mount(mnt), 0); unlock_mount_hash(); namespace_unlock(); } From ce583650a908b4b4fdfb3ac13edacf17d4c55c2f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 27 Sep 2018 17:17:49 +0000 Subject: [PATCH 0491/2608] kdb: use correct pointer when 'btc' calls 'btt' commit dded2e159208a9edc21dd5c5f583afa28d378d39 upstream. On a powerpc 8xx, 'btc' fails as follows: Entering kdb (current=0x(ptrval), pid 282) due to Keyboard Entry kdb> btc btc: cpu status: Currently on cpu 0 Available cpus: 0 kdb_getarea: Bad address 0x0 when booting the kernel with 'debug_boot_weak_hash', it fails as well Entering kdb (current=0xba99ad80, pid 284) due to Keyboard Entry kdb> btc btc: cpu status: Currently on cpu 0 Available cpus: 0 kdb_getarea: Bad address 0xba99ad80 On other platforms, Oopses have been observed too, see https://github.com/linuxppc/linux/issues/139 This is due to btc calling 'btt' with %p pointer as an argument. This patch replaces %p by %px to get the real pointer value as expected by 'btt' Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: Signed-off-by: Christophe Leroy Reviewed-by: Daniel Thompson Signed-off-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_bt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 6ad4a9fcbd6f..7921ae4fca8d 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -179,14 +179,14 @@ kdb_bt(int argc, const char **argv) kdb_printf("no process for cpu %ld\n", cpu); return 0; } - sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu)); + sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); kdb_parse(buf); return 0; } kdb_printf("btc: cpu status: "); kdb_parse("cpu\n"); for_each_online_cpu(cpu) { - sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu)); + sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); kdb_parse(buf); touch_nmi_watchdog(); } From dedde93bd683730e78da54db42e52cb87cdb8a0c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 27 Sep 2018 17:17:57 +0000 Subject: [PATCH 0492/2608] kdb: print real address of pointers instead of hashed addresses commit 568fb6f42ac6851320adaea25f8f1b94de14e40a upstream. Since commit ad67b74d2469 ("printk: hash addresses printed with %p"), all pointers printed with %p are printed with hashed addresses instead of real addresses in order to avoid leaking addresses in dmesg and syslog. But this applies to kdb too, with is unfortunate: Entering kdb (current=0x(ptrval), pid 329) due to Keyboard Entry kdb> ps 15 sleeping system daemon (state M) processes suppressed, use 'ps A' to see all. Task Addr Pid Parent [*] cpu State Thread Command 0x(ptrval) 329 328 1 0 R 0x(ptrval) *sh 0x(ptrval) 1 0 0 0 S 0x(ptrval) init 0x(ptrval) 3 2 0 0 D 0x(ptrval) rcu_gp 0x(ptrval) 4 2 0 0 D 0x(ptrval) rcu_par_gp 0x(ptrval) 5 2 0 0 D 0x(ptrval) kworker/0:0 0x(ptrval) 6 2 0 0 D 0x(ptrval) kworker/0:0H 0x(ptrval) 7 2 0 0 D 0x(ptrval) kworker/u2:0 0x(ptrval) 8 2 0 0 D 0x(ptrval) mm_percpu_wq 0x(ptrval) 10 2 0 0 D 0x(ptrval) rcu_preempt The whole purpose of kdb is to debug, and for debugging real addresses need to be known. In addition, data displayed by kdb doesn't go into dmesg. This patch replaces all %p by %px in kdb in order to display real addresses. Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: Signed-off-by: Christophe Leroy Signed-off-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_main.c | 14 +++++++------- kernel/debug/kdb/kdb_support.c | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 07aefa8dbee8..993db6b2348e 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1182,7 +1182,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, if (reason == KDB_REASON_DEBUG) { /* special case below */ } else { - kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", + kdb_printf("\nEntering kdb (current=0x%px, pid %d) ", kdb_current, kdb_current ? kdb_current->pid : 0); #if defined(CONFIG_SMP) kdb_printf("on processor %d ", raw_smp_processor_id()); @@ -1198,7 +1198,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, */ switch (db_result) { case KDB_DB_BPT: - kdb_printf("\nEntering kdb (0x%p, pid %d) ", + kdb_printf("\nEntering kdb (0x%px, pid %d) ", kdb_current, kdb_current->pid); #if defined(CONFIG_SMP) kdb_printf("on processor %d ", raw_smp_processor_id()); @@ -2037,7 +2037,7 @@ static int kdb_lsmod(int argc, const char **argv) if (mod->state == MODULE_STATE_UNFORMED) continue; - kdb_printf("%-20s%8u 0x%p ", mod->name, + kdb_printf("%-20s%8u 0x%px ", mod->name, mod->core_layout.size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); @@ -2048,7 +2048,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf(" (Loading)"); else kdb_printf(" (Live)"); - kdb_printf(" 0x%p", mod->core_layout.base); + kdb_printf(" 0x%px", mod->core_layout.base); #ifdef CONFIG_MODULE_UNLOAD { @@ -2330,7 +2330,7 @@ void kdb_ps1(const struct task_struct *p) return; cpu = kdb_process_cpu(p); - kdb_printf("0x%p %8d %8d %d %4d %c 0x%p %c%s\n", + kdb_printf("0x%px %8d %8d %d %4d %c 0x%px %c%s\n", (void *)p, p->pid, p->parent->pid, kdb_task_has_cpu(p), kdb_process_cpu(p), kdb_task_state_char(p), @@ -2343,7 +2343,7 @@ void kdb_ps1(const struct task_struct *p) } else { if (KDB_TSK(cpu) != p) kdb_printf(" Error: does not match running " - "process table (0x%p)\n", KDB_TSK(cpu)); + "process table (0x%px)\n", KDB_TSK(cpu)); } } } @@ -2722,7 +2722,7 @@ int kdb_register_flags(char *cmd, for_each_kdbcmd(kp, i) { if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { kdb_printf("Duplicate kdb command registered: " - "%s, func %p help %s\n", cmd, func, help); + "%s, func %px help %s\n", cmd, func, help); return 1; } } diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index d35cc2d3a4cc..84422d2b95c0 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -40,7 +40,7 @@ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab) { if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname, + kdb_printf("kdbgetsymval: symname=%s, symtab=%px\n", symname, symtab); memset(symtab, 0, sizeof(*symtab)); symtab->sym_start = kallsyms_lookup_name(symname); @@ -88,7 +88,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) char *knt1 = NULL; if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab); + kdb_printf("kdbnearsym: addr=0x%lx, symtab=%px\n", addr, symtab); memset(symtab, 0, sizeof(*symtab)); if (addr < 4096) @@ -149,7 +149,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) symtab->mod_name = "kernel"; if (KDB_DEBUG(AR)) kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, " - "symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret, + "symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name); @@ -887,13 +887,13 @@ void debug_kusage(void) __func__, dah_first); if (dah_first) { h_used = (struct debug_alloc_header *)debug_alloc_pool; - kdb_printf("%s: h_used %p size %d\n", __func__, h_used, + kdb_printf("%s: h_used %px size %d\n", __func__, h_used, h_used->size); } do { h_used = (struct debug_alloc_header *) ((char *)h_free + dah_overhead + h_free->size); - kdb_printf("%s: h_used %p size %d caller %p\n", + kdb_printf("%s: h_used %px size %d caller %px\n", __func__, h_used, h_used->size, h_used->caller); h_free = (struct debug_alloc_header *) (debug_alloc_pool + h_free->next); @@ -902,7 +902,7 @@ void debug_kusage(void) ((char *)h_free + dah_overhead + h_free->size); if ((char *)h_used - debug_alloc_pool != sizeof(debug_alloc_pool_aligned)) - kdb_printf("%s: h_used %p size %d caller %p\n", + kdb_printf("%s: h_used %px size %d caller %px\n", __func__, h_used, h_used->size, h_used->caller); out: spin_unlock(&dap_lock); From a56609f725c3cfff71e0878036263477d59755a4 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Tue, 30 Oct 2018 15:10:40 -0500 Subject: [PATCH 0493/2608] sunrpc: correct the computation for page_ptr when truncating commit 5d7a5bcb67c70cbc904057ef52d3fcfeb24420bb upstream. When truncating the encode buffer, the page_ptr is getting advanced, causing the next page to be skipped while encoding. The page is still included in the response, so the response contains a page of bogus data. We need to adjust the page_ptr backwards to ensure we encode the next page into the correct place. We saw this triggered when concurrent directory modifications caused nfsd4_encode_direct_fattr() to return nfserr_noent, and the resulting call to xdr_truncate_encode() corrupted the READDIR reply. Signed-off-by: Frank Sorenson Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xdr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index e34f4ee7f2b6..13695ba8fc54 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -639,11 +639,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) WARN_ON_ONCE(xdr->iov); return; } - if (fraglen) { + if (fraglen) xdr->end = head->iov_base + head->iov_len; - xdr->page_ptr--; - } /* (otherwise assume xdr->end is already set) */ + xdr->page_ptr--; head->iov_len = len; buf->len = len; xdr->p = head->iov_base + head->iov_len; From 6d1c38aa47d432f425c5829261eaa8e624274a4f Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 8 Nov 2018 11:11:36 -0500 Subject: [PATCH 0494/2608] nfsd: COPY and CLONE operations require the saved filehandle to be set commit 01310bb7c9c98752cc763b36532fab028e0f8f81 upstream. Make sure we have a saved filehandle, otherwise we'll oops with a null pointer dereference in nfs4_preprocess_stateid_op(). Signed-off-by: Scott Mayhew Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6d16399a350e..ee765abad2ef 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1049,6 +1049,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; + if (!cstate->save_fh.fh_dentry) + return nfserr_nofilehandle; + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); if (status) { From 677f423c995d27bfafd34a7b86773830db6ac09f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 5 Nov 2018 03:48:25 +0000 Subject: [PATCH 0495/2608] rtc: hctosys: Add missing range error reporting commit 7ce9a992ffde8ce93d5ae5767362a5c7389ae895 upstream. Fix an issue with the 32-bit range error path in `rtc_hctosys' where no error code is set and consequently the successful preceding call result from `rtc_read_time' is propagated to `rtc_hctosys_ret'. This in turn makes any subsequent call to `hctosys_show' incorrectly report in sysfs that the system time has been set from this RTC while it has not. Set the error to ERANGE then if we can't express the result due to an overflow. Signed-off-by: Maciej W. Rozycki Fixes: b3a5ac42ab18 ("rtc: hctosys: Ensure system time doesn't overflow time_t") Cc: stable@vger.kernel.org # 4.17+ Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/hctosys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c index e79f2a181ad2..b9ec4a16db1f 100644 --- a/drivers/rtc/hctosys.c +++ b/drivers/rtc/hctosys.c @@ -50,8 +50,10 @@ static int __init rtc_hctosys(void) tv64.tv_sec = rtc_tm_to_time64(&tm); #if BITS_PER_LONG == 32 - if (tv64.tv_sec > INT_MAX) + if (tv64.tv_sec > INT_MAX) { + err = -ERANGE; goto err_read; + } #endif err = do_settimeofday64(&tv64); From a42d933dc28180d92c88131e4cc77fe1b747b5e3 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 9 Nov 2018 14:51:46 +0100 Subject: [PATCH 0496/2608] fuse: fix use-after-free in fuse_direct_IO() commit ebacb81273599555a7a19f7754a1451206a5fc4f upstream. In async IO blocking case the additional reference to the io is taken for it to survive fuse_aio_complete(). In non blocking case this additional reference is not needed, however we still reference io to figure out whether to wait for completion or not. This is wrong and will lead to use-after-free. Fix it by storing blocking information in separate variable. This was spotted by KASAN when running generic/208 fstest. Signed-off-by: Lukas Czerner Reported-by: Zorro Lang Signed-off-by: Miklos Szeredi Fixes: 744742d692e3 ("fuse: Add reference counting for fuse_io_priv") Cc: # v4.6 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fb4738ef162f..47d7a510be5b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2912,10 +2912,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } if (io->async) { + bool blocking = io->blocking; + fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (!io->blocking) + if (!blocking) return -EIOCBQUEUED; wait_for_completion(&wait); From 6ceec07cc84a26152e5678e121c824f3d1097de6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Nov 2018 15:52:16 +0100 Subject: [PATCH 0497/2608] fuse: fix leaked notify reply commit 7fabaf303458fcabb694999d6fa772cc13d4e217 upstream. fuse_request_send_notify_reply() may fail if the connection was reset for some reason (e.g. fs was unmounted). Don't leak request reference in this case. Besides leaking memory, this resulted in fc->num_waiting not being decremented and hence fuse_wait_aborted() left in a hanging and unkillable state. Fixes: 2d45ba381a74 ("fuse: add retrieve request") Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests") Reported-and-tested-by: syzbot+6339eda9cb4ebbc4c37b@syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi Cc: #v2.6.36 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a8b114ae931f..f7280c44cd4b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1721,8 +1721,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.args[1].size = total_len; err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); - if (err) + if (err) { fuse_retrieve_end(fc, req); + fuse_put_request(fc, req); + } return err; } From 56d10194be8102077430b683b2bb156d7ffdd84e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:56:54 -0700 Subject: [PATCH 0498/2608] configfs: replace strncpy with memcpy commit 1823342a1f2b47a4e6f5667f67cd28ab6bc4d6cd upstream. gcc 8.1.0 complains: fs/configfs/symlink.c:67:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length fs/configfs/symlink.c: In function 'configfs_get_link': fs/configfs/symlink.c:63:13: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Signed-off-by: Christoph Hellwig Signed-off-by: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- fs/configfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index c8aabba502f6..9993cdb81e7d 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -64,7 +64,7 @@ static void fill_item_path(struct config_item * item, char * buffer, int length) /* back up enough to print this bus id with '/' */ length -= cur; - strncpy(buffer + length,config_item_name(p),cur); + memcpy(buffer + length, config_item_name(p), cur); *(buffer + --length) = '/'; } } From 6c75d139608a603642cd731ce75e4222754fff0e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Nov 2018 22:57:24 +0000 Subject: [PATCH 0499/2608] gfs2: Put bitmap buffers in put_super commit 10283ea525d30f2e99828978fd04d8427876a7ad upstream. gfs2_put_super calls gfs2_clear_rgrpd to destroy the gfs2_rgrpd objects attached to the resource group glocks. That function should release the buffers attached to the gfs2_bitmap objects (bi_bh), but the call to gfs2_rgrp_brelse for doing that is missing. When gfs2_releasepage later runs across these buffers which are still referenced, it refuses to free them. This causes the pages the buffers are attached to to remain referenced as well. With enough mount/unmount cycles, the system will eventually run out of memory. Fix this by adding the missing call to gfs2_rgrp_brelse in gfs2_clear_rgrpd. (Also fix a gfs2_rgrp_relse -> gfs2_rgrp_brelse typo in a comment.) Fixes: 39b0f1e92908 ("GFS2: Don't brelse rgrp buffer_heads every allocation") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/rgrp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5fe033131f03..b0eee90738ff 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -706,6 +706,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) if (gl) { glock_clear_object(gl, rgd); + gfs2_rgrp_brelse(rgd); gfs2_glock_put(gl); } @@ -1115,7 +1116,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) * @rgd: the struct gfs2_rgrpd describing the RG to read in * * Read in all of a Resource Group's header and bitmap blocks. - * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. + * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. * * Returns: errno */ From fdc427442b374e84077d4214733764efb1a38a0e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Nov 2018 14:56:00 -0700 Subject: [PATCH 0500/2608] crypto: user - fix leaking uninitialized memory to userspace commit f43f39958beb206b53292801e216d9b8a660f087 upstream. All bytes of the NETLINK_CRYPTO report structures must be initialized, since they are copied to userspace. The change from strncpy() to strlcpy() broke this. As a minimal fix, change it back. Fixes: 4473710df1f8 ("crypto: user - Prepare for CRYPTO_MAX_ALG_NAME expansion") Cc: # v4.12+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/crypto_user.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 0dbe2be7f783..b5758768920b 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -83,7 +83,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_cipher rcipher; - strlcpy(rcipher.type, "cipher", sizeof(rcipher.type)); + strncpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.blocksize = alg->cra_blocksize; rcipher.min_keysize = alg->cra_cipher.cia_min_keysize; @@ -102,7 +102,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rcomp; - strlcpy(rcomp.type, "compression", sizeof(rcomp.type)); + strncpy(rcomp.type, "compression", sizeof(rcomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(struct crypto_report_comp), &rcomp)) goto nla_put_failure; @@ -116,7 +116,7 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_acomp racomp; - strlcpy(racomp.type, "acomp", sizeof(racomp.type)); + strncpy(racomp.type, "acomp", sizeof(racomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, sizeof(struct crypto_report_acomp), &racomp)) @@ -131,7 +131,7 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher rakcipher; - strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); + strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(struct crypto_report_akcipher), &rakcipher)) @@ -146,7 +146,7 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_kpp rkpp; - strlcpy(rkpp.type, "kpp", sizeof(rkpp.type)); + strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, sizeof(struct crypto_report_kpp), &rkpp)) @@ -160,10 +160,10 @@ nla_put_failure: static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { - strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strlcpy(ualg->cru_driver_name, alg->cra_driver_name, + strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); + strncpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); - strlcpy(ualg->cru_module_name, module_name(alg->cra_module), + strncpy(ualg->cru_module_name, module_name(alg->cra_module), sizeof(ualg->cru_module_name)); ualg->cru_type = 0; @@ -176,7 +176,7 @@ static int crypto_report_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_report_larval rl; - strlcpy(rl.type, "larval", sizeof(rl.type)); + strncpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(struct crypto_report_larval), &rl)) goto nla_put_failure; From efcbe502ddcc9f28d88a7c35b90f121a4e176823 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Nov 2018 15:08:35 -0800 Subject: [PATCH 0501/2608] lib/ubsan.c: don't mark __ubsan_handle_builtin_unreachable as noreturn commit 1c23b4108d716cc848b38532063a8aca4f86add8 upstream. gcc-8 complains about the prototype for this function: lib/ubsan.c:432:1: error: ignoring attribute 'noreturn' in declaration of a built-in function '__ubsan_handle_builtin_unreachable' because it conflicts with attribute 'const' [-Werror=attributes] This is actually a GCC's bug. In GCC internals __ubsan_handle_builtin_unreachable() declared with both 'noreturn' and 'const' attributes instead of only 'noreturn': https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84210 Workaround this by removing the noreturn attribute. [aryabinin: add information about GCC bug in changelog] Link: http://lkml.kernel.org/r/20181107144516.4587-1-aryabinin@virtuozzo.com Signed-off-by: Arnd Bergmann Signed-off-by: Andrey Ryabinin Acked-by: Olof Johansson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 50d1d5c25deb..60e108c5c173 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -451,8 +451,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds); -void __noreturn -__ubsan_handle_builtin_unreachable(struct unreachable_data *data) +void __ubsan_handle_builtin_unreachable(struct unreachable_data *data) { unsigned long flags; From 52fcb8dd8cc224740259800eee9d2031c8ffa830 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 16 Nov 2018 15:08:04 -0800 Subject: [PATCH 0502/2608] hugetlbfs: fix kernel BUG at fs/hugetlbfs/inode.c:444! commit 5e41540c8a0f0e98c337dda8b391e5dda0cde7cf upstream. This bug has been experienced several times by the Oracle DB team. The BUG is in remove_inode_hugepages() as follows: /* * If page is mapped, it was faulted in after being * unmapped in caller. Unmap (again) now after taking * the fault mutex. The mutex will prevent faults * until we finish removing the page. * * This race can only happen in the hole punch case. * Getting here in a truncate operation is a bug. */ if (unlikely(page_mapped(page))) { BUG_ON(truncate_op); In this case, the elevated map count is not the result of a race. Rather it was incorrectly incremented as the result of a bug in the huge pmd sharing code. Consider the following: - Process A maps a hugetlbfs file of sufficient size and alignment (PUD_SIZE) that a pmd page could be shared. - Process B maps the same hugetlbfs file with the same size and alignment such that a pmd page is shared. - Process B then calls mprotect() to change protections for the mapping with the shared pmd. As a result, the pmd is 'unshared'. - Process B then calls mprotect() again to chage protections for the mapping back to their original value. pmd remains unshared. - Process B then forks and process C is created. During the fork process, we do dup_mm -> dup_mmap -> copy_page_range to copy page tables. Copying page tables for hugetlb mappings is done in the routine copy_hugetlb_page_range. In copy_hugetlb_page_range(), the destination pte is obtained by: dst_pte = huge_pte_alloc(dst, addr, sz); If pmd sharing is possible, the returned pointer will be to a pte in an existing page table. In the situation above, process C could share with either process A or process B. Since process A is first in the list, the returned pte is a pointer to a pte in process A's page table. However, the check for pmd sharing in copy_hugetlb_page_range is: /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) continue; Since process C is sharing with process A instead of process B, the above test fails. The code in copy_hugetlb_page_range which follows assumes dst_pte points to a huge_pte_none pte. It copies the pte entry from src_pte to dst_pte and increments this map count of the associated page. This is how we end up with an elevated map count. To solve, check the dst_pte entry for huge_pte_none. If !none, this implies PMD sharing so do not copy. Link: http://lkml.kernel.org/r/20181105212315.14125-1-mike.kravetz@oracle.com Fixes: c5c99429fa57 ("fix hugepages leak due to pagetable page sharing") Signed-off-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Cc: Michal Hocko Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Davidlohr Bueso Cc: Prakash Sangappa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e073099083ca..f46040aed2da 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3211,7 +3211,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte) int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { - pte_t *src_pte, *dst_pte, entry; + pte_t *src_pte, *dst_pte, entry, dst_entry; struct page *ptepage; unsigned long addr; int cow; @@ -3239,15 +3239,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, break; } - /* If the pagetables are shared don't copy or take references */ - if (dst_pte == src_pte) + /* + * If the pagetables are shared don't copy or take references. + * dst_pte == src_pte is the common case of src/dest sharing. + * + * However, src could have 'unshared' and dst shares with + * another vma. If dst_pte !none, this implies sharing. + * Check here before taking page table lock, and once again + * after taking the lock below. + */ + dst_entry = huge_ptep_get(dst_pte); + if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) continue; dst_ptl = huge_pte_lock(h, dst, dst_pte); src_ptl = huge_pte_lockptr(h, src, src_pte); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); - if (huge_pte_none(entry)) { /* skip none entry */ + dst_entry = huge_ptep_get(dst_pte); + if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) { + /* + * Skip if src entry none. Also, skip in the + * unlikely case dst entry !none as this implies + * sharing with another vma. + */ ; } else if (unlikely(is_hugetlb_entry_migration(entry) || is_hugetlb_entry_hwpoisoned(entry))) { From d85699259bcda421e28f22b72ef2bd9e9907d66b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 16 Nov 2018 15:08:11 -0800 Subject: [PATCH 0503/2608] mm/swapfile.c: use kvzalloc for swap_info_struct allocation commit 873d7bcfd066663e3e50113dc4a0de19289b6354 upstream. Commit a2468cc9bfdf ("swap: choose swap device according to numa node") changed 'avail_lists' field of 'struct swap_info_struct' to an array. In popular linux distros it increased size of swap_info_struct up to 40 Kbytes and now swap_info_struct allocation requires order-4 page. Switch to kvzmalloc allows to avoid unexpected allocation failures. Link: http://lkml.kernel.org/r/fc23172d-3c75-21e2-d551-8b1808cbe593@virtuozzo.com Fixes: a2468cc9bfdf ("swap: choose swap device according to numa node") Signed-off-by: Vasily Averin Acked-by: Aaron Lu Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Huang Ying Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/swapfile.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 8cbc7d6fd52e..08e8cd21770c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2830,7 +2830,7 @@ static struct swap_info_struct *alloc_swap_info(void) unsigned int type; int i; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kvzalloc(sizeof(*p), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); @@ -2841,7 +2841,7 @@ static struct swap_info_struct *alloc_swap_info(void) } if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); - kfree(p); + kvfree(p); return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { @@ -2855,7 +2855,7 @@ static struct swap_info_struct *alloc_swap_info(void) smp_wmb(); nr_swapfiles++; } else { - kfree(p); + kvfree(p); p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() From 011d0d8b5bba980c56ecbf8ad7482b721cf52f6f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:42 -0800 Subject: [PATCH 0504/2608] efi/arm/libstub: Pack FDT after populating it commit 72a58a63a164b4e9d2d914e65caeb551846883f1 upstream. Commit: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") increased the allocation size for the FDT image created by the stub to a fixed value of 2 MB, to simplify the former code that made several attempts with increasing values for the size. This is reasonable given that the allocation is of type EFI_LOADER_DATA, which is released to the kernel unless it is explicitly memblock_reserve()d by the early boot code. However, this allocation size leaked into the 'size' field of the FDT header metadata, and so the entire allocation remains occupied by the device tree binary, even if most of it is not used to store device tree information. So call fdt_pack() to shrink the FDT data structure to its minimum size after populating all the fields, so that the remaining memory is no longer wasted. Signed-off-by: Ard Biesheuvel Cc: # v4.12+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") Link: http://lkml.kernel.org/r/20181114175544.12860-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/fdt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 8830fa601e45..0c0d2312f4a8 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -158,6 +158,10 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, return efi_status; } } + + /* shrink the FDT back to its minimum size */ + fdt_pack(fdt); + return EFI_SUCCESS; fdt_set_fail: From 7fb7a5ab36922bec92072199cb5085ff04882ed2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 5 Aug 2018 13:48:07 +0100 Subject: [PATCH 0505/2608] drm/rockchip: Allow driver to be shutdown on reboot/kexec commit 7f3ef5dedb146e3d5063b6845781ad1bb59b92b5 upstream. Leaving the DRM driver enabled on reboot or kexec has the annoying effect of leaving the display generating transactions whilst the IOMMU has been shut down. In turn, the IOMMU driver (which shares its interrupt line with the VOP) starts warning either on shutdown or when entering the secondary kernel in the kexec case (nothing is expected on that front). A cheap way of ensuring that things are nicely shut down is to register a shutdown callback in the platform driver. Signed-off-by: Marc Zyngier Tested-by: Vicente Bergas Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20180805124807.18169-1-marc.zyngier@arm.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index ff3d0f5efbb1..4cacb03f6733 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -425,6 +425,11 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev) return 0; } +static void rockchip_drm_platform_shutdown(struct platform_device *pdev) +{ + rockchip_drm_platform_remove(pdev); +} + static const struct of_device_id rockchip_drm_dt_ids[] = { { .compatible = "rockchip,display-subsystem", }, { /* sentinel */ }, @@ -434,6 +439,7 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids); static struct platform_driver rockchip_drm_platform_driver = { .probe = rockchip_drm_platform_probe, .remove = rockchip_drm_platform_remove, + .shutdown = rockchip_drm_platform_shutdown, .driver = { .name = "rockchip-drm", .of_match_table = rockchip_drm_dt_ids, From a02eadf0639bb5e007e7e270385b97e9c0a4a944 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Aug 2018 14:16:23 -0500 Subject: [PATCH 0506/2608] drm/amdgpu: add missing CHIP_HAINAN in amdgpu_ucode_get_load_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d9997b64c52b70bd98c48f443f068253621d1ffc upstream. This caused a confusing error message, but there is functionally no problem since the default method is DIRECT. Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 36c763310df5..684769c9a48e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -247,6 +247,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_PITCAIRN: case CHIP_VERDE: case CHIP_OLAND: + case CHIP_HAINAN: return AMDGPU_FW_LOAD_DIRECT; #endif #ifdef CONFIG_DRM_AMDGPU_CIK From 438f22af0688cb512f800d61f98cb8bd9b54e323 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 6 Sep 2018 17:43:21 -0400 Subject: [PATCH 0507/2608] drm/nouveau: Check backlight IDs are >= 0, not > 0 commit dc854914999d5d52ac1b31740cb0ea8d89d0372e upstream. Remember, ida IDs start at 0, not 1! Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index 408b955e5c39..6dd72bc32897 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -116,7 +116,7 @@ nv40_backlight_init(struct drm_connector *connector) &nv40_bl_ops, &props); if (IS_ERR(bd)) { - if (bl_connector.id > 0) + if (bl_connector.id >= 0) ida_simple_remove(&bl_ida, bl_connector.id); return PTR_ERR(bd); } @@ -249,7 +249,7 @@ nv50_backlight_init(struct drm_connector *connector) nv_encoder, ops, &props); if (IS_ERR(bd)) { - if (bl_connector.id > 0) + if (bl_connector.id >= 0) ida_simple_remove(&bl_ida, bl_connector.id); return PTR_ERR(bd); } From b1fd8f59962239c7317933a0522dac8ef8c121a1 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Fri, 9 Nov 2018 11:00:12 +0200 Subject: [PATCH 0508/2608] drm/dp_mst: Check if primary mstb is null commit 23d8003907d094f77cf959228e2248d6db819fa7 upstream. Unfortunately drm_dp_get_mst_branch_device which is called from both drm_dp_mst_handle_down_rep and drm_dp_mst_handle_up_rep seem to rely on that mgr->mst_primary is not NULL, which seem to be wrong as it can be cleared with simultaneous mode set, if probing fails or in other case. mgr->lock mutex doesn't protect against that as it might just get assigned to NULL right before, not simultaneously. There are currently bugs 107738, 108616 bugs which crash in drm_dp_get_mst_branch_device, caused by this issue. v2: Refactored the code, as it was nicely noticed. Fixed Bugzilla bug numbers(second was 108616, but not 108816) and added links. [changed title and added stable cc] Signed-off-by: Lyude Paul Signed-off-by: Stanislav Lisovskiy Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108616 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107738 Link: https://patchwork.freedesktop.org/patch/msgid/20181109090012.24438-1-stanislav.lisovskiy@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c022ab6e84bd..2a4cf6837324 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1232,6 +1232,9 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ mutex_lock(&mgr->lock); mstb = mgr->mst_primary; + if (!mstb) + goto out; + for (i = 0; i < lct - 1; i++) { int shift = (i % 2) ? 0 : 4; int port_num = (rad[i / 2] >> shift) & 0xf; From b24df93f18880cde76a69532a23d1cb1f568b611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 3 Oct 2018 17:49:51 +0300 Subject: [PATCH 0509/2608] drm/i915: Restore vblank interrupts earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7cada4d0b7a0fb813dbc9777fec092e9ed0546e9 upstream. Plane sanitation needs vblank interrupts (on account of CxSR disable). So let's restore vblank interrupts earlier. v2: Make it actually build v3: Add comment to explain why we need this (Daniel) Cc: stable@vger.kernel.org Cc: Dennis Tested-by: Dennis Tested-by: Peter Nowee Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105637 Fixes: b1e01595a66d ("drm/i915: Redo plane sanitation during readout") Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181003144951.4397-1-ville.syrjala@linux.intel.com (cherry picked from commit 68bc30deac625b8be8d3950b30dc93d09a3645f5) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2006ab44fbf9..944cb3c2ba5c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14829,13 +14829,9 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK); } - /* restore vblank interrupts to correct state */ - drm_crtc_vblank_reset(&crtc->base); if (crtc->active) { struct intel_plane *plane; - drm_crtc_vblank_on(&crtc->base); - /* Disable everything but the primary plane */ for_each_intel_plane_on_crtc(dev, crtc, plane) { const struct intel_plane_state *plane_state = @@ -15148,7 +15144,6 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe; struct intel_crtc *crtc; struct intel_encoder *encoder; int i; @@ -15167,15 +15162,23 @@ intel_modeset_setup_hw_state(struct drm_device *dev, /* HW state is read out, now we need to sanitize this mess. */ get_encoder_power_domains(dev_priv); - intel_sanitize_plane_mapping(dev_priv); + /* + * intel_sanitize_plane_mapping() may need to do vblank + * waits, so we need vblank interrupts restored beforehand. + */ + for_each_intel_crtc(&dev_priv->drm, crtc) { + drm_crtc_vblank_reset(&crtc->base); - for_each_intel_encoder(dev, encoder) { - intel_sanitize_encoder(encoder); + if (crtc->active) + drm_crtc_vblank_on(&crtc->base); } - for_each_pipe(dev_priv, pipe) { - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + intel_sanitize_plane_mapping(dev_priv); + for_each_intel_encoder(dev, encoder) + intel_sanitize_encoder(encoder); + + for_each_intel_crtc(&dev_priv->drm, crtc) { intel_sanitize_crtc(crtc, ctx); intel_dump_pipe_config(crtc, crtc->config, "[setup_hw_state]"); From 9ab995dba88232317946124055eb89c915bd5f33 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 8 Oct 2018 19:24:32 -0400 Subject: [PATCH 0510/2608] drm/i915: Don't unset intel_connector->mst_port commit 80c188695a77eddaa6e8885510ff4ef59fd478c3 upstream. Currently we set intel_connector->mst_port to NULL to signify that the MST port has been removed from the system so that we can prevent further action on the port such as connector probes, mode probing, etc. However, we're going to need access to intel_connector->mst_port in order to fixup ->best_encoder() so that it can always return the correct encoder for an MST port to prevent legacy DPMS prop changes from failing. This should be safe, so instead keep intel_connector->mst_port always set and instead just check the status of drm_connector->regustered to signify whether or not the connector has disappeared from the system. Changes since v2: - Add a comment to mst_port_gone (Jani Nikula) - Change mst_port_gone to a u8 instead of a bool, per the kernel bot. Apparently bool is discouraged in structs these days Changes since v4: - Don't use mst_port_gone at all! Just check if the connector is registered or not - Daniel Vetter Signed-off-by: Lyude Paul Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20181008232437.5571-4-lyude@redhat.com (cherry picked from commit 6ed5bb1fbad34382c8cfe9a9bf737e9a43053df5) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp_mst.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 93fc8ab9bb31..8d73edc033fe 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -311,9 +311,8 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) struct edid *edid; int ret; - if (!intel_dp) { + if (!READ_ONCE(connector->registered)) return intel_connector_update_modes(connector, NULL); - } edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port); ret = intel_connector_update_modes(connector, edid); @@ -328,9 +327,10 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force) struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; - if (!intel_dp) + if (!READ_ONCE(connector->registered)) return connector_status_disconnected; - return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); + return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, + intel_connector->port); } static void @@ -370,7 +370,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, int bpp = 24; /* MST uses fixed bpp */ int max_rate, mode_rate, max_lanes, max_link_clock; - if (!intel_dp) + if (!READ_ONCE(connector->registered)) return MODE_ERROR; max_link_clock = intel_dp_max_link_rate(intel_dp); @@ -399,7 +399,7 @@ static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *c struct intel_dp *intel_dp = intel_connector->mst_port; struct intel_crtc *crtc = to_intel_crtc(state->crtc); - if (!intel_dp) + if (!READ_ONCE(connector->registered)) return NULL; return &intel_dp->mst_encoders[crtc->pipe]->base.base; } @@ -491,7 +491,6 @@ static void intel_dp_register_mst_connector(struct drm_connector *connector) static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct drm_connector *connector) { - struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_i915_private *dev_priv = to_i915(connector->dev); drm_connector_unregister(connector); @@ -499,10 +498,6 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, if (dev_priv->fbdev) drm_fb_helper_remove_one_connector(&dev_priv->fbdev->helper, connector); - /* prevent race with the check in ->detect */ - drm_modeset_lock(&connector->dev->mode_config.connection_mutex, NULL); - intel_connector->mst_port = NULL; - drm_modeset_unlock(&connector->dev->mode_config.connection_mutex); drm_connector_unreference(connector); DRM_DEBUG_KMS("\n"); From 99e43c5415276df371aaab9765876cf4f71cdee4 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 8 Oct 2018 19:24:33 -0400 Subject: [PATCH 0511/2608] drm/i915: Skip vcpi allocation for MSTB ports that are gone commit c02ba4ef16eefe663fdefcccaa57fad32d5481bf upstream. Since we need to be able to allow DPMS on->off prop changes after an MST port has disappeared from the system, we need to be able to make sure we can compute a config for the resulting atomic commit. Currently this is impossible when the port has disappeared, since the VCPI slot searching we try to do in intel_dp_mst_compute_config() will fail with -EINVAL. Since the only commits we want to allow on no-longer-present MST ports are ones that shut off display hardware, we already know that no VCPI allocations are needed. So, hardcode the VCPI slot count to 0 when intel_dp_mst_compute_config() is called on an MST port that's gone. Changes since V4: - Don't use mst_port_gone at all, just check whether or not the drm connector is registered - Daniel Vetter Signed-off-by: Lyude Paul Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20181008232437.5571-5-lyude@redhat.com (cherry picked from commit f67207d78ceaf98b7531bc22df6f21328559c8d4) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp_mst.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 8d73edc033fe..b83a8f2b2a3a 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -37,11 +37,11 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; - struct intel_connector *connector = - to_intel_connector(conn_state->connector); + struct drm_connector *connector = conn_state->connector; + void *port = to_intel_connector(connector)->port; struct drm_atomic_state *state = pipe_config->base.state; int bpp; - int lane_count, slots; + int lane_count, slots = 0; const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; int mst_pbn; bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, @@ -66,17 +66,23 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); - if (drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, connector->port)) + if (drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, port)) pipe_config->has_audio = true; mst_pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, bpp); pipe_config->pbn = mst_pbn; - slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, - connector->port, mst_pbn); - if (slots < 0) { - DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", slots); - return false; + /* Zombie connectors can't have VCPI slots */ + if (READ_ONCE(connector->registered)) { + slots = drm_dp_atomic_find_vcpi_slots(state, + &intel_dp->mst_mgr, + port, + mst_pbn); + if (slots < 0) { + DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", + slots); + return false; + } } intel_link_compute_m_n(bpp, lane_count, From 943f7d5be05c44999145c053b53ce330217d3992 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 12 Oct 2018 15:02:28 +0100 Subject: [PATCH 0512/2608] drm/i915: Large page offsets for pread/pwrite commit ab0d6a141843e0b4b2709dfd37b53468b5452c3a upstream. Handle integer overflow when computing the sub-page length for shmem backed pread/pwrite. Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181012140228.29783-1-chris@chris-wilson.co.uk (cherry picked from commit a5e856a5348f6cd50889d125c40bbeec7328e466) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f354cfe63f7b..1f19e6d9a717 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -980,11 +980,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, offset = offset_in_page(args->offset); for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { struct page *page = i915_gem_object_get_page(obj, idx); - int length; - - length = remain; - if (offset + length > PAGE_SIZE) - length = PAGE_SIZE - offset; + unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); ret = shmem_pread(page, offset, length, user_data, page_to_phys(page) & obj_do_bit17_swizzling, @@ -1406,11 +1402,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = offset_in_page(args->offset); for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { struct page *page = i915_gem_object_get_page(obj, idx); - int length; - - length = remain; - if (offset + length > PAGE_SIZE) - length = PAGE_SIZE - offset; + unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); ret = shmem_pwrite(page, offset, length, user_data, page_to_phys(page) & obj_do_bit17_swizzling, From dfd13ef64df70fe49c7839b1ed3ee460759364de Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Thu, 25 Oct 2018 11:52:00 -0700 Subject: [PATCH 0513/2608] drm/i915/hdmi: Add HDMI 2.0 audio clock recovery N values commit 6503493145cba4413ecd3d4d153faeef4a1e9b85 upstream. HDMI 2.0 594Mhz modes were incorrectly selecting 25.200Mhz Automatic N value mode instead of HDMI specification values. V2: Fix 88.2 Hz N value Cc: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Clint Taylor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1540493521-1746-2-git-send-email-clinton.a.taylor@intel.com (cherry picked from commit 5a400aa3c562c4a726b4da286e63c96db905ade1) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_audio.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 9240fa79de7c..bc9f735395bc 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -134,6 +134,9 @@ static const struct { /* HDMI N/CTS table */ #define TMDS_297M 297000 #define TMDS_296M 296703 +#define TMDS_594M 594000 +#define TMDS_593M 593407 + static const struct { int sample_rate; int clock; @@ -154,6 +157,20 @@ static const struct { { 176400, TMDS_297M, 18816, 247500 }, { 192000, TMDS_296M, 23296, 281250 }, { 192000, TMDS_297M, 20480, 247500 }, + { 44100, TMDS_593M, 8918, 937500 }, + { 44100, TMDS_594M, 9408, 990000 }, + { 48000, TMDS_593M, 5824, 562500 }, + { 48000, TMDS_594M, 6144, 594000 }, + { 32000, TMDS_593M, 5824, 843750 }, + { 32000, TMDS_594M, 3072, 445500 }, + { 88200, TMDS_593M, 17836, 937500 }, + { 88200, TMDS_594M, 18816, 990000 }, + { 96000, TMDS_593M, 11648, 562500 }, + { 96000, TMDS_594M, 12288, 594000 }, + { 176400, TMDS_593M, 35672, 937500 }, + { 176400, TMDS_594M, 37632, 990000 }, + { 192000, TMDS_593M, 23296, 562500 }, + { 192000, TMDS_594M, 24576, 594000 }, }; /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */ From 46c397cd043fce2a2248c9025211eb9c5a16f845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 5 Nov 2018 21:46:04 +0200 Subject: [PATCH 0514/2608] drm/i915: Don't oops during modeset shutdown after lpe audio deinit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6a8915d0f8cf323e1beb792a33095cf652db4056 upstream. We deinit the lpe audio device before we call drm_atomic_helper_shutdown(), which means the platform device may already be gone when it comes time to shut down the crtc. As we don't know when the last reference to the platform device gets dropped by the audio driver we can't assume that the device and its data are still around when turning off the crtc. Mark the platform device as gone as soon as we do the audio deinit. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181105194604.6994-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit f45a7977d1140c11f334e01a9f77177ed68e3bfa) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lpe_audio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 8a541d0e3e80..30280323e1d8 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -303,8 +303,10 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); -} + dev_priv->lpe_audio.irq = -1; + dev_priv->lpe_audio.platdev = NULL; +} /** * intel_lpe_audio_notify() - notify lpe audio event From a5d74ffb65754b4fc751289a287f21b26838df40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Nov 2018 16:12:09 +0000 Subject: [PATCH 0515/2608] drm/i915: Mark pin flags as u64 commit 0014868b9c3c1dda1de6711cf58c3486fb422d07 upstream. Since the flags are being used to operate on a u64 variable, they too need to be marked as such so that the inverses are full width (and not zero extended on 32b kernels and bdw+). Reported-by: Sergii Romantsov Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Reviewed-by: Lionel Landwerlin Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20181102161232.17742-2-chris@chris-wilson.co.uk (cherry picked from commit 83b466b1dc5f0b4d33f0a901e8b00197a8f3582d) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_gtt.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b4e3aa7c0ce1..0dbbe840f5f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -584,20 +584,20 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 start, u64 end, unsigned int flags); /* Flags used by pin/bind&friends. */ -#define PIN_NONBLOCK BIT(0) -#define PIN_MAPPABLE BIT(1) -#define PIN_ZONE_4G BIT(2) -#define PIN_NONFAULT BIT(3) -#define PIN_NOEVICT BIT(4) +#define PIN_NONBLOCK BIT_ULL(0) +#define PIN_MAPPABLE BIT_ULL(1) +#define PIN_ZONE_4G BIT_ULL(2) +#define PIN_NONFAULT BIT_ULL(3) +#define PIN_NOEVICT BIT_ULL(4) -#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT(8) +#define PIN_MBZ BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT_ULL(8) -#define PIN_HIGH BIT(9) -#define PIN_OFFSET_BIAS BIT(10) -#define PIN_OFFSET_FIXED BIT(11) +#define PIN_HIGH BIT_ULL(9) +#define PIN_OFFSET_BIAS BIT_ULL(10) +#define PIN_OFFSET_FIXED BIT_ULL(11) #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif From 6576364f6496f5a95f8384d57fcb2b5377765a44 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Nov 2018 08:17:38 +0000 Subject: [PATCH 0516/2608] drm/i915/execlists: Force write serialisation into context image vs execution commit 0a823e8fd4fd67726697854578f3584ee3a49b1d upstream. Ensure that the writes into the context image are completed prior to the register mmio to trigger execution. Although previously we were assured by the SDM that all writes are flushed before an uncached memory transaction (our mmio write to submit the context to HW for execution), we have empirical evidence to believe that this is not actually the case. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108656 References: https://bugs.freedesktop.org/show_bug.cgi?id=108315 References: https://bugs.freedesktop.org/show_bug.cgi?id=106887 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20181108081740.25615-1-chris@chris-wilson.co.uk Cc: stable@vger.kernel.org (cherry picked from commit 987abd5c62f92ee4970b45aa077f47949974e615) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d638b641b760..0775e71ea95b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -325,7 +325,8 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); - /* True 32b PPGTT with dynamic page allocation: update PDP + /* + * True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. @@ -333,6 +334,17 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) if (ppgtt && !i915_vm_is_48bit(&ppgtt->base)) execlists_update_context_pdps(ppgtt, reg_state); + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, the empirical + * evidence (esp. on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. + */ + wmb(); return ce->lrc_desc; } From 9891f6816000e7addacb6f88986cc2d4f9004be0 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 31 Oct 2018 16:11:49 -0700 Subject: [PATCH 0517/2608] CONFIG_XEN_PV breaks xen_create_contiguous_region on ARM commit f9005571701920551bcf54a500973fb61f2e1eda upstream. xen_create_contiguous_region has now only an implementation if CONFIG_XEN_PV is defined. However, on ARM we never set CONFIG_XEN_PV but we do have an implementation of xen_create_contiguous_region which is required for swiotlb-xen to work correctly (although it just sets *dma_handle). [backport: remove change to xen_remap_pfn] Cc: # 4.12 Fixes: 16624390816c ("xen: create xen_create/destroy_contiguous_region() stubs for PVHVM only builds") Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: Jeff.Kubascik@dornerworks.com CC: Jarvis.Roach@dornerworks.com CC: Nathan.Studer@dornerworks.com CC: vkuznets@redhat.com CC: boris.ostrovsky@oracle.com CC: jgross@suse.com CC: julien.grall@arm.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- include/xen/xen-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index a95e65ec83c3..8d3786f290d9 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -40,7 +40,7 @@ int xen_setup_shutdown_event(void); extern unsigned long *xen_contiguous_bitmap; -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); From cd09665ab60b6dcfd416999202f0e9f5e6157797 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 31 Oct 2018 12:15:23 +0100 Subject: [PATCH 0518/2608] ovl: check whiteout in ovl_create_over_whiteout() commit 5e1275808630ea3b2c97c776f40e475017535f72 upstream. Kaixuxia repors that it's possible to crash overlayfs by removing the whiteout on the upper layer before creating a directory over it. This is a reproducer: mkdir lower upper work merge touch lower/file mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merge rm merge/file ls -al merge/file rm upper/file ls -al merge/ mkdir merge/file Before commencing with a vfs_rename(..., RENAME_EXCHANGE) verify that the lookup of "upper" is positive and is a whiteout, and return ESTALE otherwise. Reported by: kaixuxia Signed-off-by: Miklos Szeredi Fixes: e9be9d5e76e3 ("overlay filesystem") Cc: # v3.18 Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 0568898393f2..ef11fa7b869e 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -392,6 +392,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (IS_ERR(upper)) goto out_dput; + err = -ESTALE; + if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper))) + goto out_dput2; + err = ovl_create_real(wdir, newdentry, cattr, hardlink, true); if (err) goto out_dput2; From c9b8d580b3fb0ab65d37c372aef19a318fda3199 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 16 Jan 2018 13:47:16 +0900 Subject: [PATCH 0519/2608] printk: Never set console_may_schedule in console_trylock() commit fd5f7cde1b85d4c8e09ca46ce948e008a2377f64 upstream. This patch, basically, reverts commit 6b97a20d3a79 ("printk: set may_schedule for some of console_trylock() callers"). That commit was a mistake, it introduced a big dependency on the scheduler, by enabling preemption under console_sem in printk()->console_unlock() path, which is rather too critical. The patch did not significantly reduce the possibilities of printk() lockups, but made it possible to stall printk(), as has been reported by Tetsuo Handa [1]. Another issues is that preemption under console_sem also messes up with Steven Rostedt's hand off scheme, by making it possible to sleep with console_sem both in console_unlock() and in vprintk_emit(), after acquiring the console_sem ownership (anywhere between printk_safe_exit_irqrestore() in console_trylock_spinning() and printk_safe_enter_irqsave() in console_unlock()). This makes hand off less likely and, at the same time, may result in a significant amount of pending logbuf messages. Preempted console_sem owner makes it impossible for other CPUs to emit logbuf messages, but does not make it impossible for other CPUs to append new messages to the logbuf. Reinstate the old behavior and make printk() non-preemptible. Should any printk() lockup reports arrive they must be handled in a different way. [1] http://lkml.kernel.org/r/201603022101.CAH73907.OVOOMFHFFtQJSL%20()%20I-love%20!%20SAKURA%20!%20ne%20!%20jp Fixes: 6b97a20d3a79 ("printk: set may_schedule for some of console_trylock() callers") Link: http://lkml.kernel.org/r/20180116044716.GE6607@jagdpanzerIV To: Tetsuo Handa Cc: Sergey Senozhatsky Cc: Tejun Heo Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Cc: Cong Wang Cc: Dave Hansen Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jan Kara Cc: Mathieu Desnoyers Cc: Byungchul Park Cc: Pavel Machek Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Reported-by: Tetsuo Handa Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Petr Mladek Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7161312593dd..a9cf2e15f6a3 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1762,6 +1762,12 @@ asmlinkage int vprintk_emit(int facility, int level, /* If called from the scheduler, we can not call up(). */ if (!in_sched) { + /* + * Disable preemption to avoid being preempted while holding + * console_sem which would prevent anyone from printing to + * console + */ + preempt_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1769,6 +1775,7 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock()) console_unlock(); + preempt_enable(); } return printed_len; @@ -2083,20 +2090,7 @@ int console_trylock(void) return 0; } console_locked = 1; - /* - * When PREEMPT_COUNT disabled we can't reliably detect if it's - * safe to schedule (e.g. calling printk while holding a spin_lock), - * because preempt_disable()/preempt_enable() are just barriers there - * and preempt_count() is always 0. - * - * RCU read sections have a separate preemption counter when - * PREEMPT_RCU enabled thus we must take extra care and check - * rcu_preempt_depth(), otherwise RCU read sections modify - * preempt_count(). - */ - console_may_schedule = !oops_in_progress && - preemptible() && - !rcu_preempt_depth(); + console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); From a8ba76fc043e1a34e98055f6e818c67ddc3c05c2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 12 Apr 2018 09:16:04 -0600 Subject: [PATCH 0520/2608] nvme-loop: fix kernel oops in case of unhandled command commit 11d9ea6f2ca69237d35d6c55755beba3e006b106 upstream. When nvmet_req_init() fails, __nvmet_req_complete() is called to handle the target request via .queue_response(), so nvme_loop_queue_response() shouldn't be called again for handling the failure. This patch fixes this case by the following way: - move blk_mq_start_request() before nvmet_req_init(), so nvme_loop_queue_response() may work well to complete this host request - don't call nvme_cleanup_cmd() which is done in nvme_loop_complete_rq() - don't call nvme_loop_queue_response() which is done via .queue_response() Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig [trimmed changelog] Signed-off-by: Keith Busch Signed-off-by: Jens Axboe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/loop.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 02aff5cc48bf..3388d2788fe0 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -183,15 +183,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) return ret; + blk_mq_start_request(req); iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; iod->req.port = nvmet_loop_port; if (!nvmet_req_init(&iod->req, &queue->nvme_cq, - &queue->nvme_sq, &nvme_loop_ops)) { - nvme_cleanup_cmd(req); - blk_mq_start_request(req); - nvme_loop_queue_response(&iod->req); + &queue->nvme_sq, &nvme_loop_ops)) return BLK_STS_OK; - } if (blk_rq_bytes(req)) { iod->sg_table.sgl = iod->first_sgl; @@ -204,8 +201,6 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); } - blk_mq_start_request(req); - schedule_work(&iod->work); return BLK_STS_OK; } From dabaf70e3ad6ec7d327bfa98916163985a103e4a Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 24 Oct 2017 12:54:47 -0700 Subject: [PATCH 0521/2608] gpio: brcmstb: release the bgpio lock during irq handlers commit 142c168e0e50164e67c9399c28dedd65a307cfe5 upstream. The basic memory-mapped GPIO controller lock must be released before calling the registered GPIO interrupt handlers to allow the interrupt handlers to access the hardware. Examples of why a GPIO interrupt handler might want to access the GPIO hardware include an interrupt that is configured to trigger on rising and falling edges that needs to read the current level of the input to know how to respond, or an interrupt that causes a change in a GPIO output in the same bank. If the lock is not released before enterring the handler the hardware accesses will deadlock when they attempt to grab the lock. Since the lock is only needed to protect the calculation of unmasked pending interrupts create a dedicated function to perform this and hide the complexity. Fixes: 19a7b6940b78 ("gpio: brcmstb: Add interrupt and wakeup source support") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Acked-by: Gregory Fong Signed-off-by: Linus Walleij Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-brcmstb.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index dd0308cc8bb0..bfc9b1afa388 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -63,6 +63,21 @@ brcmstb_gpio_gc_to_priv(struct gpio_chip *gc) return bank->parent_priv; } +static unsigned long +brcmstb_gpio_get_active_irqs(struct brcmstb_gpio_bank *bank) +{ + void __iomem *reg_base = bank->parent_priv->reg_base; + unsigned long status; + unsigned long flags; + + spin_lock_irqsave(&bank->gc.bgpio_lock, flags); + status = bank->gc.read_reg(reg_base + GIO_STAT(bank->id)) & + bank->gc.read_reg(reg_base + GIO_MASK(bank->id)); + spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags); + + return status; +} + static void brcmstb_gpio_set_imask(struct brcmstb_gpio_bank *bank, unsigned int offset, bool enable) { @@ -205,11 +220,8 @@ static void brcmstb_gpio_irq_bank_handler(struct brcmstb_gpio_bank *bank) struct irq_domain *irq_domain = bank->gc.irqdomain; void __iomem *reg_base = priv->reg_base; unsigned long status; - unsigned long flags; - spin_lock_irqsave(&bank->gc.bgpio_lock, flags); - while ((status = bank->gc.read_reg(reg_base + GIO_STAT(bank->id)) & - bank->gc.read_reg(reg_base + GIO_MASK(bank->id)))) { + while ((status = brcmstb_gpio_get_active_irqs(bank))) { int bit; for_each_set_bit(bit, &status, 32) { @@ -224,7 +236,6 @@ static void brcmstb_gpio_irq_bank_handler(struct brcmstb_gpio_bank *bank) generic_handle_irq(irq_find_mapping(irq_domain, bit)); } } - spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags); } /* Each UPG GIO block has one IRQ for all banks */ From a21f3c11b41beb0b3b7d5de28d3d5a0692c5b22b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Nov 2018 09:24:18 +0100 Subject: [PATCH 0522/2608] Linux 4.14.82 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2fe1424d61d2..cac5323bc95d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 81 +SUBLEVEL = 82 EXTRAVERSION = NAME = Petit Gorille From ceb85d878ddce8f28394f91b2fcb26b1f8e8006e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EB=B0=B0=EC=84=9D=EC=A7=84?= Date: Fri, 9 Nov 2018 16:53:06 -0800 Subject: [PATCH 0523/2608] flow_dissector: do not dissect l4 ports for fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 62230715fd2453b3ba948c9d83cfb3ada9169169 ] Only first fragment has the sport/dport information, not the following ones. If we want consistent hash for all fragments, we need to ignore ports even for first fragment. This bug is visible for IPv6 traffic, if incoming fragments do not have a flow label, since skb_get_hash() will give different results for first fragment and following ones. It is also visible if any routing rule wants dissection and sport or dport. See commit 5e5d6fed3741 ("ipv6: route: dissect flow in input path if fib rules need it") for details. [edumazet] rewrote the changelog completely. Fixes: 06635a35d13d ("flow_dissect: use programable dissector in skb_flow_dissect and friends") Signed-off-by: 배석진 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d8796a7874b6..e2e716003ede 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -838,8 +838,8 @@ ip_proto_again: break; } - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) && + !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); From d6e976571c9f844872ba357408c7cec868bbde4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 7 Nov 2018 17:50:52 +0100 Subject: [PATCH 0524/2608] ibmvnic: fix accelerated VLAN handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e84b47941e15e6666afb8ee8b21d1c3fc1a013af ] Don't request tag insertion when it isn't present in outgoing skb. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 046af22a37cb..5c7134ccc1fd 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1259,7 +1259,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); - if (adapter->vlan_header_insertion) { + if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) { tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT; tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci); } From dbd5f23bd962198b298d4589565c5c5ada62e3b8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 16 Nov 2018 16:58:19 +0100 Subject: [PATCH 0525/2608] ip_tunnel: don't force DF when MTU is locked [ Upstream commit 16f7eb2b77b55da816c4e207f3f9440a8cafc00a ] The various types of tunnels running over IPv4 can ask to set the DF bit to do PMTU discovery. However, PMTU discovery is subject to the threshold set by the net.ipv4.route.min_pmtu sysctl, and is also disabled on routes with "mtu lock". In those cases, we shouldn't set the DF bit. This patch makes setting the DF bit conditional on the route's MTU locking state. This issue seems to be older than git history. Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 2f39479be92f..423091727e15 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; + iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df; iph->protocol = proto; iph->tos = tos; iph->daddr = dst; From 590f89a7ee6b73c928f886010fe2d7a08384874e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 18 Nov 2018 10:45:30 -0800 Subject: [PATCH 0526/2608] ipv6: Fix PMTU updates for UDP/raw sockets in presence of VRF [ Upstream commit 7ddacfa564870cdd97275fd87decb6174abc6380 ] Preethi reported that PMTU discovery for UDP/raw applications is not working in the presence of VRF when the socket is not bound to a device. The problem is that ip6_sk_update_pmtu does not consider the L3 domain of the skb device if the socket is not bound. Update the function to set oif to the L3 master device if relevant. Fixes: ca254490c8df ("net: Add VRF support to IPv6 stack") Reported-by: Preethi Ramachandra Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 30204bc2fc48..74dd35d6567c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1547,10 +1547,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { + int oif = sk->sk_bound_dev_if; struct dst_entry *dst; - ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); + if (!oif && skb->dev) + oif = l3mdev_master_ifindex(skb->dev); + + ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || From 8b9c0eb1ff03e9a026a9f3ca2996c03fc33af182 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 17 Nov 2018 21:57:02 -0800 Subject: [PATCH 0527/2608] net-gro: reset skb->pkt_type in napi_reuse_skb() [ Upstream commit 33d9a2c72f086cbf1087b2fd2d1a15aa9df14a7f ] eth_type_trans() assumes initial value for skb->pkt_type is PACKET_HOST. This is indeed the value right after a fresh skb allocation. However, it is possible that GRO merged a packet with a different value (like PACKET_OTHERHOST in case macvlan is used), so we need to make sure napi->skb will have pkt_type set back to PACKET_HOST. Otherwise, valid packets might be dropped by the stack because their pkt_type is not PACKET_HOST. napi_reuse_skb() was added in commit 96e93eab2033 ("gro: Add internal interfaces for VLAN"), but this bug always has been there. Fixes: 96e93eab2033 ("gro: Add internal interfaces for VLAN") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index e8a66ad6d07c..4337450a5fdb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4993,6 +4993,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + + /* eth_type_trans() assumes pkt_type is PACKET_HOST */ + skb->pkt_type = PACKET_HOST; + skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); From c7051eb3acbe0908f29fa56b464a6c79c067f93f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 15:21:53 +0800 Subject: [PATCH 0528/2608] sctp: not allow to set asoc prsctp_enable by sockopt [ Upstream commit cc3ccf26f0649089b3a34a2781977755ea36e72c ] As rfc7496#section4.5 says about SCTP_PR_SUPPORTED: This socket option allows the enabling or disabling of the negotiation of PR-SCTP support for future associations. For existing associations, it allows one to query whether or not PR-SCTP support was negotiated on a particular association. It means only sctp sock's prsctp_enable can be set. Note that for the limitation of SCTP_{CURRENT|ALL}_ASSOC, we will add it when introducing SCTP_{FUTURE|CURRENT|ALL}_ASSOC for linux sctp in another patchset. v1->v2: - drop the params.assoc_id check as Neil suggested. Fixes: 28aa4c26fce2 ("sctp: add SCTP_PR_SUPPORTED on sctp sockopt") Reported-by: Ying Xu Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d87d56978b4c..6a2532370545 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3750,32 +3750,16 @@ static int sctp_setsockopt_pr_supported(struct sock *sk, unsigned int optlen) { struct sctp_assoc_value params; - struct sctp_association *asoc; - int retval = -EINVAL; if (optlen != sizeof(params)) - goto out; + return -EINVAL; - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; - asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - asoc->prsctp_enable = !!params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); + sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value; - sp->ep->prsctp_enable = !!params.assoc_value; - } else { - goto out; - } - - retval = 0; - -out: - return retval; + return 0; } static int sctp_setsockopt_default_prinfo(struct sock *sk, From 8e201ff7140a23ea154cad8d348d1e6e79cf0bc7 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Tue, 20 Nov 2018 10:04:04 +0530 Subject: [PATCH 0529/2608] tg3: Add PHY reset for 5717/5719/5720 in change ring and flow control paths [ Upstream commit 59663e42199c93d1d7314d1446f6782fc4b1eb81 ] This patch has the fix to avoid PHY lockup with 5717/5719/5720 in change ring and flow control paths. This patch solves the RX hang while doing continuous ring or flow control parameters with heavy traffic from peer. Signed-off-by: Siva Reddy Kallam Acked-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1b1d2a67f412..bc0221eafe5c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12395,6 +12395,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e { struct tg3 *tp = netdev_priv(dev); int i, irq_sync = 0, err = 0; + bool reset_phy = false; if ((ering->rx_pending > tp->rx_std_ring_mask) || (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) || @@ -12426,7 +12427,13 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, false); + /* Reset PHY to avoid PHY lock up */ + if (tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) + reset_phy = true; + + err = tg3_restart_hw(tp, reset_phy); if (!err) tg3_netif_start(tp); } @@ -12460,6 +12467,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam { struct tg3 *tp = netdev_priv(dev); int err = 0; + bool reset_phy = false; if (tp->link_config.autoneg == AUTONEG_ENABLE) tg3_warn_mgmt_link_flap(tp); @@ -12550,7 +12558,13 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, false); + /* Reset PHY to avoid PHY lock up */ + if (tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) + reset_phy = true; + + err = tg3_restart_hw(tp, reset_phy); if (!err) tg3_netif_start(tp); } From 453fcc4b536a6427829e0a89a41d605a2f29bc0e Mon Sep 17 00:00:00 2001 From: Matthew Cover Date: Sun, 18 Nov 2018 00:46:00 -0700 Subject: [PATCH 0530/2608] tuntap: fix multiqueue rx [ Upstream commit 8ebebcba559a1bfbaec7bbda64feb9870b9c58da ] When writing packets to a descriptor associated with a combined queue, the packets should end up on that queue. Before this change all packets written to any descriptor associated with a tap interface end up on rx-0, even when the descriptor is associated with a different queue. The rx traffic can be generated by either of the following. 1. a simple tap program which spins up multiple queues and writes packets to each of the file descriptors 2. tx from a qemu vm with a tap multiqueue netdev The queue for rx traffic can be observed by either of the following (done on the hypervisor in the qemu case). 1. a simple netmap program which opens and reads from per-queue descriptors 2. configuring RPS and doing per-cpu captures with rxtxcpu Alternatively, if you printk() the return value of skb_get_rx_queue() just before each instance of netif_receive_skb() in tun.c, you will get 65535 for every skb. Calling skb_record_rx_queue() to set the rx queue to the queue_index fixes the association between descriptor and rx queue. Signed-off-by: Matthew Cover Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7f8c7e3aa356..0a008d136aae 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1214,6 +1214,7 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile, if (!rx_batched || (!more && skb_queue_empty(queue))) { local_bh_disable(); + skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); local_bh_enable(); return; @@ -1233,8 +1234,11 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *nskb; local_bh_disable(); - while ((nskb = __skb_dequeue(&process_queue))) + while ((nskb = __skb_dequeue(&process_queue))) { + skb_record_rx_queue(nskb, tfile->queue_index); netif_receive_skb(nskb); + } + skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); local_bh_enable(); } From 136ff9cad44a5da95187e561f473a2969d658b7e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 1 Nov 2018 15:55:38 -0700 Subject: [PATCH 0531/2608] net: systemport: Protect stop from timeout [ Upstream commit 7cb6a2a2c72c1ed8f42fb01f1a661281b568dead ] A timing hazard exists when the network interface is stopped that allows a watchdog timeout to be processed by a separate core in parallel. This creates the potential for the timeout handler to wake the queues while the driver is shutting down, or access registers after their clocks have been removed. The more common case is that the watchdog timeout will produce a warning message which doesn't lead to a crash. The chances of this are greatly increased by the fact that bcm_sysport_netif_stop stops the transmit queues which can easily precipitate a watchdog time- out because of stale trans_start data in the queues. This commit corrects the behavior by ensuring that the watchdog timeout is disabled before enterring bcm_sysport_netif_stop. There are currently only two users of the bcm_sysport_netif_stop function: close and suspend. The close case already handles the issue by exiting the RUNNING state before invoking the driver close service. The suspend case now performs the netif_device_detach to exit the PRESENT state before the call to bcm_sysport_netif_stop rather than after it. These behaviors prevent any future scheduling of the driver timeout service during the window. The netif_tx_stop_all_queues function in bcm_sysport_netif_stop is replaced with netif_tx_disable to ensure synchronization with any transmit or timeout threads that may already be executing on other cores. For symmetry, the netif_device_attach call upon resume is moved to after the call to bcm_sysport_netif_start. Since it wakes the transmit queues it is not necessary to invoke netif_tx_start_all_queues from bcm_sysport_netif_start so it is moved into the driver open service. Fixes: 40755a0fce17 ("net: systemport: add suspend and resume support") Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 6e7f9a470ea1..45462557e51c 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1774,9 +1774,6 @@ static void bcm_sysport_netif_start(struct net_device *dev) intrl2_1_mask_clear(priv, 0xffffffff); else intrl2_0_mask_clear(priv, INTRL2_0_TDMA_MBDONE_MASK); - - /* Last call before we start the real business */ - netif_tx_start_all_queues(dev); } static void rbuf_init(struct bcm_sysport_priv *priv) @@ -1922,6 +1919,8 @@ static int bcm_sysport_open(struct net_device *dev) bcm_sysport_netif_start(dev); + netif_tx_start_all_queues(dev); + return 0; out_clear_rx_int: @@ -1945,7 +1944,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev) struct bcm_sysport_priv *priv = netdev_priv(dev); /* stop all software from updating hardware */ - netif_tx_stop_all_queues(dev); + netif_tx_disable(dev); napi_disable(&priv->napi); phy_stop(dev->phydev); @@ -2267,12 +2266,12 @@ static int bcm_sysport_suspend(struct device *d) if (!netif_running(dev)) return 0; + netif_device_detach(dev); + bcm_sysport_netif_stop(dev); phy_suspend(dev->phydev); - netif_device_detach(dev); - /* Disable UniMAC RX */ umac_enable_set(priv, CMD_RX_EN, 0); @@ -2356,8 +2355,6 @@ static int bcm_sysport_resume(struct device *d) goto out_free_rx_ring; } - netif_device_attach(dev); - /* RX pipe enable */ topctrl_writel(priv, 0, RX_FLUSH_CNTL); @@ -2402,6 +2399,8 @@ static int bcm_sysport_resume(struct device *d) bcm_sysport_netif_start(dev); + netif_device_attach(dev); + return 0; out_free_rx_ring: From 0ecbb0cff18453398b13de638fec1b82ad53e14a Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 9 Nov 2018 18:56:27 -0700 Subject: [PATCH 0532/2608] net: qualcomm: rmnet: Fix incorrect assignment of real_dev [ Upstream commit d02854dc1999ed3e7fd79ec700c64ac23ac0c458 ] A null dereference was observed when a sysctl was being set from userspace and rmnet was stuck trying to complete some actions in the NETDEV_REGISTER callback. This is because the real_dev is set only after the device registration handler completes. sysctl call stack - <6> Unable to handle kernel NULL pointer dereference at virtual address 00000108 <2> pc : rmnet_vnd_get_iflink+0x1c/0x28 <2> lr : dev_get_iflink+0x2c/0x40 <2> rmnet_vnd_get_iflink+0x1c/0x28 <2> inet6_fill_ifinfo+0x15c/0x234 <2> inet6_ifinfo_notify+0x68/0xd4 <2> ndisc_ifinfo_sysctl_change+0x1b8/0x234 <2> proc_sys_call_handler+0xac/0x100 <2> proc_sys_write+0x3c/0x4c <2> __vfs_write+0x54/0x14c <2> vfs_write+0xcc/0x188 <2> SyS_write+0x60/0xc0 <2> el0_svc_naked+0x34/0x38 device register call stack - <2> notifier_call_chain+0x84/0xbc <2> raw_notifier_call_chain+0x38/0x48 <2> call_netdevice_notifiers_info+0x40/0x70 <2> call_netdevice_notifiers+0x38/0x60 <2> register_netdevice+0x29c/0x3d8 <2> rmnet_vnd_newlink+0x68/0xe8 <2> rmnet_newlink+0xa0/0x160 <2> rtnl_newlink+0x57c/0x6c8 <2> rtnetlink_rcv_msg+0x1dc/0x328 <2> netlink_rcv_skb+0xac/0x118 <2> rtnetlink_rcv+0x24/0x30 <2> netlink_unicast+0x158/0x1f0 <2> netlink_sendmsg+0x32c/0x338 <2> sock_sendmsg+0x44/0x60 <2> SyS_sendto+0x150/0x1ac <2> el0_svc_naked+0x34/0x38 Fixes: b752eff5be24 ("net: qualcomm: rmnet: Implement ndo_get_iflink") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 7f90d5587653..fda701419039 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -102,12 +102,14 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct rmnet_port *port, struct net_device *real_dev) { - struct rmnet_priv *priv; + struct rmnet_priv *priv = netdev_priv(rmnet_dev); int rc; if (port->rmnet_devices[id]) return -EINVAL; + priv->real_dev = real_dev; + rc = register_netdevice(rmnet_dev); if (!rc) { port->rmnet_devices[id] = rmnet_dev; @@ -115,9 +117,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, rmnet_dev->rtnl_link_ops = &rmnet_link_ops; - priv = netdev_priv(rmnet_dev); priv->mux_id = id; - priv->real_dev = real_dev; netdev_dbg(rmnet_dev, "rmnet dev created\n"); } From 3ae22cc948b00b12c18e2471ad32db366b113832 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Fri, 2 Nov 2018 19:23:41 -0700 Subject: [PATCH 0533/2608] net: dsa: microchip: initialize mutex before use [ Upstream commit 284fb78ed7572117846f8e1d1d8e3dbfd16880c2 ] Initialize mutex before use. Avoid kernel complaint when CONFIG_DEBUG_LOCK_ALLOC is enabled. Fixes: b987e98e50ab90e5 ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Tristram Ha Reviewed-by: Pavel Machek Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/microchip/ksz_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 56cd6d365352..6f4c9913f8f5 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1104,11 +1104,6 @@ static int ksz_switch_init(struct ksz_device *dev) { int i; - mutex_init(&dev->reg_mutex); - mutex_init(&dev->stats_mutex); - mutex_init(&dev->alu_mutex); - mutex_init(&dev->vlan_mutex); - dev->ds->ops = &ksz_switch_ops; for (i = 0; i < ARRAY_SIZE(ksz_switch_chips); i++) { @@ -1193,6 +1188,11 @@ int ksz_switch_register(struct ksz_device *dev) if (dev->pdata) dev->chip_id = dev->pdata->chip_id; + mutex_init(&dev->reg_mutex); + mutex_init(&dev->stats_mutex); + mutex_init(&dev->alu_mutex); + mutex_init(&dev->vlan_mutex); + if (ksz_switch_detect(dev)) return -EINVAL; From 56782f53537aaa10c1323c86236959e62bb75330 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 3 Nov 2018 13:59:45 +0800 Subject: [PATCH 0534/2608] sctp: fix strchange_flags name for Stream Change Event [ Upstream commit fd82d61ba142f0b83463e47064bf5460aac57b6e ] As defined in rfc6525#section-6.1.3, SCTP_STREAM_CHANGE_DENIED and SCTP_STREAM_CHANGE_FAILED should be used instead of SCTP_ASSOC_CHANGE_DENIED and SCTP_ASSOC_CHANGE_FAILED. To keep the compatibility, fix it by adding two macros. Fixes: b444153fb5a6 ("sctp: add support for generating add stream change event notification") Reported-by: Jianwen Ji Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/sctp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index cfe971296835..e2554c1bcf58 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -519,6 +519,8 @@ struct sctp_assoc_reset_event { #define SCTP_ASSOC_CHANGE_DENIED 0x0004 #define SCTP_ASSOC_CHANGE_FAILED 0x0008 +#define SCTP_STREAM_CHANGE_DENIED SCTP_ASSOC_CHANGE_DENIED +#define SCTP_STREAM_CHANGE_FAILED SCTP_ASSOC_CHANGE_FAILED struct sctp_stream_change_event { __u16 strchange_type; __u16 strchange_flags; From fd2e5f8ae1183d8b4c8dd49a87535dc97057d258 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Fri, 16 Nov 2018 08:38:36 +0100 Subject: [PATCH 0535/2608] net: phy: mdio-gpio: Fix working over slow can_sleep GPIOs [ Upstream commit df5a8ec64eed7fe45b556cfff503acd6429ab817 ] Up until commit 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod functions where possible"), the _cansleep variants of the gpio_ API was used. After that commit and the change to gpiod_ API, the _cansleep() was dropped. This then results in WARN_ON() when used with GPIO devices which do sleep. Add back the _cansleep() to avoid this. Fixes: 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod functions where possible") Signed-off-by: Martin Schiller Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio-gpio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 4333c6e14742..638923a0ca44 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -79,7 +79,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) * assume the pin serves as pull-up. If direction is * output, the default value is high. */ - gpiod_set_value(bitbang->mdo, 1); + gpiod_set_value_cansleep(bitbang->mdo, 1); return; } @@ -94,7 +94,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - return gpiod_get_value(bitbang->mdio); + return gpiod_get_value_cansleep(bitbang->mdio); } static void mdio_set(struct mdiobb_ctrl *ctrl, int what) @@ -103,9 +103,9 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what) container_of(ctrl, struct mdio_gpio_info, ctrl); if (bitbang->mdo) - gpiod_set_value(bitbang->mdo, what); + gpiod_set_value_cansleep(bitbang->mdo, what); else - gpiod_set_value(bitbang->mdio, what); + gpiod_set_value_cansleep(bitbang->mdio, what); } static void mdc_set(struct mdiobb_ctrl *ctrl, int what) @@ -113,7 +113,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpiod_set_value(bitbang->mdc, what); + gpiod_set_value_cansleep(bitbang->mdc, what); } static const struct mdiobb_ops mdio_gpio_ops = { From eebe064ce49c7ea3b730885183d6b109b1b17418 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 21:59:49 +0800 Subject: [PATCH 0536/2608] sctp: not increase stream's incnt before sending addstrm_in request [ Upstream commit e1e46479847e66f78f79d8c24d5169a5954b3fc2 ] Different from processing the addstrm_out request, The receiver handles an addstrm_in request by sending back an addstrm_out request to the sender who will increase its stream's in and incnt later. Now stream->incnt has been increased since it sent out the addstrm_in request in sctp_send_add_streams(), with the wrong stream->incnt will even cause crash when copying stream info from the old stream's in to the new one's in sctp_process_strreset_addstrm_out(). This patch is to fix it by simply removing the stream->incnt change from sctp_send_add_streams(). Fixes: 242bd2d519d7 ("sctp: implement sender-side procedures for Add Incoming/Outgoing Streams Request Parameter") Reported-by: Jianwen Ji Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/stream.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 9ea6057ed28b..61273534ae10 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -310,7 +310,6 @@ int sctp_send_add_streams(struct sctp_association *asoc, goto out; } - stream->incnt = incnt; stream->outcnt = outcnt; asoc->strreset_outstanding = !!out + !!in; From 1ada2e1b289f719ea2e6b0dc5331f6c40b0341a7 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Fri, 2 Nov 2018 19:49:15 +0000 Subject: [PATCH 0537/2608] mlxsw: spectrum: Fix IP2ME CPU policer configuration [ Upstream commit 96801552f846460fe9ac10f1b189602992f004e1 ] The CPU policer used to police packets being trapped via a local route (IP2ME) was incorrectly configured to police based on bytes per second instead of packets per second. Change the policer to police based on packets per second and avoid packet loss under certain circumstances. Fixes: 9148e7cf73ce ("mlxsw: spectrum: Add policers for trap groups") Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8b48338b4a70..18bb6798937b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3471,7 +3471,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: - is_bytes = true; rate = 4 * 1024; burst_size = 4; break; From 61c6bb13506f0bc14b0e511167a248ba703f4b0d Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 8 Nov 2018 20:38:26 +0100 Subject: [PATCH 0538/2608] net: smsc95xx: Fix MTU range [ Upstream commit 85b18b0237ce9986a81a1b9534b5e2ee116f5504 ] The commit f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") introduce a common MTU handling for usbnet. But it's missing the necessary changes for smsc95xx. So set the MTU range accordingly. This patch has been tested on a Raspberry Pi 3. Fixes: f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc95xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 99e684e39d35..f74739664f89 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1321,6 +1321,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &smsc95xx_ethtool_ops; dev->net->flags |= IFF_MULTICAST; dev->net->hard_header_len += SMSC95XX_TX_OVERHEAD_CSUM; + dev->net->min_mtu = ETH_MIN_MTU; + dev->net->max_mtu = ETH_DATA_LEN; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; pdata->dev = dev; From 039df755bd84022be5b423fb866e871b87a34ad5 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Wed, 31 Oct 2018 22:52:19 +0100 Subject: [PATCH 0539/2608] usbnet: smsc95xx: disable carrier check while suspending [ Upstream commit 7b900ead6cc66b2ee873cb042dfba169aa68b56c ] We need to make sure, that the carrier check polling is disabled while suspending. Otherwise we can end up with usbnet_read_cmd() being issued when only usbnet_read_cmd_nopm() is allowed. If this happens, read operations lock up. Fixes: d69d169493 ("usbnet: smsc95xx: fix link detection for disabled autonegotiation") Signed-off-by: Frieder Schrempf Reviewed-by: Raghuram Chary J Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc95xx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index f74739664f89..2f65975a121f 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1600,6 +1600,8 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) return ret; } + cancel_delayed_work_sync(&pdata->carrier_check); + if (pdata->suspend_flags) { netdev_warn(dev->net, "error during last resume\n"); pdata->suspend_flags = 0; @@ -1842,6 +1844,11 @@ done: */ if (ret && PMSG_IS_AUTO(message)) usbnet_resume(intf); + + if (ret) + schedule_delayed_work(&pdata->carrier_check, + CARRIER_CHECK_DELAY); + return ret; } From 59b830bf4436bdac3eace160fe1a9d7eb9e37392 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Nov 2018 17:34:27 -0800 Subject: [PATCH 0540/2608] inet: frags: better deal with smp races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0d5b9311baf27bb545f187f12ecfd558220c607d ] Multiple cpus might attempt to insert a new fragment in rhashtable, if for example RPS is buggy, as reported by 배석진 in https://patchwork.ozlabs.org/patch/994601/ We use rhashtable_lookup_get_insert_key() instead of rhashtable_insert_fast() to let cpus losing the race free their own inet_frag_queue and use the one that was inserted by another cpu. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Eric Dumazet Reported-by: 배석진 Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index f6764537148c..653be98fe3fb 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -180,21 +180,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - void *arg) + void *arg, + struct inet_frag_queue **prev) { struct inet_frags *f = nf->f; struct inet_frag_queue *q; - int err; q = inet_frag_alloc(nf, f, arg); - if (!q) + if (!q) { + *prev = ERR_PTR(-ENOMEM); return NULL; - + } mod_timer(&q->timer, jiffies + nf->timeout); - err = rhashtable_insert_fast(&nf->rhashtable, &q->node, - f->rhash_params); - if (err < 0) { + *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, + &q->node, f->rhash_params); + if (*prev) { q->flags |= INET_FRAG_COMPLETE; inet_frag_kill(q); inet_frag_destroy(q); @@ -206,19 +207,20 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) { - struct inet_frag_queue *fq; + struct inet_frag_queue *fq = NULL, *prev; rcu_read_lock(); - fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); - if (fq) { + prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); + if (!prev) + fq = inet_frag_create(nf, key, &prev); + if (prev && !IS_ERR(prev)) { + fq = prev; if (!refcount_inc_not_zero(&fq->refcnt)) fq = NULL; - rcu_read_unlock(); - return fq; } rcu_read_unlock(); - return inet_frag_create(nf, key); + return fq; } EXPORT_SYMBOL(inet_frag_find); From c33b5cc7076b9140d5d393a2d750475b08d8efad Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Nov 2018 19:11:06 +0100 Subject: [PATCH 0541/2608] Revert "x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation" This reverts commit 8a13906ae519b3ed95cd0fb73f1098b46362f6c4 which is commit 53c613fe6349994f023245519265999eed75957f upstream. It's not ready for the stable trees as there are major slowdowns involved with this patch. Reported-by: Jiri Kosina Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 57 ++++---------------------------------- kernel/cpu.c | 11 +------- 2 files changed, 7 insertions(+), 61 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index aa6e7f75bccc..e92aedd93806 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,10 +34,12 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ -u64 x86_spec_ctrl_base; +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +u64 __ro_after_init x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); -static DEFINE_MUTEX(spec_ctrl_mutex); /* * The vendor and possibly platform specific bits which can be modified in @@ -321,46 +323,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) - mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -460,9 +422,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } - - /* Enable STIBP if appropriate */ - arch_smt_update(); } #undef pr_fmt @@ -855,8 +814,6 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -871,12 +828,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/kernel/cpu.c b/kernel/cpu.c index 90cf6a04e08a..f3f389e33343 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2045,12 +2045,6 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } -/* - * Architectures that need SMT-specific errata handling during SMT hotplug - * should override this. - */ -void __weak arch_smt_update(void) { }; - static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2077,10 +2071,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) { + if (!ret) cpu_smt_control = ctrlval; - arch_smt_update(); - } cpu_maps_update_done(); return ret; } @@ -2091,7 +2083,6 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; - arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) From 8a1bd68b06ebc9fe5b20297c52a51c82fde744a6 Mon Sep 17 00:00:00 2001 From: Chris Paterson Date: Thu, 14 Dec 2017 09:08:40 +0000 Subject: [PATCH 0542/2608] ARM: dts: r8a7791: Correct critical CPU temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e4fdf59bcce3b490bbc7197145bcb9f9d5a18cd3 upstream. The R-Car M2W hardware manual states that Tc = –40°C to +105°C. The thermal sensor has an accuracy of ±5°C and there can be a temperature difference of 1 or 2 degrees between Tjmax and the thermal sensor due to the location of the latter. This means that 95°C is a safer value to use. Fixes: cac68a56e34b9810 ("ARM: dts: r8a7791: enable to use thermal-zone") Signed-off-by: Chris Paterson Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/r8a7791.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index f1d1a9772153..0aceb3736e5c 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -91,7 +91,7 @@ trips { cpu-crit { - temperature = <115000>; + temperature = <95000>; hysteresis = <0>; type = "critical"; }; From 004ef8f590a464bb1977f38d3848b18df225267e Mon Sep 17 00:00:00 2001 From: Chris Paterson Date: Thu, 14 Dec 2017 09:08:41 +0000 Subject: [PATCH 0543/2608] ARM: dts: r8a7793: Correct critical CPU temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1dfc65cef481ac6af64380f26186d5cc585b46eb upstream. The R-Car M2N hardware manual states that Tc = –40°C to +105°C. The thermal sensor has an accuracy of ±5°C and there can be a temperature difference of 1 or 2 degrees between Tjmax and the thermal sensor due to the location of the latter. This means that 95°C is a safer value to use. Fixes: 57f9156bc620ac56 ("ARM: dts: r8a7793: enable to use thermal-zone") Signed-off-by: Chris Paterson Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/r8a7793.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi index 497716b6fbe2..fd12564aabc3 100644 --- a/arch/arm/boot/dts/r8a7793.dtsi +++ b/arch/arm/boot/dts/r8a7793.dtsi @@ -88,7 +88,7 @@ trips { cpu-crit { - temperature = <115000>; + temperature = <95000>; hysteresis = <0>; type = "critical"; }; From 1d4bd2e4e1d26c5384d52ca4a1953cb0cfc934f1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Nov 2018 08:19:27 +0100 Subject: [PATCH 0544/2608] Linux 4.14.83 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cac5323bc95d..0f42814095a4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 82 +SUBLEVEL = 83 EXTRAVERSION = NAME = Petit Gorille From 426477d9f68b56ab71f52a5ce2dd6da0699d89c3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 1 Nov 2018 13:14:30 +0000 Subject: [PATCH 0545/2608] cifs: don't dereference smb_file_target before null check [ Upstream commit 8c6c9bed8773375b1d54ccca2911ec892c59db5d ] There is a null check on dst_file->private data which suggests it can be potentially null. However, before this check, pointer smb_file_target is derived from dst_file->private and dereferenced in the call to tlink_tcon, hence there is a potential null pointer deference. Fix this by assigning smb_file_target and target_tcon after the null pointer sanity checks. Detected by CoverityScan, CID#1475302 ("Dereference before null check") Fixes: 04b38d601239 ("vfs: pull btrfs clone API to vfs layer") Signed-off-by: Colin Ian King Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 44a7b2dea688..c5fd5abf7206 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -933,8 +933,8 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); struct cifsFileInfo *smb_file_src = src_file->private_data; - struct cifsFileInfo *smb_file_target = dst_file->private_data; - struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink); + struct cifsFileInfo *smb_file_target; + struct cifs_tcon *target_tcon; unsigned int xid; int rc; @@ -948,6 +948,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, goto out; } + smb_file_target = dst_file->private_data; + target_tcon = tlink_tcon(smb_file_target->tlink); + /* * Note: cifs case is easier than btrfs since server responsible for * checks for proper open modes and file type and if it wants From bc32d90e4d45b4f544527efdce2ea57d83f77542 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 25 Oct 2018 15:43:36 +1000 Subject: [PATCH 0546/2608] cifs: fix return value for cifs_listxattr [ Upstream commit 0c5d6cb6643f48ad3775322f3ebab6c7eb67484e ] If the application buffer was too small to fit all the names we would still count the number of bytes and return this for listxattr. This would then trigger a BUG in usercopy.c Fix the computation of the size so that we return -ERANGE correctly when the buffer is too small. This fixes the kernel BUG for xfstest generic/377 Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4e5b05263e4a..3372eedaa94d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -441,6 +441,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, int rc = 0; unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0; char *name, *value; + size_t buf_size = dst_size; size_t name_len, value_len, user_name_len; while (src_size > 0) { @@ -476,9 +477,10 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, /* 'user.' plus a terminating null */ user_name_len = 5 + 1 + name_len; - rc += user_name_len; - - if (dst_size >= user_name_len) { + if (buf_size == 0) { + /* skip copy - calc size only */ + rc += user_name_len; + } else if (dst_size >= user_name_len) { dst_size -= user_name_len; memcpy(dst, "user.", 5); dst += 5; @@ -486,8 +488,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, dst += name_len; *dst = 0; ++dst; - } else if (dst_size == 0) { - /* skip copy - calc size only */ + rc += user_name_len; } else { /* stop before overrun buffer */ rc = -ERANGE; From aa9571b2015d270a3e73a66d3a5b27c37836fd6a Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 30 Oct 2018 12:38:50 +0100 Subject: [PATCH 0547/2608] arm64: kprobe: make page to RO mode when allocate it [ Upstream commit 966866892cf89d606544bca22d584ba2ef9ec208 ] Commit 1404d6f13e47 ("arm64: dump: Add checking for writable and exectuable pages") has successfully identified code that leaves a page with W+X permissions. [ 3.245140] arm64/mm: Found insecure W+X mapping at address (____ptrval____)/0xffff000000d90000 [ 3.245771] WARNING: CPU: 0 PID: 1 at ../arch/arm64/mm/dump.c:232 note_page+0x410/0x420 [ 3.246141] Modules linked in: [ 3.246653] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0-rc5-next-20180928-00001-ge70ae259b853-dirty #62 [ 3.247008] Hardware name: linux,dummy-virt (DT) [ 3.247347] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 3.247623] pc : note_page+0x410/0x420 [ 3.247898] lr : note_page+0x410/0x420 [ 3.248071] sp : ffff00000804bcd0 [ 3.248254] x29: ffff00000804bcd0 x28: ffff000009274000 [ 3.248578] x27: ffff00000921a000 x26: ffff80007dfff000 [ 3.248845] x25: ffff0000093f5000 x24: ffff000009526f6a [ 3.249109] x23: 0000000000000004 x22: ffff000000d91000 [ 3.249396] x21: ffff000000d90000 x20: 0000000000000000 [ 3.249661] x19: ffff00000804bde8 x18: 0000000000000400 [ 3.249924] x17: 0000000000000000 x16: 0000000000000000 [ 3.250271] x15: ffffffffffffffff x14: 295f5f5f5f6c6176 [ 3.250594] x13: 7274705f5f5f5f28 x12: 2073736572646461 [ 3.250941] x11: 20746120676e6970 x10: 70616d20582b5720 [ 3.251252] x9 : 6572756365736e69 x8 : 3039643030303030 [ 3.251519] x7 : 306666666678302f x6 : ffff0000095467b2 [ 3.251802] x5 : 0000000000000000 x4 : 0000000000000000 [ 3.252060] x3 : 0000000000000000 x2 : ffffffffffffffff [ 3.252323] x1 : 4d151327adc50b00 x0 : 0000000000000000 [ 3.252664] Call trace: [ 3.252953] note_page+0x410/0x420 [ 3.253186] walk_pgd+0x12c/0x238 [ 3.253417] ptdump_check_wx+0x68/0xf8 [ 3.253637] mark_rodata_ro+0x68/0x98 [ 3.253847] kernel_init+0x38/0x160 [ 3.254103] ret_from_fork+0x10/0x18 kprobes allocates a writable executable page with module_alloc() in order to store executable code. Reworked to that when allocate a page it sets mode RO. Inspired by commit 63fef14fc98a ("kprobes/x86: Make insn buffer always ROX and use text_poke()"). Suggested-by: Arnd Bergmann Suggested-by: Ard Biesheuvel Acked-by: Will Deacon Acked-by: Masami Hiramatsu Reviewed-by: Laura Abbott Signed-off-by: Anders Roxell [catalin.marinas@arm.com: removed unnecessary casts] Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/probes/kprobes.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 22a5921562c7..0417c929d21a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -23,7 +23,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -42,10 +44,21 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = addr; + insns[0] = opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { /* prepare insn slot */ - p->ainsn.api.insn[0] = cpu_to_le32(p->opcode); + patch_text(p->ainsn.api.insn, p->opcode); flush_icache_range((uintptr_t) (p->ainsn.api.insn), (uintptr_t) (p->ainsn.api.insn) + @@ -118,15 +131,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +void *alloc_insn_page(void) { - void *addrs[1]; - u32 insns[1]; + void *page; - addrs[0] = (void *)addr; - insns[0] = (u32)opcode; + page = vmalloc_exec(PAGE_SIZE); + if (page) + set_memory_ro((unsigned long)page, 1); - return aarch64_insn_patch_text(addrs, insns, 1); + return page; } /* arm kprobe: install breakpoint in text */ From f54c21ee5dcede4164e8924a525d801f2fe855fd Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Mon, 22 Oct 2018 08:44:31 +0200 Subject: [PATCH 0548/2608] ixgbe: fix MAC anti-spoofing filter after VFLR [ Upstream commit 6702185c1ffec3421181b5e24491e3fac920cb61 ] This change resolves a driver bug where the driver is logging a message that says "Spoofed packets detected". This can occur on the PF (host) when a VF has VLAN+MACVLAN enabled and is re-started with a different MAC address. MAC and VLAN anti-spoofing filters are to be enabled together. Signed-off-by: Radoslaw Tyl Tested-by: Andrew Bowers Acked-by: Piotr Skajewski Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 112d24c6c9ce..4904a63b83ef 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -760,8 +760,10 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) ixgbe_set_vmvir(adapter, vfinfo->pf_vlan, adapter->default_up, vf); - if (vfinfo->spoofchk_enabled) + if (vfinfo->spoofchk_enabled) { hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); + hw->mac.ops.set_mac_anti_spoofing(hw, true, vf); + } } /* reset multicast table array for vf */ From 195169e26eaa18da05f0b63cc4b7dbfa50d70752 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 30 Oct 2018 15:06:38 -0700 Subject: [PATCH 0549/2608] reiserfs: propagate errors from fill_with_dentries() properly [ Upstream commit b10298d56c9623f9b173f19959732d3184b35f4f ] fill_with_dentries() failed to propagate errors up to reiserfs_for_each_xattr() properly. Plumb them through. Note that reiserfs_for_each_xattr() is only used by reiserfs_delete_xattrs() and reiserfs_chown_xattrs(). The result of reiserfs_delete_xattrs() is discarded anyway, the only difference there is whether a warning is printed to dmesg. The result of reiserfs_chown_xattrs() does matter because it can block chowning of the file to which the xattrs belong; but either way, the resulting state can have misaligned ownership, so my patch doesn't improve things greatly. Credit for making me look at this code goes to Al Viro, who pointed out that the ->actor calling convention is suboptimal and should be changed. Link: http://lkml.kernel.org/r/20180802163335.83312-1-jannh@google.com Signed-off-by: Jann Horn Reviewed-by: Andrew Morton Cc: Jeff Mahoney Cc: Eric Biggers Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/reiserfs/xattr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 505f87a8c724..83423192588c 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -185,6 +185,7 @@ struct reiserfs_dentry_buf { struct dir_context ctx; struct dentry *xadir; int count; + int err; struct dentry *dentries[8]; }; @@ -207,6 +208,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, dentry = lookup_one_len(name, dbuf->xadir, namelen); if (IS_ERR(dentry)) { + dbuf->err = PTR_ERR(dentry); return PTR_ERR(dentry); } else if (d_really_is_negative(dentry)) { /* A directory entry exists, but no file? */ @@ -215,6 +217,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, "not found for file %pd.\n", dentry, dbuf->xadir); dput(dentry); + dbuf->err = -EIO; return -EIO; } @@ -262,6 +265,10 @@ static int reiserfs_for_each_xattr(struct inode *inode, err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx); if (err) break; + if (buf.err) { + err = buf.err; + break; + } if (!buf.count) break; for (i = 0; !err && i < buf.count && buf.dentries[i]; i++) { From dc50dd3ac68b092c0bcb05cbd8b604e8d517185a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:07 -0700 Subject: [PATCH 0550/2608] hfs: prevent btree data loss on root split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d057c036672f33d43a5f7344acbb08cf3a8a0c09 ] This bug is triggered whenever hfs_brec_update_parent() needs to split the root node. The height of the btree is not increased, which leaves the new node orphaned and its records lost. It is not possible for this to happen on a valid hfs filesystem because the index nodes have fixed length keys. For reasons I ignore, the hfs module does have support for a number of hfsplus features. A corrupt btree header may report variable length keys and trigger this bug, so it's better to fix it. Link: http://lkml.kernel.org/r/9750b1415685c4adca10766895f6d5ef12babdb0.1535682463.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/brec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 9a8772465a90..da25c49203cc 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -425,6 +425,10 @@ skip: if (new_node) { __be32 cnid; + if (!new_node->parent) { + hfs_btree_inc_height(tree); + new_node->parent = tree->root; + } fd->bnode = hfs_bnode_find(tree, new_node->parent); /* create index key and entry */ hfs_bnode_read_key(new_node, fd->search_key, 14); From f2e71fed903fa6441188f4ac8b0df6528711cae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:00 -0700 Subject: [PATCH 0551/2608] hfsplus: prevent btree data loss on root split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0a3021d4f5295aa073c7bf5c5e4de60a2e292578 ] Creating, renaming or deleting a file may cause catalog corruption and data loss. This bug is randomly triggered by xfstests generic/027, but here is a faster reproducer: truncate -s 50M fs.iso mkfs.hfsplus fs.iso mount fs.iso /mnt i=100 while [ $i -le 150 ]; do touch /mnt/$i &>/dev/null ((++i)) done i=100 while [ $i -le 150 ]; do mv /mnt/$i /mnt/$(perl -e "print $i x82") &>/dev/null ((++i)) done umount /mnt fsck.hfsplus -n fs.iso The bug is triggered whenever hfs_brec_update_parent() needs to split the root node. The height of the btree is not increased, which leaves the new node orphaned and its records lost. Link: http://lkml.kernel.org/r/26d882184fc43043a810114258f45277752186c7.1535682461.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfsplus/brec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 808f4d8c859c..d3f36982f685 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -428,6 +428,10 @@ skip: if (new_node) { __be32 cnid; + if (!new_node->parent) { + hfs_btree_inc_height(tree); + new_node->parent = tree->root; + } fd->bnode = hfs_bnode_find(tree, new_node->parent); /* create index key and entry */ hfs_bnode_read_key(new_node, fd->search_key, 14); From a134b05a43ae2327036e860fb10c74389b5bd971 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 15 Jun 2018 16:42:56 +0200 Subject: [PATCH 0552/2608] um: Give start_idle_thread() a return code [ Upstream commit 7ff1e34bbdc15acab823b1ee4240e94623d50ee8 ] Fixes: arch/um/os-Linux/skas/process.c:613:1: warning: control reaches end of non-void function [-Wreturn-type] longjmp() never returns but gcc still warns that the end of the function can be reached. Add a return code and debug aid to detect this impossible case. Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/os-Linux/skas/process.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index c94c3bd70ccd..df4a985716eb 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -610,6 +610,11 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf) fatal_sigsegv(); } longjmp(*switch_buf, 1); + + /* unreachable */ + printk(UM_KERN_ERR "impossible long jump!"); + fatal_sigsegv(); + return 0; } void initial_thread_cb_skas(void (*proc)(void *), void *arg) From a3e69ef80467644e7970fdc39452f2247994fc3f Mon Sep 17 00:00:00 2001 From: "Lee, Shawn C" Date: Sun, 28 Oct 2018 22:49:33 -0700 Subject: [PATCH 0553/2608] drm/edid: Add 6 bpc quirk for BOE panel. [ Upstream commit 922dceff8dc1fb4dafc9af78139ba65671408103 ] BOE panel (ID: 0x0771) that reports "DFP 1.x compliant TMDS". But it's 6bpc panel only instead of 8 bpc. Add panel ID to edid quirk list and set 6 bpc as default to work around this issue. Cc: Jani Nikula Cc: Maarten Lankhorst Cc: Gustavo Padovan Cc: Cooper Chiou Signed-off-by: Lee, Shawn C > Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1540792173-7288-1-git-send-email-shawn.c.lee@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index d1191ebed072..ed01e3aae0e8 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -120,6 +120,9 @@ static const struct edid_quirk { /* SDC panel of Lenovo B50-80 reports 8 bpc, but is a 6 bpc panel */ { "SDC", 0x3652, EDID_QUIRK_FORCE_6BPC }, + /* BOE model 0x0771 reports 8 bpc, but is a 6 bpc panel */ + { "BOE", 0x0771, EDID_QUIRK_FORCE_6BPC }, + /* Belinea 10 15 55 */ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 }, { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 }, From b36c78af0c4d210aa99b705c4d777adf6a44d67f Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Sat, 6 Oct 2018 12:21:13 +0530 Subject: [PATCH 0554/2608] platform/x86: intel_telemetry: report debugfs failure [ Upstream commit 8d98b1ef368feeb7720b8b9b6f3bd93f2ad892bc ] On some Goldmont based systems such as ASRock J3455M the BIOS may not enable the IPC1 device that provides access to the PMC and PUNIT. In such scenarios, the IOSS and PSS resources from the platform device can not be obtained and result in a invalid telemetry_plt_config which is an internal data structure that holds platform config and is maintained by the telemetry platform driver. This is also applicable to the platforms where the BIOS supports IPC1 device under debug configurations but IPC1 is disabled by user or the policy. This change allows user to know the reason for not seeing entries under /sys/kernel/debug/telemetry/* when there is no apparent failure at boot. Cc: Matt Turner Cc: Len Brown Cc: Souvik Kumar Chakravarty Cc: Kuppuswamy Sathyanarayanan Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198779 Acked-by: Matt Turner Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/intel_telemetry_debugfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c index d4fc42b4cbeb..401bdc7a9d94 100644 --- a/drivers/platform/x86/intel_telemetry_debugfs.c +++ b/drivers/platform/x86/intel_telemetry_debugfs.c @@ -968,12 +968,16 @@ static int __init telemetry_debugfs_init(void) debugfs_conf = (struct telemetry_debugfs_conf *)id->driver_data; err = telemetry_pltconfig_valid(); - if (err < 0) + if (err < 0) { + pr_info("Invalid pltconfig, ensure IPC1 device is enabled in BIOS\n"); return -ENODEV; + } err = telemetry_debugfs_check_evts(); - if (err < 0) + if (err < 0) { + pr_info("telemetry_debugfs_check_evts failed\n"); return -EINVAL; + } register_pm_notifier(&pm_notifier); From 0333b8fc793bdb01b454a9c2ad3bf2c63a8cb530 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Thu, 18 Oct 2018 14:54:11 -0500 Subject: [PATCH 0555/2608] clk: fixed-rate: fix of_node_get-put imbalance [ Upstream commit 52091c256bdcad0d01e2852a63f19cd2cce6af96 ] When the fixed rate clock is created by devicetree, of_clk_add_provider is called. Add a call to of_clk_del_provider in the remove function to balance it out. Signed-off-by: Alan Tull Fixes: 435779fe1336 ("clk: fixed-rate: Convert into a module platform driver") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-fixed-rate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index b5c46b3f8764..6d6475c32ee5 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -200,6 +200,7 @@ static int of_fixed_clk_remove(struct platform_device *pdev) { struct clk *clk = platform_get_drvdata(pdev); + of_clk_del_provider(pdev->dev.of_node); clk_unregister_fixed_rate(clk); return 0; From bff410ea7ec2541a38d353a9e012f7f4622e17c1 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 17 Oct 2018 12:08:59 -0700 Subject: [PATCH 0556/2608] perf symbols: Set PLT entry/header sizes properly on Sparc [ Upstream commit d6afa561e1471ccfdaf7191230c0c59a37e45a5b ] Using the sh_entsize for both values isn't correct. It happens to be correct on x86... For both 32-bit and 64-bit sparc, there are four PLT entries in the PLT section. Signed-off-by: David S. Miller Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: David Tolnay Cc: Hanjun Guo Cc: Hemant Kumar Cc: Li Bin Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Fixes: b2f7605076d6 ("perf symbols: Fix plt entry calculation for ARM and AARCH64") Link: http://lkml.kernel.org/r/20181017.120859.2268840244308635255.davem@davemloft.net Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol-elf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 2de770511e70..8ad4296de98b 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -338,7 +338,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * plt_entry_size = 16; break; - default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + case EM_SPARC: + plt_header_size = 48; + plt_entry_size = 12; + break; + + case EM_SPARCV9: + plt_header_size = 128; + plt_entry_size = 32; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ plt_header_size = shdr_plt.sh_entsize; plt_entry_size = shdr_plt.sh_entsize; break; From a088fde602b8dc3183fb40fa2e706455243bd900 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 13 Jun 2018 12:05:13 +0800 Subject: [PATCH 0557/2608] fs/exofs: fix potential memory leak in mount option parsing [ Upstream commit 515f1867addaba49c1c6ac73abfaffbc192c1db4 ] There are some cases can cause memory leak when parsing option 'osdname'. Signed-off-by: Chengguang Xu Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/exofs/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 819624cfc8da..c9ec652e2fcd 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -100,6 +100,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts) token = match_token(p, tokens, args); switch (token) { case Opt_name: + kfree(opts->dev_name); opts->dev_name = match_strdup(&args[0]); if (unlikely(!opts->dev_name)) { EXOFS_ERR("Error allocating dev_name"); @@ -863,8 +864,10 @@ static struct dentry *exofs_mount(struct file_system_type *type, int ret; ret = parse_options(data, &opts); - if (ret) + if (ret) { + kfree(opts.dev_name); return ERR_PTR(ret); + } if (!opts.dev_name) opts.dev_name = dev_name; From e2a11843feaac0a4c7f6047ab433a059f0122603 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 24 Sep 2018 13:01:20 +0200 Subject: [PATCH 0558/2608] clk: samsung: exynos5420: Enable PERIS clocks for suspend [ Upstream commit b33228029d842269e17bba591609e83ed422005d ] Ensure that clocks for core SoC modules (including TZPC0..9 modules) are enabled for suspend/resume cycle. This fixes suspend/resume support on Exynos5422-based Odroid XU3/XU4 boards. Suggested-by: Joonyoung Shim Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-exynos5420.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 25601967d1cd..500a55415e90 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -280,6 +280,7 @@ static const struct samsung_clk_reg_dump exynos5420_set_clksrc[] = { { .offset = GATE_BUS_TOP, .value = 0xffffffff, }, { .offset = GATE_BUS_DISP1, .value = 0xffffffff, }, { .offset = GATE_IP_PERIC, .value = 0xffffffff, }, + { .offset = GATE_IP_PERIS, .value = 0xffffffff, }, }; static int exynos5420_clk_suspend(void) From d0a636aa44f9e7cbac2a90e191bc0ad840b10b9e Mon Sep 17 00:00:00 2001 From: Zubin Mithra Date: Thu, 27 Sep 2018 14:49:17 -0700 Subject: [PATCH 0559/2608] apparmor: Fix uninitialized value in aa_split_fqname [ Upstream commit 250f2da49cb8e582215a65c03f50e8ddf5cd119c ] Syzkaller reported a OOB-read with the stacktrace below. This occurs inside __aa_lookupn_ns as `n` is not initialized. `n` is obtained from aa_splitn_fqname. In cases where `name` is invalid, aa_splitn_fqname returns without initializing `ns_name` and `ns_len`. Fix this by always initializing `ns_name` and `ns_len`. __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 memcmp+0xe3/0x160 lib/string.c:861 strnstr+0x4b/0x70 lib/string.c:934 __aa_lookupn_ns+0xc1/0x570 security/apparmor/policy_ns.c:209 aa_lookupn_ns+0x88/0x1e0 security/apparmor/policy_ns.c:240 aa_fqlookupn_profile+0x1b9/0x1010 security/apparmor/policy.c:468 fqlookupn_profile+0x80/0xc0 security/apparmor/label.c:1844 aa_label_strn_parse+0xa3a/0x1230 security/apparmor/label.c:1908 aa_label_parse+0x42/0x50 security/apparmor/label.c:1943 aa_change_profile+0x513/0x3510 security/apparmor/domain.c:1362 apparmor_setprocattr+0xaa4/0x1150 security/apparmor/lsm.c:658 security_setprocattr+0x66/0xc0 security/security.c:1298 proc_pid_attr_write+0x301/0x540 fs/proc/base.c:2555 __vfs_write+0x119/0x9f0 fs/read_write.c:485 vfs_write+0x1fc/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 3b0aaf5866bf ("apparmor: add lib fn to find the "split" for fqnames") Reported-by: syzbot+61e4b490d9d2da591b50@syzkaller.appspotmail.com Signed-off-by: Zubin Mithra Reviewed-by: Kees Cook Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 08ca26bcca77..451654372a76 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -90,10 +90,12 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, const char *end = fqname + n; const char *name = skipn_spaces(fqname, n); - if (!name) - return NULL; *ns_name = NULL; *ns_len = 0; + + if (!name) + return NULL; + if (name[0] == ':') { char *split = strnchr(&name[1], end - &name[1], ':'); *ns_name = skipn_spaces(&name[1], end - &name[1]); From ad2e60ff51a7bbcaf87cf66875e3a28aaefbaedd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 3 Oct 2018 00:49:21 +0800 Subject: [PATCH 0560/2608] x86/earlyprintk: Add a force option for pciserial device [ Upstream commit d2266bbfa9e3e32e3b642965088ca461bd24a94f ] The "pciserial" earlyprintk variant helps much on many modern x86 platforms, but unfortunately there are still some platforms with PCI UART devices which have the wrong PCI class code. In that case, the current class code check does not allow for them to be used for logging. Add a sub-option "force" which overrides the class code check and thus the use of such device can be enforced. [ bp: massage formulations. ] Suggested-by: Borislav Petkov Signed-off-by: Feng Tang Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: "Stuart R . Anderson" Cc: Bjorn Helgaas Cc: David Rientjes Cc: Feng Tang Cc: Frederic Weisbecker Cc: Greg Kroah-Hartman Cc: H Peter Anvin Cc: Ingo Molnar Cc: Jiri Kosina Cc: Jonathan Corbet Cc: Kai-Heng Feng Cc: Kate Stewart Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Thomas Gleixner Cc: Thymo van Beers Cc: alan@linux.intel.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20181002164921.25833-1-feng.tang@intel.com Signed-off-by: Sasha Levin --- .../admin-guide/kernel-parameters.txt | 6 +++- arch/x86/kernel/early_printk.c | 29 ++++++++++++------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9841bad6f271..99a08722124d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1011,7 +1011,7 @@ earlyprintk=serial[,0x...[,baudrate]] earlyprintk=ttySn[,baudrate] earlyprintk=dbgp[debugController#] - earlyprintk=pciserial,bus:device.function[,baudrate] + earlyprintk=pciserial[,force],bus:device.function[,baudrate] earlyprintk=xdbc[xhciController#] earlyprintk is useful when the kernel crashes before @@ -1043,6 +1043,10 @@ The sclp output can only be used on s390. + The optional "force" to "pciserial" enables use of a + PCI device even when its classcode is not of the + UART class. + edac_report= [HW,EDAC] Control how to report EDAC event Format: {"on" | "off" | "force"} on: enable EDAC to report H/W event. May be overridden diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 5e801c8c8ce7..374a52fa5296 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -213,8 +213,9 @@ static unsigned int mem32_serial_in(unsigned long addr, int offset) * early_pci_serial_init() * * This function is invoked when the early_printk param starts with "pciserial" - * The rest of the param should be ",B:D.F,baud" where B, D & F describe the - * location of a PCI device that must be a UART device. + * The rest of the param should be "[force],B:D.F,baud", where B, D & F describe + * the location of a PCI device that must be a UART device. "force" is optional + * and overrides the use of an UART device with a wrong PCI class code. */ static __init void early_pci_serial_init(char *s) { @@ -224,17 +225,23 @@ static __init void early_pci_serial_init(char *s) u32 classcode, bar0; u16 cmdreg; char *e; + int force = 0; - - /* - * First, part the param to get the BDF values - */ if (*s == ',') ++s; if (*s == 0) return; + /* Force the use of an UART device with wrong class code */ + if (!strncmp(s, "force,", 6)) { + force = 1; + s += 6; + } + + /* + * Part the param to get the BDF values + */ bus = (u8)simple_strtoul(s, &e, 16); s = e; if (*s != ':') @@ -253,7 +260,7 @@ static __init void early_pci_serial_init(char *s) s++; /* - * Second, find the device from the BDF + * Find the device from the BDF */ cmdreg = read_pci_config(bus, slot, func, PCI_COMMAND); classcode = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); @@ -264,8 +271,10 @@ static __init void early_pci_serial_init(char *s) */ if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) && (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) || - (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ - return; + (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ { + if (!force) + return; + } /* * Determine if it is IO or memory mapped @@ -289,7 +298,7 @@ static __init void early_pci_serial_init(char *s) } /* - * Lastly, initialize the hardware + * Initialize the hardware */ if (*s) { if (strcmp(s, "nocfg") == 0) From 4f9ffc2711d0d6651b67fa68ab75feec7201ecff Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 20 Sep 2018 21:44:19 -0400 Subject: [PATCH 0561/2608] platform/x86: acerhdf: Add BIOS entry for Gateway LT31 v1.3307 [ Upstream commit 684238d79ad85c5e19a71bb5818e77e329912fbc ] To fix: acerhdf: unknown (unsupported) BIOS version Gateway /LT31 /v1.3307 , please report, aborting! As can be seen in the context, the BIOS registers haven't changed in the previous versions, so the assumption is they won't have changed in this last update for this somewhat older platform either. Cc: Peter Feuerer Cc: Darren Hart Cc: Andy Shevchenko Signed-off-by: Paul Gortmaker Signed-off-by: Andy Shevchenko Reviewed-by: Peter Feuerer Signed-off-by: Sasha Levin --- drivers/platform/x86/acerhdf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index ea22591ee66f..53dfe67807e3 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -233,6 +233,7 @@ static const struct bios_settings bios_tbl[] = { {"Gateway", "LT31", "v1.3201", 0x55, 0x58, {0x9e, 0x00}, 0}, {"Gateway", "LT31", "v1.3302", 0x55, 0x58, {0x9e, 0x00}, 0}, {"Gateway", "LT31", "v1.3303t", 0x55, 0x58, {0x9e, 0x00}, 0}, + {"Gateway", "LT31", "v1.3307", 0x55, 0x58, {0x9e, 0x00}, 0}, /* Packard Bell */ {"Packard Bell", "DOA150", "v0.3104", 0x55, 0x58, {0x21, 0x00}, 0}, {"Packard Bell", "DOA150", "v0.3105", 0x55, 0x58, {0x20, 0x00}, 0}, From 10c51fa85c9879f7d90b184f940e2e16e5e3b330 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 25 Sep 2018 12:44:59 -0700 Subject: [PATCH 0562/2608] arm64: percpu: Initialize ret in the default case [ Upstream commit b5bb425871186303e6936fa2581521bdd1964a58 ] Clang warns that if the default case is taken, ret will be uninitialized. ./arch/arm64/include/asm/percpu.h:196:2: warning: variable 'ret' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized] default: ^~~~~~~ ./arch/arm64/include/asm/percpu.h:200:9: note: uninitialized use occurs here return ret; ^~~ ./arch/arm64/include/asm/percpu.h:157:19: note: initialize the variable 'ret' to silence this warning unsigned long ret, loop; ^ = 0 This warning appears several times while building the erofs filesystem. While it's not strictly wrong, the BUILD_BUG will prevent this from becoming a true problem. Initialize ret to 0 in the default case right before the BUILD_BUG to silence all of these warnings. Reported-by: Prasad Sodagudi Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Dennis Zhou Signed-off-by: Sasha Levin --- arch/arm64/include/asm/percpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 43393208229e..d79eaa816f29 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -93,6 +93,7 @@ static inline unsigned long __percpu_##op(void *ptr, \ : [val] "Ir" (val)); \ break; \ default: \ + ret = 0; \ BUILD_BUG(); \ } \ \ @@ -122,6 +123,7 @@ static inline unsigned long __percpu_read(void *ptr, int size) ret = READ_ONCE(*(u64 *)ptr); break; default: + ret = 0; BUILD_BUG(); } @@ -191,6 +193,7 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, : [val] "r" (val)); break; default: + ret = 0; BUILD_BUG(); } From 221609ce4ce547d2fbad0e7ecd68d66e12f358ec Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 19 Oct 2018 15:37:01 +0200 Subject: [PATCH 0563/2608] s390/vdso: add missing FORCE to build targets [ Upstream commit b44b136a3773d8a9c7853f8df716bd1483613cbb ] According to Documentation/kbuild/makefiles.txt all build targets using if_changed should use FORCE as well. Add missing FORCE to make sure vdso targets are rebuild properly when not just immediate prerequisites have changed but also when build command differs. Reviewed-by: Philipp Rudo Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/vdso32/Makefile | 6 +++--- arch/s390/kernel/vdso64/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 308564b9bf68..101cadabfc89 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -33,7 +33,7 @@ UBSAN_SANITIZE := n $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE $(call if_changed,vdso32ld) # strip rule for the .so file @@ -42,12 +42,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S +$(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index f81ae7998883..36bbafcf4a77 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -33,7 +33,7 @@ UBSAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE $(call if_changed,vdso64ld) # strip rule for the .so file @@ -42,12 +42,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso64): %.o: %.S +$(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< From a1e0ae82cfd5f05973a6459ec17ed61df5cf0f9f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sat, 14 Jul 2018 21:59:43 +0200 Subject: [PATCH 0564/2608] netfilter: ipset: list:set: Decrease refcount synchronously on deletion and replace [ Upstream commit 439cd39ea136d2c026805264d58a91f36b6b64ca ] Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") postponed decreasing set reference counters to the RCU callback. An 'ipset del' command can terminate before the RCU grace period is elapsed, and if sets are listed before then, the reference counter shown in userspace will be wrong: # ipset create h hash:ip; ipset create l list:set; ipset add l # ipset del l h; ipset list h Name: h Type: hash:ip Revision: 4 Header: family inet hashsize 1024 maxelem 65536 Size in memory: 88 References: 1 Number of entries: 0 Members: # sleep 1; ipset list h Name: h Type: hash:ip Revision: 4 Header: family inet hashsize 1024 maxelem 65536 Size in memory: 88 References: 0 Number of entries: 0 Members: Fix this by making the reference count update synchronous again. As a result, when sets are listed, ip_set_name_byindex() might now fetch a set whose reference count is already zero. Instead of relying on the reference count to protect against concurrent set renaming, grab ip_set_ref_lock as reader and copy the name, while holding the same lock in ip_set_rename() as writer instead. Reported-by: Li Shuang Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 23 +++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 17 +++++++++++------ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 8e42253e5d4d..91a533bd3eb1 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -312,7 +312,7 @@ enum { extern ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9d2ce1459cec..a3f1dc7cf538 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -668,21 +668,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* Get the name of a set behind a set index. - * We assume the set is referenced, so it does exist and - * can't be destroyed. The set cannot be renamed due to - * the referencing either. - * + * Set itself is protected by RCU, but its name isn't: to protect against + * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the + * name. */ -const char * -ip_set_name_byindex(struct net *net, ip_set_id_t index) +void +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name) { - const struct ip_set *set = ip_set_rcu_get(net, index); + struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(!set); - BUG_ON(set->ref == 0); - /* Referenced, so it's safe */ - return set->name; + read_lock_bh(&ip_set_ref_lock); + strncpy(name, set->name, IPSET_MAXNAMELEN); + read_unlock_bh(&ip_set_ref_lock); } EXPORT_SYMBOL_GPL(ip_set_name_byindex); @@ -1128,7 +1127,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, if (!set) return -ENOENT; - read_lock_bh(&ip_set_ref_lock); + write_lock_bh(&ip_set_ref_lock); if (set->ref != 0) { ret = -IPSET_ERR_REFERENCED; goto out; @@ -1145,7 +1144,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, strncpy(set->name, name2, IPSET_MAXNAMELEN); out: - read_unlock_bh(&ip_set_ref_lock); + write_unlock_bh(&ip_set_ref_lock); return ret; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 178d4eba013b..75d52aed6fdb 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -156,9 +156,7 @@ __list_set_del_rcu(struct rcu_head * rcu) { struct set_elem *e = container_of(rcu, struct set_elem, rcu); struct ip_set *set = e->set; - struct list_set *map = set->data; - ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); kfree(e); } @@ -166,15 +164,21 @@ __list_set_del_rcu(struct rcu_head * rcu) static inline void list_set_del(struct ip_set *set, struct set_elem *e) { + struct list_set *map = set->data; + set->elements--; list_del_rcu(&e->list); + ip_set_put_byindex(map->net, e->id); call_rcu(&e->rcu, __list_set_del_rcu); } static inline void -list_set_replace(struct set_elem *e, struct set_elem *old) +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) { + struct list_set *map = set->data; + list_replace_rcu(&old->list, &e->list); + ip_set_put_byindex(map->net, old->id); call_rcu(&old->rcu, __list_set_del_rcu); } @@ -306,7 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, INIT_LIST_HEAD(&e->list); list_set_init_extensions(set, ext, e); if (n) - list_set_replace(e, n); + list_set_replace(set, e, n); else if (next) list_add_tail_rcu(&e->list, &next->list); else if (prev) @@ -497,6 +501,7 @@ list_set_list(const struct ip_set *set, const struct list_set *map = set->data; struct nlattr *atd, *nested; u32 i = 0, first = cb->args[IPSET_CB_ARG0]; + char name[IPSET_MAXNAMELEN]; struct set_elem *e; int ret = 0; @@ -515,8 +520,8 @@ list_set_list(const struct ip_set *set, nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; - if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(map->net, e->id))) + ip_set_name_byindex(map->net, e->id, name); + if (nla_put_string(skb, IPSET_ATTR_NAME, name)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; From 0d5d0b5c41f766355f2b42c47d13ea001f754c7d Mon Sep 17 00:00:00 2001 From: Eric Westbrook Date: Tue, 28 Aug 2018 15:14:42 -0600 Subject: [PATCH 0565/2608] netfilter: ipset: actually allow allowable CIDR 0 in hash:net,port,net [ Upstream commit 886503f34d63e681662057448819edb5b1057a97 ] Allow /0 as advertised for hash:net,port,net sets. For "hash:net,port,net", ipset(8) says that "either subnet is permitted to be a /0 should you wish to match port between all destinations." Make that statement true. Before: # ipset create cidrzero hash:net,port,net # ipset add cidrzero 0.0.0.0/0,12345,0.0.0.0/0 ipset v6.34: The value of the CIDR parameter of the IP address is invalid # ipset create cidrzero6 hash:net,port,net family inet6 # ipset add cidrzero6 ::/0,12345,::/0 ipset v6.34: The value of the CIDR parameter of the IP address is invalid After: # ipset create cidrzero hash:net,port,net # ipset add cidrzero 0.0.0.0/0,12345,0.0.0.0/0 # ipset test cidrzero 192.168.205.129,12345,172.16.205.129 192.168.205.129,tcp:12345,172.16.205.129 is in set cidrzero. # ipset create cidrzero6 hash:net,port,net family inet6 # ipset add cidrzero6 ::/0,12345,::/0 # ipset test cidrzero6 fe80::1,12345,ff00::1 fe80::1,tcp:12345,ff00::1 is in set cidrzero6. See also: https://bugzilla.kernel.org/show_bug.cgi?id=200897 https://github.com/ewestbrook/linux/commit/df7ff6efb0934ab6acc11f003ff1a7580d6c1d9c Signed-off-by: Eric Westbrook Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_hash_netportnet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 8602f2595a1a..0e6e40c6f652 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + if (e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + if (e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } @@ -492,13 +492,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + if (e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + if (e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } From 28bd72a7262a2256d4e2fa35b8bba47cb43e8142 Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Wed, 31 Oct 2018 13:02:03 -0500 Subject: [PATCH 0566/2608] s390/mm: Fix ERROR: "__node_distance" undefined! [ Upstream commit a541f0ebcc08ed8bc0cc492eec9a86cb280a9f24 ] Fixes: ERROR: "__node_distance" [drivers/nvme/host/nvme-core.ko] undefined! make[1]: *** [scripts/Makefile.modpost:92: __modpost] Error 1 make: *** [Makefile:1275: modules] Error 2 + exit 1 Signed-off-by: Justin M. Forbes Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/numa/numa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 5bd374491f94..6c151b42e65d 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -54,6 +54,7 @@ int __node_distance(int a, int b) { return mode->distance ? mode->distance(a, b) : 0; } +EXPORT_SYMBOL(__node_distance); int numa_debug_enabled; From 79f2eb5fde82c5264cb51cf52c1c24bd3cef6b47 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 19 Oct 2018 19:35:19 +0200 Subject: [PATCH 0567/2608] netfilter: ipset: Correct rcu_dereference() call in ip_set_put_comment() [ Upstream commit 17b8b74c0f8dbf9b9e3301f9ca5b65dd1c079951 ] The function is called when rcu_read_lock() is held and not when rcu_read_lock_bh() is held. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/linux/netfilter/ipset/ip_set_comment.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 8e2bab1e8e90..70877f8de7e9 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -43,11 +43,11 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, rcu_assign_pointer(comment->c, c); } -/* Used only when dumping a set, protected by rcu_read_lock_bh() */ +/* Used only when dumping a set, protected by rcu_read_lock() */ static inline int ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { - struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); + struct ip_set_comment_rcu *c = rcu_dereference(comment->c); if (!c) return 0; From fdf4e678ea193ec9c2e099509994a118260f0d8c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 21 Oct 2018 00:00:08 +0900 Subject: [PATCH 0568/2608] netfilter: xt_IDLETIMER: add sysfs filename checking routine [ Upstream commit 54451f60c8fa061af9051a53be9786393947367c ] When IDLETIMER rule is added, sysfs file is created under /sys/class/xt_idletimer/timers/ But some label name shouldn't be used. ".", "..", "power", "uevent", "subsystem", etc... So that sysfs filename checking routine is needed. test commands: %iptables -I INPUT -j IDLETIMER --timeout 1 --label "power" splat looks like: [95765.423132] sysfs: cannot create duplicate filename '/devices/virtual/xt_idletimer/timers/power' [95765.433418] CPU: 0 PID: 8446 Comm: iptables Not tainted 4.19.0-rc6+ #20 [95765.449755] Call Trace: [95765.449755] dump_stack+0xc9/0x16b [95765.449755] ? show_regs_print_info+0x5/0x5 [95765.449755] sysfs_warn_dup+0x74/0x90 [95765.449755] sysfs_add_file_mode_ns+0x352/0x500 [95765.449755] sysfs_create_file_ns+0x179/0x270 [95765.449755] ? sysfs_add_file_mode_ns+0x500/0x500 [95765.449755] ? idletimer_tg_checkentry+0x3e5/0xb1b [xt_IDLETIMER] [95765.449755] ? rcu_read_lock_sched_held+0x114/0x130 [95765.449755] ? __kmalloc_track_caller+0x211/0x2b0 [95765.449755] ? memcpy+0x34/0x50 [95765.449755] idletimer_tg_checkentry+0x4e2/0xb1b [xt_IDLETIMER] [ ... ] Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_IDLETIMER.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 1141f08810b6..3fef8c2e545d 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -116,6 +116,22 @@ static void idletimer_tg_expired(unsigned long data) schedule_work(&timer->work); } +static int idletimer_check_sysfs_name(const char *name, unsigned int size) +{ + int ret; + + ret = xt_check_proc_name(name, size); + if (ret < 0) + return ret; + + if (!strcmp(name, "power") || + !strcmp(name, "subsystem") || + !strcmp(name, "uevent")) + return -EINVAL; + + return 0; +} + static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; @@ -126,6 +142,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) goto out; } + ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); + if (ret < 0) + goto out_free_timer; + sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { From 8cab58117e72caa2234323ce399f77037fe4dea2 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 2 Nov 2018 19:04:09 +0100 Subject: [PATCH 0569/2608] s390/qeth: fix HiperSockets sniffer [ Upstream commit bd74a7f9cc033cf4d405788f80292268987dc0c5 ] Sniffing mode for L3 HiperSockets requires that no IP addresses are registered with the HW. The preferred way to achieve this is for userspace to delete all the IPs on the interface. But qeth is expected to also tolerate a configuration where that is not the case, by skipping the IP registration when in sniffer mode. Since commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") reworked the IP registration logic in the L3 subdriver, this no longer works. When the qeth device is set online, qeth_l3_recover_ip() now unconditionally registers all unicast addresses from our internal IP table. While we could fix this particular problem by skipping qeth_l3_recover_ip() on a sniffer device, the more future-proof change is to skip the IP address registration at the lowest level. This way we a) catch any future code path that attempts to register an IP address without considering the sniffer scenario, and b) continue to build up our internal IP table, so that if sniffer mode is switched off later we can operate just like normal. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_l3_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index cd73172bff47..a19f2dc69e8a 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -358,9 +358,6 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover) QETH_CARD_TEXT(card, 4, "clearip"); - if (recover && card->options.sniffer) - return; - spin_lock_bh(&card->ip_lock); hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { @@ -818,6 +815,8 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card, int rc = 0; int cnt = 3; + if (card->options.sniffer) + return 0; if (addr->proto == QETH_PROT_IPV4) { QETH_CARD_TEXT(card, 2, "setaddr4"); @@ -853,6 +852,9 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card, { int rc = 0; + if (card->options.sniffer) + return 0; + if (addr->proto == QETH_PROT_IPV4) { QETH_CARD_TEXT(card, 2, "deladdr4"); QETH_CARD_HEX(card, 3, &addr->u.a4.addr, sizeof(int)); From 21eb778e7dd55c4a690b662d988798a2eb988c1d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 28 Oct 2018 18:16:51 +0100 Subject: [PATCH 0570/2608] hwmon: (ibmpowernv) Remove bogus __init annotations [ Upstream commit e3e61f01d755188cb6c2dcf5a244b9c0937c258e ] If gcc decides not to inline make_sensor_label(): WARNING: vmlinux.o(.text+0x4df549c): Section mismatch in reference from the function .create_device_attrs() to the function .init.text:.make_sensor_label() The function .create_device_attrs() references the function __init .make_sensor_label(). This is often because .create_device_attrs lacks a __init annotation or the annotation of .make_sensor_label is wrong. As .probe() can be called after freeing of __init memory, all __init annotiations in the driver are bogus, and should be removed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ibmpowernv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c index 5ccdd0b52650..b38f4951c94e 100644 --- a/drivers/hwmon/ibmpowernv.c +++ b/drivers/hwmon/ibmpowernv.c @@ -126,7 +126,7 @@ static ssize_t show_label(struct device *dev, struct device_attribute *devattr, return sprintf(buf, "%s\n", sdata->label); } -static int __init get_logical_cpu(int hwcpu) +static int get_logical_cpu(int hwcpu) { int cpu; @@ -137,9 +137,8 @@ static int __init get_logical_cpu(int hwcpu) return -ENOENT; } -static void __init make_sensor_label(struct device_node *np, - struct sensor_data *sdata, - const char *label) +static void make_sensor_label(struct device_node *np, + struct sensor_data *sdata, const char *label) { u32 id; size_t n; From c6b1ef1110ce90b66dbd957b706d58fd1ce4f4b4 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Fri, 5 Oct 2018 11:50:20 +0900 Subject: [PATCH 0571/2608] Revert "drm/exynos/decon5433: implement frame counter" [ Upstream commit 6ca469e22a30992b4478d2ab88737c70667c1e00 ] This reverts commit 0586feba322e1de05075700eb4b835c8b683e62b This patch makes it to need get_vblank_counter callback in crtc to get frame counter from decon driver. However, drm_dev->max_vblank_count is a member unique to vendor's DRM driver but in case of ARM DRM, some CRTC devices don't provide the frame counter value. As a result, this patch made extension and clone mode not working. Instead of this patch, we may need separated max_vblank_count which belongs to each CRTC device, or need to implement frame counter emulation for them who don't support HW frame counter. Signed-off-by: Inki Dae Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 9 --------- drivers/gpu/drm/exynos/exynos_drm_crtc.c | 11 ----------- drivers/gpu/drm/exynos/exynos_drm_drv.h | 1 - 3 files changed, 21 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index f905c214fdd0..5a5b3535411f 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -160,13 +160,6 @@ static u32 decon_get_frame_count(struct decon_context *ctx, bool end) return frm; } -static u32 decon_get_vblank_counter(struct exynos_drm_crtc *crtc) -{ - struct decon_context *ctx = crtc->ctx; - - return decon_get_frame_count(ctx, false); -} - static void decon_setup_trigger(struct decon_context *ctx) { if (!ctx->crtc->i80_mode && !(ctx->out_type & I80_HW_TRG)) @@ -532,7 +525,6 @@ static const struct exynos_drm_crtc_ops decon_crtc_ops = { .disable = decon_disable, .enable_vblank = decon_enable_vblank, .disable_vblank = decon_disable_vblank, - .get_vblank_counter = decon_get_vblank_counter, .atomic_begin = decon_atomic_begin, .update_plane = decon_update_plane, .disable_plane = decon_disable_plane, @@ -550,7 +542,6 @@ static int decon_bind(struct device *dev, struct device *master, void *data) int ret; ctx->drm_dev = drm_dev; - drm_dev->max_vblank_count = 0xffffffff; for (win = ctx->first_win; win < WINDOWS_NR; win++) { int tmp = (win == ctx->first_win) ? 0 : win; diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index 6ce0821590df..4787560bf93e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -147,16 +147,6 @@ static void exynos_drm_crtc_disable_vblank(struct drm_crtc *crtc) exynos_crtc->ops->disable_vblank(exynos_crtc); } -static u32 exynos_drm_crtc_get_vblank_counter(struct drm_crtc *crtc) -{ - struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); - - if (exynos_crtc->ops->get_vblank_counter) - return exynos_crtc->ops->get_vblank_counter(exynos_crtc); - - return 0; -} - static const struct drm_crtc_funcs exynos_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, @@ -166,7 +156,6 @@ static const struct drm_crtc_funcs exynos_crtc_funcs = { .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, .enable_vblank = exynos_drm_crtc_enable_vblank, .disable_vblank = exynos_drm_crtc_disable_vblank, - .get_vblank_counter = exynos_drm_crtc_get_vblank_counter, }; struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index f8bae4cb4823..d228b5148dbc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -133,7 +133,6 @@ struct exynos_drm_crtc_ops { void (*disable)(struct exynos_drm_crtc *crtc); int (*enable_vblank)(struct exynos_drm_crtc *crtc); void (*disable_vblank)(struct exynos_drm_crtc *crtc); - u32 (*get_vblank_counter)(struct exynos_drm_crtc *crtc); enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc, const struct drm_display_mode *mode); int (*atomic_check)(struct exynos_drm_crtc *crtc, From 35b3e230a2b5dc9068a8779f1b77754b9c3edfde Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Thu, 1 Nov 2018 14:15:49 +0100 Subject: [PATCH 0572/2608] clk: fixed-factor: fix of_node_get-put imbalance [ Upstream commit f98e8a572bddbf27032114127d2fcc78fa5e6a9d ] When the fixed factor clock is created by devicetree, of_clk_add_provider is called. Add a call to of_clk_del_provider in the remove function to balance it out. Reported-by: Alan Tull Fixes: 971451b3b15d ("clk: fixed-factor: Convert into a module platform driver") Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-fixed-factor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c index 20724abd38bd..7df6b5b1e7ee 100644 --- a/drivers/clk/clk-fixed-factor.c +++ b/drivers/clk/clk-fixed-factor.c @@ -210,6 +210,7 @@ static int of_fixed_factor_clk_remove(struct platform_device *pdev) { struct clk *clk = platform_get_drvdata(pdev); + of_clk_del_provider(pdev->dev.of_node); clk_unregister_fixed_factor(clk); return 0; From 615a30e8dbc9c60bb35f3687072fb077160ea0b7 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 5 Nov 2018 18:14:41 -0600 Subject: [PATCH 0573/2608] lib/raid6: Fix arm64 test build [ Upstream commit 313a06e636808387822af24c507cba92703568b1 ] The lib/raid6/test fails to build the neon objects on arm64 because the correct machine type is 'aarch64'. Once this is correctly enabled, the neon recovery objects need to be added to the build. Reviewed-by: Ard Biesheuvel Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- lib/raid6/test/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index be1010bdc435..565a77220fae 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -27,7 +27,7 @@ ifeq ($(ARCH),arm) CFLAGS += -I../../../arch/arm/include -mfpu=neon HAS_NEON = yes endif -ifeq ($(ARCH),arm64) +ifeq ($(ARCH),aarch64) CFLAGS += -I../../../arch/arm64/include HAS_NEON = yes endif @@ -41,7 +41,7 @@ ifeq ($(IS_X86),yes) gcc -c -x assembler - >&/dev/null && \ rm ./-.o && echo -DCONFIG_AS_AVX512=1) else ifeq ($(HAS_NEON),yes) - OBJS += neon.o neon1.o neon2.o neon4.o neon8.o + OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell printf '\#include \nvector int a;\n' |\ From 850e4ca7868046e13b66cb0d656398960efc4162 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 29 Oct 2018 08:11:33 +0000 Subject: [PATCH 0574/2608] s390/perf: Change CPUM_CF return code in event init function [ Upstream commit 0bb2ae1b26e1fb7543ec7474cdd374ac4b88c4da ] The function perf_init_event() creates a new event and assignes it to a PMU. This a done in a loop over all existing PMUs. For each listed PMU the event init function is called and if this function does return any other error than -ENOENT, the loop is terminated the creation of the event fails. If the event is invalid, return -ENOENT to try other PMUs. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_cf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 746d03423333..61e91fee8467 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -376,7 +376,7 @@ static int __hw_perf_event_init(struct perf_event *event) return -ENOENT; if (ev > PERF_CPUM_CF_MAX_CTR) - return -EINVAL; + return -ENOENT; /* Obtain the counter set to which the specified counter belongs */ set = get_counter_set(ev); From ef8d2a5d990af5d658aafd37b41e29782b690bff Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 23 Oct 2018 14:37:31 +0100 Subject: [PATCH 0575/2608] sched/core: Take the hotplug lock in sched_init_smp() [ Upstream commit 40fa3780bac2b654edf23f6b13f4e2dd550aea10 ] When running on linux-next (8c60c36d0b8c ("Add linux-next specific files for 20181019")) + CONFIG_PROVE_LOCKING=y on a big.LITTLE system (e.g. Juno or HiKey960), we get the following report: [ 0.748225] Call trace: [ 0.750685] lockdep_assert_cpus_held+0x30/0x40 [ 0.755236] static_key_enable_cpuslocked+0x20/0xc8 [ 0.760137] build_sched_domains+0x1034/0x1108 [ 0.764601] sched_init_domains+0x68/0x90 [ 0.768628] sched_init_smp+0x30/0x80 [ 0.772309] kernel_init_freeable+0x278/0x51c [ 0.776685] kernel_init+0x10/0x108 [ 0.780190] ret_from_fork+0x10/0x18 The static_key in question is 'sched_asym_cpucapacity' introduced by commit: df054e8445a4 ("sched/topology: Add static_key for asymmetric CPU capacity optimizations") In this particular case, we enable it because smp_prepare_cpus() will end up fetching the capacity-dmips-mhz entry from the devicetree, so we already have some asymmetry detected when entering sched_init_smp(). This didn't get detected in tip/sched/core because we were missing: commit cb538267ea1e ("jump_label/lockdep: Assert we hold the hotplug lock for _cpuslocked() operations") Calls to build_sched_domains() post sched_init_smp() will hold the hotplug lock, it just so happens that this very first call is a special case. As stated by a comment in sched_init_smp(), "There's no userspace yet to cause hotplug operations" so this is a harmless warning. However, to both respect the semantics of underlying callees and make lockdep happy, take the hotplug lock in sched_init_smp(). This also satisfies the comment atop sched_init_domains() that says "Callers must hold the hotplug lock". Reported-by: Sudeep Holla Tested-by: Sudeep Holla Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: morten.rasmussen@arm.com Cc: quentin.perret@arm.com Link: http://lkml.kernel.org/r/1540301851-3048-1-git-send-email-valentin.schneider@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4e89ed8a0fb2..3bc664662081 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5733,14 +5733,17 @@ void __init sched_init_smp(void) /* * There's no userspace yet to cause hotplug operations; hence all the * CPU masks are stable and all blatant races in the below code cannot - * happen. + * happen. The hotplug lock is nevertheless taken to satisfy lockdep, + * but there won't be any contention on it. */ + cpus_read_lock(); mutex_lock(&sched_domains_mutex); sched_init_domains(cpu_active_mask); cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); if (cpumask_empty(non_isolated_cpus)) cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); + cpus_read_unlock(); /* Move init over to a non-isolated CPU */ if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) From fc88b3abe23fe3d4cb2e2b1f3f7036e7f68889fe Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Thu, 1 Nov 2018 20:13:21 -0400 Subject: [PATCH 0576/2608] perf tools: Fix undefined symbol scnprintf in libperf-jvmti.so [ Upstream commit 6ac2226229d931153331a93d90655a3de05b9290 ] Currently jvmti agent can not be used because function scnprintf is not present in the agent libperf-jvmti.so. As a result the JVM when using such agent to record JITed code profiling information will fail on looking up scnprintf: java: symbol lookup error: lib/libperf-jvmti.so: undefined symbol: scnprintf This commit fixes that by reverting to the use of snprintf, that can be looked up, instead of scnprintf, adding a proper check for the returned value in order to print a better error message when the jitdump file pathname is too long. Checking the returned value also helps to comply with some recent gcc versions, like gcc8, which will fail due to truncated writing checks related to the -Werror=format-truncation= flag. Signed-off-by: Gustavo Romero Acked-by: Jiri Olsa LPU-Reference: 1541117601-18937-2-git-send-email-gromero@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-mvpxxxy7wnzaj74cq75muw3f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/jvmti/jvmti_agent.c | 49 ++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index c1d20d951434..4ad9948fe594 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -125,7 +125,7 @@ perf_get_timestamp(void) } static int -debug_cache_init(void) +create_jit_cache_dir(void) { char str[32]; char *base, *p; @@ -144,8 +144,13 @@ debug_cache_init(void) strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because %s/.debug/" + " is too long, please check the cwd, JITDUMPDIR, and" + " HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { @@ -154,20 +159,32 @@ debug_cache_init(void) } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit/%s.XXXXXXXX is too long, please check" + " the cwd, JITDUMPDIR, and HOME variables", + base, str); + return -1; + } p = mkdtemp(jit_path); if (p != jit_path) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } @@ -228,7 +245,7 @@ void *jvmti_open(void) { char dump_path[PATH_MAX]; struct jitheader header; - int fd; + int fd, ret; FILE *fp; init_arch_timestamp(); @@ -245,12 +262,22 @@ void *jvmti_open(void) memset(&header, 0, sizeof(header)); - debug_cache_init(); + /* + * jitdump file dir + */ + if (create_jit_cache_dir() < 0) + return NULL; /* * jitdump file name */ - scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jitdump file full path because" + " %s/jit-%i.dump is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", jit_path, getpid()); + return NULL; + } fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) From eddf9365687f829beeca47a3d0cfcdf6252e46a1 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 29 Oct 2018 10:52:42 -0700 Subject: [PATCH 0577/2608] i40e: restore NETIF_F_GSO_IPXIP[46] to netdev features [ Upstream commit ba766b8b99c30ad3c55ed8cf224d1185ecff1476 ] Since commit bacd75cfac8a ("i40e/i40evf: Add capability exchange for outer checksum", 2017-04-06) the i40e driver has not reported support for IP-in-IP offloads. This likely occurred due to a bad rebase, as the commit extracts hw_enc_features into its own variable. As part of this change, it dropped the NETIF_F_FSO_IPXIP flags from the netdev->hw_enc_features. This was unfortunately not caught during code review. Fix this by adding back the missing feature flags. For reference, NETIF_F_GSO_IPXIP4 was added in commit 7e13318daa4a ("net: define gso types for IPx over IPv4 and IPv6", 2016-05-20), replacing NETIF_F_GSO_IPIP and NETIF_F_GSO_SIT. NETIF_F_GSO_IPXIP6 was added in commit bf2d1df39502 ("intel: Add support for IPv6 IP-in-IP offload", 2016-05-20). Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 04dbf64fb1cb..176c99b8251d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9688,6 +9688,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | From 484ef227384f88c0b52e9dd43529eccf7c4025da Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 8 Nov 2018 16:46:08 +0200 Subject: [PATCH 0578/2608] qed: Fix memory/entry leak in qed_init_sp_request() [ Upstream commit 39477551df940ddb1339203817de04f5caaacf7a ] Free the allocated SPQ entry or return the acquired SPQ entry to the free list in error flows. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/qlogic/qed/qed_sp_commands.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index d7c5965328be..b26578464469 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -80,7 +80,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, case QED_SPQ_MODE_BLOCK: if (!p_data->p_comp_data) - return -EINVAL; + goto err; p_ent->comp_cb.cookie = p_data->p_comp_data->cookie; break; @@ -95,7 +95,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, default: DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n", p_ent->comp_mode); - return -EINVAL; + goto err; } DP_VERBOSE(p_hwfn, QED_MSG_SPQ, @@ -109,6 +109,18 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, memset(&p_ent->ramrod, 0, sizeof(p_ent->ramrod)); return 0; + +err: + /* qed_spq_get_entry() can either get an entry from the free_pool, + * or, if no entries are left, allocate a new entry and add it to + * the unlimited_pending list. + */ + if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending) + kfree(p_ent); + else + qed_spq_return_entry(p_hwfn, p_ent); + + return -EINVAL; } static enum tunnel_clss qed_tunn_clss_to_fw_clss(u8 type) From 082810cb7f21c52dbe4c15f247e87ad22b88bbb8 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 8 Nov 2018 16:46:09 +0200 Subject: [PATCH 0579/2608] qed: Fix blocking/unlimited SPQ entries leak [ Upstream commit 2632f22ebd08da249c2017962a199a0cfb2324bf ] When there are no SPQ entries left in the free_pool, new entries are allocated and are added to the unlimited list. When an entry in the pool is available, the content is copied from the original entry, and the new entry is sent to the device. qed_spq_post() is not aware of that, so the additional entry is stored in the original entry as p_post_ent, which can later be returned to the pool. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_sp.h | 3 ++ drivers/net/ethernet/qlogic/qed/qed_spq.c | 57 ++++++++++++----------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index ab4ad8a1e2a5..01a213d4ee9c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -167,6 +167,9 @@ struct qed_spq_entry { enum spq_mode comp_mode; struct qed_spq_comp_cb comp_cb; struct qed_spq_comp_done comp_done; /* SPQ_MODE_EBLOCK */ + + /* Posted entry for unlimited list entry in EBLOCK mode */ + struct qed_spq_entry *post_ent; }; struct qed_eq { diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index be48d9abd001..0313e9c46979 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -687,6 +687,8 @@ static int qed_spq_add_entry(struct qed_hwfn *p_hwfn, /* EBLOCK responsible to free the allocated p_ent */ if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK) kfree(p_ent); + else + p_ent->post_ent = p_en2; p_ent = p_en2; } @@ -770,6 +772,25 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) SPQ_HIGH_PRI_RESERVE_DEFAULT); } +/* Avoid overriding of SPQ entries when getting out-of-order completions, by + * marking the completions in a bitmap and increasing the chain consumer only + * for the first successive completed entries. + */ +static void qed_spq_comp_bmap_update(struct qed_hwfn *p_hwfn, __le16 echo) +{ + u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; + struct qed_spq *p_spq = p_hwfn->p_spq; + + __set_bit(pos, p_spq->p_comp_bitmap); + while (test_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap)) { + __clear_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap); + p_spq->comp_bitmap_idx++; + qed_chain_return_produced(&p_spq->chain); + } +} + int qed_spq_post(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent, u8 *fw_return_code) { @@ -821,11 +842,12 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, p_ent->queue == &p_spq->unlimited_pending); if (p_ent->queue == &p_spq->unlimited_pending) { - /* This is an allocated p_ent which does not need to - * return to pool. - */ + struct qed_spq_entry *p_post_ent = p_ent->post_ent; + kfree(p_ent); - return rc; + + /* Return the entry which was actually posted */ + p_ent = p_post_ent; } if (rc) @@ -839,7 +861,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, spq_post_fail2: spin_lock_bh(&p_spq->lock); list_del(&p_ent->list); - qed_chain_return_produced(&p_spq->chain); + qed_spq_comp_bmap_update(p_hwfn, p_ent->elem.hdr.echo); spq_post_fail: /* return to the free pool */ @@ -871,25 +893,8 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_spq->lock); list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) { if (p_ent->elem.hdr.echo == echo) { - u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; - list_del(&p_ent->list); - - /* Avoid overriding of SPQ entries when getting - * out-of-order completions, by marking the completions - * in a bitmap and increasing the chain consumer only - * for the first successive completed entries. - */ - __set_bit(pos, p_spq->p_comp_bitmap); - - while (test_bit(p_spq->comp_bitmap_idx, - p_spq->p_comp_bitmap)) { - __clear_bit(p_spq->comp_bitmap_idx, - p_spq->p_comp_bitmap); - p_spq->comp_bitmap_idx++; - qed_chain_return_produced(&p_spq->chain); - } - + qed_spq_comp_bmap_update(p_hwfn, echo); p_spq->comp_count++; found = p_ent; break; @@ -928,11 +933,9 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, QED_MSG_SPQ, "Got a completion without a callback function\n"); - if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) || - (found->queue == &p_spq->unlimited_pending)) + if (found->comp_mode != QED_SPQ_MODE_EBLOCK) /* EBLOCK is responsible for returning its own entry into the - * free list, unless it originally added the entry into the - * unlimited pending list. + * free list. */ qed_spq_return_entry(p_hwfn, found); From fe2c4df2692de3c724ac8720d4479cbe16d89814 Mon Sep 17 00:00:00 2001 From: Sagiv Ozeri Date: Thu, 8 Nov 2018 16:46:11 +0200 Subject: [PATCH 0580/2608] qed: Fix potential memory corruption [ Upstream commit fa5c448d98f0df660bfcad3dd5facc027ef84cd3 ] A stuck ramrod should be deleted from the completion_pending list, otherwise it will be added again in the future and corrupt the list. Return error value to inform that ramrod is stuck and should be deleted. Signed-off-by: Sagiv Ozeri Signed-off-by: Denis Bolotin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 0313e9c46979..467755b6dd0b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -144,6 +144,7 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n"); rc = qed_mcp_drain(p_hwfn, p_ptt); + qed_ptt_release(p_hwfn, p_ptt); if (rc) { DP_NOTICE(p_hwfn, "MCP drain failed\n"); goto err; @@ -152,18 +153,15 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, /* Retry after drain */ rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true); if (!rc) - goto out; + return 0; comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie; - if (comp_done->done == 1) + if (comp_done->done == 1) { if (p_fw_ret) *p_fw_ret = comp_done->fw_return_code; -out: - qed_ptt_release(p_hwfn, p_ptt); - return 0; - + return 0; + } err: - qed_ptt_release(p_hwfn, p_ptt); DP_NOTICE(p_hwfn, "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n", le32_to_cpu(p_ent->elem.hdr.cid), From 47ff76ee9c9c938531ec97b7b4a74804a10f04fc Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 8 Nov 2018 11:42:14 -0600 Subject: [PATCH 0581/2608] net: stmmac: Fix RX packet size > 8191 [ Upstream commit 8137b6ef0ce469154e5cf19f8e7fe04d9a72ac5e ] Ping problems with packets > 8191 as shown: PING 192.168.1.99 (192.168.1.99) 8150(8178) bytes of data. 8158 bytes from 192.168.1.99: icmp_seq=1 ttl=64 time=0.669 ms wrong data byte 8144 should be 0xd0 but was 0x0 16 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f %< ---------------snip-------------------------------------- 8112 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf 8144 0 0 0 0 d0 d1 ^^^^^^^ Notice the 4 bytes of 0 before the expected byte of d0. Databook notes that the RX buffer must be a multiple of 4/8/16 bytes [1]. Update the DMA Buffer size define to 8188 instead of 8192. Remove the -1 from the RX buffer size allocations and use the new DMA Buffer size directly. [1] Synopsys DesignWare Cores Ethernet MAC Universal v3.70a [section 8.4.2 - Table 8-24] Tested on SoCFPGA Stratix10 with ping sweep from 100 to 8300 byte packets. Fixes: 286a83721720 ("stmmac: add CHAINED descriptor mode support (V4)") Suggested-by: Jose Abreu Signed-off-by: Thor Thayer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 ++- drivers/net/ethernet/stmicro/stmmac/descs_com.h | 2 +- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 2 +- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 627fec210e2f..8e2a19616bc9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -340,7 +340,8 @@ struct dma_features { /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ #define BUF_SIZE_16KiB 16384 -#define BUF_SIZE_8KiB 8192 +/* RX Buffer size must be < 8191 and multiple of 4/8/16 bytes */ +#define BUF_SIZE_8KiB 8188 #define BUF_SIZE_4KiB 4096 #define BUF_SIZE_2KiB 2048 diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index ca9d7e48034c..40d6356a7e73 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -31,7 +31,7 @@ /* Enhanced descriptors */ static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) { - p->des1 |= cpu_to_le32(((BUF_SIZE_8KiB - 1) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB << ERDES1_BUFFER2_SIZE_SHIFT) & ERDES1_BUFFER2_SIZE_MASK); diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 2a828a312814..acd65a4f94d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -262,7 +262,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK); + p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index 28e4b5d50ce6..1af7b078b94d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -143,7 +143,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) static int stmmac_set_16kib_bfsize(int mtu) { int ret = 0; - if (unlikely(mtu >= BUF_SIZE_8KiB)) + if (unlikely(mtu > BUF_SIZE_8KiB)) ret = BUF_SIZE_16KiB; return ret; } From c887029c11d0c2bc14da9eb02704669cebab89a9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 23 Nov 2018 15:25:50 +0900 Subject: [PATCH 0582/2608] zram: close udev startup race condition as default groups commit fef912bf860e upstream. commit 98af4d4df889 upstream. I got a report from Howard Chen that he saw zram and sysfs race(ie, zram block device file is created but sysfs for it isn't yet) when he tried to create new zram devices via hotadd knob. v4.20 kernel fixes it by [1, 2] but it's too large size to merge into -stable so this patch fixes the problem by registering defualt group by Greg KH's approach[3]. This patch should be applied to every stable tree [3.16+] currently existing from kernel.org because the problem was introduced at 2.6.37 by [4]. [1] fef912bf860e, block: genhd: add 'groups' argument to device_add_disk [2] 98af4d4df889, zram: register default groups with device_add_disk() [3] http://kroah.com/log/blog/2013/06/26/how-to-create-a-sysfs-file-correctly/ [4] 33863c21e69e9, Staging: zram: Replace ioctls with sysfs interface Cc: Sergey Senozhatsky Cc: Hannes Reinecke Tested-by: Howard Chen Signed-off-by: Minchan Kim Signed-off-by: Sasha Levin --- drivers/block/zram/zram_drv.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1e2648e4c286..27b202c64c84 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1491,6 +1491,11 @@ static const struct attribute_group zram_disk_attr_group = { .attrs = zram_disk_attrs, }; +static const struct attribute_group *zram_disk_attr_groups[] = { + &zram_disk_attr_group, + NULL, +}; + /* * Allocate and initialize new zram device. the function returns * '>= 0' device_id upon success, and negative value otherwise. @@ -1568,23 +1573,14 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); + disk_to_dev(zram->disk)->groups = zram_disk_attr_groups; add_disk(zram->disk); - ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - if (ret < 0) { - pr_err("Error creating sysfs group for device %d\n", - device_id); - goto out_free_disk; - } strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; -out_free_disk: - del_gendisk(zram->disk); - put_disk(zram->disk); out_free_queue: blk_cleanup_queue(queue); out_free_idr: @@ -1612,16 +1608,6 @@ static int zram_remove(struct zram *zram) zram->claim = true; mutex_unlock(&bdev->bd_mutex); - /* - * Remove sysfs first, so no one will perform a disksize - * store while we destroy the devices. This also helps during - * hot_remove -- zram_reset_device() is the last holder of - * ->init_lock, no later/concurrent disksize_store() or any - * other sysfs handlers are possible. - */ - sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); From 26eff850549b52e43d0bc61f4682952fe2da293d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 8 Nov 2018 02:04:57 +0000 Subject: [PATCH 0583/2608] SUNRPC: drop pointless static qualifier in xdr_get_next_encode_buffer() [ Upstream commit 025911a5f4e36955498ed50806ad1b02f0f76288 ] There is no need to have the '__be32 *p' variable static since new value always be assigned before use it. Signed-off-by: YueHaibing Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- net/sunrpc/xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 13695ba8fc54..4f382805eb9c 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -512,7 +512,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode); static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) { - static __be32 *p; + __be32 *p; int space_left; int frag1bytes, frag2bytes; From 8c209c38d8fa633bfd519476f6b616ee1bc8eb06 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 22 May 2018 14:16:50 +0300 Subject: [PATCH 0584/2608] ACPI / watchdog: Prefer iTCO_wdt always when WDAT table uses RTC SRAM [ Upstream commit 5a802a7a285c8877ca872e44eeb0f06afcb5212f ] After we added quirk for Lenovo Z50-70 it turns out there are at least two more systems where WDAT table includes instructions accessing RTC SRAM. Instead of quirking each system separately, look for such instructions in the table and automatically prefer iTCO_wdt if found. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199033 Reported-by: Arnold Guy Reported-by: Alois Nespor Reported-by: Yury Pakin Reported-by: Ihor Chyhin Signed-off-by: Mika Westerberg Acked-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpi_watchdog.c | 72 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 4bde16fb97d8..95600309ce42 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -12,35 +12,51 @@ #define pr_fmt(fmt) "ACPI: watchdog: " fmt #include -#include #include #include #include "internal.h" -static const struct dmi_system_id acpi_watchdog_skip[] = { - { - /* - * On Lenovo Z50-70 there are two issues with the WDAT - * table. First some of the instructions use RTC SRAM - * to store persistent information. This does not work well - * with Linux RTC driver. Second, more important thing is - * that the instructions do not actually reset the system. - * - * On this particular system iTCO_wdt seems to work just - * fine so we prefer that over WDAT for now. - * - * See also https://bugzilla.kernel.org/show_bug.cgi?id=199033. - */ - .ident = "Lenovo Z50-70", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "20354"), - DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Z50-70"), - }, - }, - {} -}; +#ifdef CONFIG_RTC_MC146818_LIB +#include + +/* + * There are several systems where the WDAT table is accessing RTC SRAM to + * store persistent information. This does not work well with the Linux RTC + * driver so on those systems we skip WDAT driver and prefer iTCO_wdt + * instead. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=199033. + */ +static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) +{ + const struct acpi_wdat_entry *entries; + int i; + + entries = (struct acpi_wdat_entry *)(wdat + 1); + for (i = 0; i < wdat->entries; i++) { + const struct acpi_generic_address *gas; + + gas = &entries[i].register_region; + if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { + switch (gas->address) { + case RTC_PORT(0): + case RTC_PORT(1): + case RTC_PORT(2): + case RTC_PORT(3): + return true; + } + } + } + + return false; +} +#else +static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) +{ + return false; +} +#endif static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { @@ -50,9 +66,6 @@ static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) if (acpi_disabled) return NULL; - if (dmi_check_system(acpi_watchdog_skip)) - return NULL; - status = acpi_get_table(ACPI_SIG_WDAT, 0, (struct acpi_table_header **)&wdat); if (ACPI_FAILURE(status)) { @@ -60,6 +73,11 @@ static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) return NULL; } + if (acpi_watchdog_uses_rtc(wdat)) { + pr_info("Skipping WDAT on this system because it uses RTC SRAM\n"); + return NULL; + } + return wdat; } From e404a6294a31b2b68cecdd2e89d61cc6ed87dddd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:43 +0200 Subject: [PATCH 0585/2608] perf machine: Add machine__is() to identify machine arch commit dbbd34a666ee117d0e39e71a47f38f02c4a5c698 upstream. Add a function to identify the machine architecture. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andy Lutomirski Cc: Dave Hansen Cc: H. Peter Anvin Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526548928-20790-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/env.c | 19 +++++++++++++++++++ tools/perf/util/env.h | 3 +++ tools/perf/util/machine.c | 9 +++++++++ tools/perf/util/machine.h | 2 ++ 4 files changed, 33 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893..49f58921a968 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,6 +3,7 @@ #include "env.h" #include "util.h" #include +#include struct perf_env perf_env; @@ -87,6 +88,24 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +static int perf_env__read_arch(struct perf_env *env) +{ + struct utsname uts; + + if (env->arch) + return 0; + + if (!uname(&uts)) + env->arch = strdup(uts.machine); + + return env->arch ? 0 : -ENOMEM; +} + +const char *perf_env__raw_arch(struct perf_env *env) +{ + return env && !perf_env__read_arch(env) ? env->arch : "unknown"; +} + void cpu_cache_level__free(struct cpu_cache_level *cache) { free(cache->type); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1eb35b190b34..bd3869913907 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,7 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__raw_arch(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index bd5d5b5e2218..2af879693fbe 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2238,6 +2238,15 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, return 0; } +/* + * Compares the raw arch string. N.B. see instead perf_env__arch() if a + * normalized arch is needed. + */ +bool machine__is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__raw_arch(machine->env), arch); +} + int machine__get_kernel_start(struct machine *machine) { struct map *map = machine__kernel_map(machine); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d551aa80a59b..fbc5133fb27c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -169,6 +169,8 @@ static inline bool machine__is_host(struct machine *machine) return machine ? machine->pid == HOST_KERNEL_ID : false; } +bool machine__is(struct machine *machine, const char *arch); + struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); From 4edc059bac38f8cb59a8e539fb36fe0b8d19c55e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:44 +0200 Subject: [PATCH 0586/2608] perf tools: Fix kernel_start for PTI on x86 commit 19422a9f2a3be7f3a046285ffae4cbb571aa853a upstream. On x86_64, PTI entry trampolines are less than the start of kernel text, but still above 2^63. So leave kernel_start = 1ULL << 63 for x86_64. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andy Lutomirski Cc: Dave Hansen Cc: H. Peter Anvin Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526548928-20790-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/machine.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2af879693fbe..af18c3d55642 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2263,7 +2263,12 @@ int machine__get_kernel_start(struct machine *machine) machine->kernel_start = 1ULL << 63; if (map) { err = map__load(map); - if (!err) + /* + * On x86_64, PTI entry trampolines are less than the + * start of kernel text, but still above 2^63. So leave + * kernel_start = 1ULL << 63 for x86_64. + */ + if (!err && !machine__is(machine, "x86_64")) machine->kernel_start = map->start; } return err; From 5d390059eab4bc9b3bfcba01a214ab873514f578 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:45 +0200 Subject: [PATCH 0587/2608] perf machine: Add nr_cpus_avail() commit 9cecca325ea879c84fcd31a5e609a514c1a1dbd1 upstream. Add a function to return the number of the machine's available CPUs. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andy Lutomirski Cc: Dave Hansen Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526986485-6562-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/env.c | 13 +++++++++++++ tools/perf/util/env.h | 1 + tools/perf/util/machine.c | 5 +++++ tools/perf/util/machine.h | 1 + 4 files changed, 20 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 49f58921a968..b492cb974aa0 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -101,11 +101,24 @@ static int perf_env__read_arch(struct perf_env *env) return env->arch ? 0 : -ENOMEM; } +static int perf_env__read_nr_cpus_avail(struct perf_env *env) +{ + if (env->nr_cpus_avail == 0) + env->nr_cpus_avail = cpu__max_present_cpu(); + + return env->nr_cpus_avail ? 0 : -ENOENT; +} + const char *perf_env__raw_arch(struct perf_env *env) { return env && !perf_env__read_arch(env) ? env->arch : "unknown"; } +int perf_env__nr_cpus_avail(struct perf_env *env) +{ + return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0; +} + void cpu_cache_level__free(struct cpu_cache_level *cache) { free(cache->type); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index bd3869913907..9aace8452751 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -67,5 +67,6 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__raw_arch(struct perf_env *env); +int perf_env__nr_cpus_avail(struct perf_env *env); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index af18c3d55642..78aa1c5f19ca 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2247,6 +2247,11 @@ bool machine__is(struct machine *machine, const char *arch) return machine && !strcmp(perf_env__raw_arch(machine->env), arch); } +int machine__nr_cpus_avail(struct machine *machine) +{ + return machine ? perf_env__nr_cpus_avail(machine->env) : 0; +} + int machine__get_kernel_start(struct machine *machine) { struct map *map = machine__kernel_map(machine); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index fbc5133fb27c..245743d9ce63 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -170,6 +170,7 @@ static inline bool machine__is_host(struct machine *machine) } bool machine__is(struct machine *machine, const char *arch); +int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); From ce41e5fc90d4039907046e10b8d8dd7206ec36a4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:46 +0200 Subject: [PATCH 0588/2608] perf machine: Workaround missing maps for x86 PTI entry trampolines commit 4d99e4136580d178e3523281a820be17bf814bf8 upstream. On x86_64 the PTI entry trampolines are not in the kernel map created by perf tools. That results in the addresses having no symbols and prevents annotation. It also causes Intel PT to have decoding errors at the trampoline addresses. Workaround that by creating maps for the trampolines. At present the kernel does not export information revealing where the trampolines are. Until that happens, the addresses are hardcoded. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andy Lutomirski Cc: Dave Hansen Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526986485-6562-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/machine.c | 96 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/machine.h | 3 ++ tools/perf/util/symbol.c | 12 +++-- 3 files changed, 106 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 78aa1c5f19ca..968fd0454e6b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -818,6 +818,102 @@ static int machine__get_running_kernel_start(struct machine *machine, return 0; } +/* Kernel-space maps for symbols that are outside the main kernel map and module maps */ +struct extra_kernel_map { + u64 start; + u64 end; + u64 pgoff; +}; + +static int machine__create_extra_kernel_map(struct machine *machine, + struct dso *kernel, + struct extra_kernel_map *xm) +{ + struct kmap *kmap; + struct map *map; + + map = map__new2(xm->start, kernel, MAP__FUNCTION); + if (!map) + return -1; + + map->end = xm->end; + map->pgoff = xm->pgoff; + + kmap = map__kmap(map); + + kmap->kmaps = &machine->kmaps; + + map_groups__insert(&machine->kmaps, map); + + pr_debug2("Added extra kernel map %" PRIx64 "-%" PRIx64 "\n", + map->start, map->end); + + map__put(map); + + return 0; +} + +static u64 find_entry_trampoline(struct dso *dso) +{ + /* Duplicates are removed so lookup all aliases */ + const char *syms[] = { + "_entry_trampoline", + "__entry_trampoline_start", + "entry_SYSCALL_64_trampoline", + }; + struct symbol *sym = dso__first_symbol(dso, MAP__FUNCTION); + unsigned int i; + + for (; sym; sym = dso__next_symbol(sym)) { + if (sym->binding != STB_GLOBAL) + continue; + for (i = 0; i < ARRAY_SIZE(syms); i++) { + if (!strcmp(sym->name, syms[i])) + return sym->start; + } + } + + return 0; +} + +/* + * These values can be used for kernels that do not have symbols for the entry + * trampolines in kallsyms. + */ +#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL +#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 +#define X86_64_ENTRY_TRAMPOLINE 0x6000 + +/* Map x86_64 PTI entry trampolines */ +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel) +{ + u64 pgoff = find_entry_trampoline(kernel); + int nr_cpus_avail, cpu; + + if (!pgoff) + return 0; + + nr_cpus_avail = machine__nr_cpus_avail(machine); + + /* Add a 1 page map for each CPU's entry trampoline */ + for (cpu = 0; cpu < nr_cpus_avail; cpu++) { + u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU + + cpu * X86_64_CPU_ENTRY_AREA_SIZE + + X86_64_ENTRY_TRAMPOLINE; + struct extra_kernel_map xm = { + .start = va, + .end = va + page_size, + .pgoff = pgoff, + }; + + if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0) + return -1; + } + + return 0; +} + int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { int type; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 245743d9ce63..13041b036a5b 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -266,4 +266,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, */ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel); + #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ec40e47aa198..3936f69f385c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1513,20 +1513,22 @@ int dso__load(struct dso *dso, struct map *map) goto out; } + if (map->groups && map->groups->machine) + machine = map->groups->machine; + else + machine = NULL; + if (dso->kernel) { if (dso->kernel == DSO_TYPE_KERNEL) ret = dso__load_kernel_sym(dso, map); else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) ret = dso__load_guest_kernel_sym(dso, map); + if (machine__is(machine, "x86_64")) + machine__map_x86_64_entry_trampolines(machine, dso); goto out; } - if (map->groups && map->groups->machine) - machine = map->groups->machine; - else - machine = NULL; - dso->adjust_symbols = 0; if (perfmap) { From 09917457494163dec35a74745ff04184385c8d32 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:47 +0200 Subject: [PATCH 0589/2608] perf test code-reading: Fix perf_env setup for PTI entry trampolines commit f6c66d73bb8192d357bb5fb8cd5826920f811d8c upstream. The "Object code reading" test will not create maps for the PTI entry trampolines unless the machine environment exists to show that the arch is x86_64. Signed-off-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1528183800-21577-1-git-send-email-adrian.hunter@intel.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/code-reading.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index fcc8984bc329..acad8ba06d77 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -527,6 +527,7 @@ static int do_test_code_reading(bool try_kcore) pid = getpid(); machine = machine__new_host(); + machine->env = &perf_env; ret = machine__create_kernel_maps(machine); if (ret < 0) { From d412ab7cfc0de3c01befb5d32746eaf512929bde Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 26 Oct 2018 15:28:54 +0300 Subject: [PATCH 0590/2608] x86/mm: Move LDT remap out of KASLR region on 5-level paging commit d52888aa2753e3063a9d3a0c9f72f94aa9809c15 upstream On 5-level paging the LDT remap area is placed in the middle of the KASLR randomization region and it can overlap with the direct mapping, the vmalloc or the vmap area. The LDT mapping is per mm, so it cannot be moved into the P4D page table next to the CPU_ENTRY_AREA without complicating PGD table allocation for 5-level paging. The 4 PGD slot gap just before the direct mapping is reserved for hypervisors, so it cannot be used. Move the direct mapping one slot deeper and use the resulting gap for the LDT remap area. The resulting layout is the same for 4 and 5 level paging. [ tglx: Massaged changelog ] Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: bhe@redhat.com Cc: willy@infradead.org Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181026122856.66224-2-kirill.shutemov@linux.intel.com Signed-off-by: Sasha Levin --- Documentation/x86/x86_64/mm.txt | 10 ++++++---- arch/x86/include/asm/page_64_types.h | 12 +++++++----- arch/x86/include/asm/pgtable_64_types.h | 7 +++---- arch/x86/xen/mmu_pv.c | 6 +++--- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index ea91cb61a602..43f066cde67d 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -4,8 +4,9 @@ Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [47:63] sign extension ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor -ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory -ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole +ffff880000000000 - ffff887fffffffff (=39 bits) LDT remap for PTI +ffff888000000000 - ffffc87fffffffff (=64 TB) direct mapping of all phys. memory +ffffc88000000000 - ffffc8ffffffffff (=39 bits) hole ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) @@ -30,8 +31,9 @@ Virtual memory map with 5 level page tables: 0000000000000000 - 00ffffffffffffff (=56 bits) user space, different per mm hole caused by [56:63] sign extension ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor -ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory -ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI +ff10000000000000 - ff10ffffffffffff (=48 bits) LDT remap for PTI +ff11000000000000 - ff90ffffffffffff (=55 bits) direct mapping of all phys. memory +ff91000000000000 - ff9fffffffffffff (=3840 TB) hole ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB) ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index e1407312c412..74d531f6d518 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -33,14 +33,16 @@ /* * Set __PAGE_OFFSET to the most negative possible address + - * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a - * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's - * what Xen requires. + * PGDIR_SIZE*17 (pgd slot 273). + * + * The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for + * a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary, + * but it's what Xen requires. */ #ifdef CONFIG_X86_5LEVEL -#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) +#define __PAGE_OFFSET_BASE _AC(0xff11000000000000, UL) #else -#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) +#define __PAGE_OFFSET_BASE _AC(0xffff888000000000, UL) #endif #ifdef CONFIG_RANDOMIZE_MEMORY diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6b8f73dcbc2c..7764617b8f9c 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -88,16 +88,15 @@ typedef struct { pteval_t pte; } pte_t; # define VMALLOC_SIZE_TB _AC(12800, UL) # define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -# define LDT_PGD_ENTRY _AC(-112, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #else # define VMALLOC_SIZE_TB _AC(32, UL) # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -# define LDT_PGD_ENTRY _AC(-3, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif +#define LDT_PGD_ENTRY -240UL +#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) + #ifdef CONFIG_RANDOMIZE_MEMORY # define VMALLOC_START vmalloc_base # define VMEMMAP_START vmemmap_base diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 8ed11a5b1a9d..b33fa127a613 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1869,7 +1869,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) init_top_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ - /* L4[272] -> level3_ident_pgt */ + /* L4[273] -> level3_ident_pgt */ /* L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_top_pgt); @@ -1889,8 +1889,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) addr[0] = (unsigned long)pgd; addr[1] = (unsigned long)l3; addr[2] = (unsigned long)l2; - /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: - * Both L4[272][0] and L4[511][510] have entries that point to the same + /* Graft it onto L4[273][0]. Note that we creating an aliasing problem: + * Both L4[273][0] and L4[511][510] have entries that point to the same * L2 (PMD) tables. Meaning that if you modify it in __va space * it will be also modified in the __ka space! (But if you just * modify the PMD table to point to other PTE's or none, then you From a17989cb9bb03c75bb57dddc0f044100fd4d856e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 26 Oct 2018 15:28:55 +0300 Subject: [PATCH 0591/2608] x86/ldt: Unmap PTEs for the slot before freeing LDT pages commit a0e6e0831c516860fc7f9be1db6c081fe902ebcf upstream modify_ldt(2) leaves the old LDT mapped after switching over to the new one. The old LDT gets freed and the pages can be re-used. Leaving the mapping in place can have security implications. The mapping is present in the userspace page tables and Meltdown-like attacks can read these freed and possibly reused pages. It's relatively simple to fix: unmap the old LDT and flush TLB before freeing the old LDT memory. This further allows to avoid flushing the TLB in map_ldt_struct() as the slot is unmapped and flushed by unmap_ldt_struct() or has never been mapped at all. [ tglx: Massaged changelog and removed the needless line breaks ] Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: bhe@redhat.com Cc: willy@infradead.org Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181026122856.66224-3-kirill.shutemov@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/ldt.c | 49 +++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 26d713ecad34..65df298d4e9e 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -103,14 +103,6 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) /* * If PTI is enabled, this maps the LDT into the kernelmode and * usermode tables for the given mm. - * - * There is no corresponding unmap function. Even if the LDT is freed, we - * leave the PTEs around until the slot is reused or the mm is destroyed. - * This is harmless: the LDT is always in ordinary memory, and no one will - * access the freed slot. - * - * If we wanted to unmap freed LDTs, we'd also need to do a flush to make - * it useful, and the flush would slow down modify_ldt(). */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) @@ -119,8 +111,8 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) bool is_vmalloc, had_top_level_entry; unsigned long va; spinlock_t *ptl; + int i, nr_pages; pgd_t *pgd; - int i; if (!static_cpu_has(X86_FEATURE_PTI)) return 0; @@ -141,7 +133,9 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) is_vmalloc = is_vmalloc_addr(ldt->entries); - for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { unsigned long offset = i << PAGE_SHIFT; const void *src = (char *)ldt->entries + offset; unsigned long pfn; @@ -189,14 +183,42 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) } } - va = (unsigned long)ldt_slot_va(slot); - flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0); - ldt->slot = slot; #endif return 0; } +static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + unsigned long va; + int i, nr_pages; + + if (!ldt) + return; + + /* LDT map/unmap is only required for PTI */ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { + unsigned long offset = i << PAGE_SHIFT; + spinlock_t *ptl; + pte_t *ptep; + + va = (unsigned long)ldt_slot_va(ldt->slot) + offset; + ptep = get_locked_pte(mm, va, &ptl); + pte_clear(mm, va, ptep); + pte_unmap_unlock(ptep, ptl); + } + + va = (unsigned long)ldt_slot_va(ldt->slot); + flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, 0); +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +} + static void free_ldt_pgtables(struct mm_struct *mm) { #ifdef CONFIG_PAGE_TABLE_ISOLATION @@ -433,6 +455,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } install_ldt(mm, new_ldt); + unmap_ldt_struct(mm, old_ldt); free_ldt_struct(old_ldt); error = 0; From 817266cb8d45b74a695fb8a27e12b17cf087d7b1 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 5 Nov 2018 09:35:44 -0500 Subject: [PATCH 0592/2608] media: v4l: event: Add subscription to list before calling "add" operation commit 92539d3eda2c090b382699bbb896d4b54e9bdece upstream. Patch ad608fbcf166 changed how events were subscribed to address an issue elsewhere. As a side effect of that change, the "add" callback was called before the event subscription was added to the list of subscribed events, causing the first event queued by the add callback (and possibly other events arriving soon afterwards) to be lost. Fix this by adding the subscription to the list before calling the "add" callback, and clean up afterwards if that fails. Fixes: ad608fbcf166 ("media: v4l: event: Prevent freeing event subscriptions while accessed") Reported-by: Dave Stevenson Signed-off-by: Sakari Ailus Tested-by: Dave Stevenson Reviewed-by: Hans Verkuil Tested-by: Hans Verkuil Cc: stable@vger.kernel.org (for 4.14 and up) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-event.c | 43 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c index 568dd4affb33..011907eff660 100644 --- a/drivers/media/v4l2-core/v4l2-event.c +++ b/drivers/media/v4l2-core/v4l2-event.c @@ -193,6 +193,22 @@ int v4l2_event_pending(struct v4l2_fh *fh) } EXPORT_SYMBOL_GPL(v4l2_event_pending); +static void __v4l2_event_unsubscribe(struct v4l2_subscribed_event *sev) +{ + struct v4l2_fh *fh = sev->fh; + unsigned int i; + + lockdep_assert_held(&fh->subscribe_lock); + assert_spin_locked(&fh->vdev->fh_lock); + + /* Remove any pending events for this subscription */ + for (i = 0; i < sev->in_use; i++) { + list_del(&sev->events[sev_pos(sev, i)].list); + fh->navailable--; + } + list_del(&sev->list); +} + int v4l2_event_subscribe(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub, unsigned elems, const struct v4l2_subscribed_event_ops *ops) @@ -225,27 +241,23 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, spin_lock_irqsave(&fh->vdev->fh_lock, flags); found_ev = v4l2_event_subscribed(fh, sub->type, sub->id); + if (!found_ev) + list_add(&sev->list, &fh->subscribed); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (found_ev) { /* Already listening */ kvfree(sev); - goto out_unlock; - } - - if (sev->ops && sev->ops->add) { + } else if (sev->ops && sev->ops->add) { ret = sev->ops->add(sev, elems); if (ret) { + spin_lock_irqsave(&fh->vdev->fh_lock, flags); + __v4l2_event_unsubscribe(sev); + spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); kvfree(sev); - goto out_unlock; } } - spin_lock_irqsave(&fh->vdev->fh_lock, flags); - list_add(&sev->list, &fh->subscribed); - spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); - -out_unlock: mutex_unlock(&fh->subscribe_lock); return ret; @@ -280,7 +292,6 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, { struct v4l2_subscribed_event *sev; unsigned long flags; - int i; if (sub->type == V4L2_EVENT_ALL) { v4l2_event_unsubscribe_all(fh); @@ -292,14 +303,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, spin_lock_irqsave(&fh->vdev->fh_lock, flags); sev = v4l2_event_subscribed(fh, sub->type, sub->id); - if (sev != NULL) { - /* Remove any pending events for this subscription */ - for (i = 0; i < sev->in_use; i++) { - list_del(&sev->events[sev_pos(sev, i)].list); - fh->navailable--; - } - list_del(&sev->list); - } + if (sev != NULL) + __v4l2_event_unsubscribe(sev); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); From 2fc77700daa9f2faf7e61f46c1d784fd85867dfc Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 11 Nov 2018 00:06:12 +0200 Subject: [PATCH 0593/2608] MIPS: OCTEON: cavium_octeon_defconfig: re-enable OCTEON USB driver commit 82fba2df7f7c019627f24c5036dc99f41731d770 upstream. Re-enable OCTEON USB driver which is needed on older hardware (e.g. EdgeRouter Lite) for mass storage etc. This got accidentally deleted when config options were changed for OCTEON2/3 USB. Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Fixes: f922bc0ad08b ("MIPS: Octeon: cavium_octeon_defconfig: Enable more drivers") Patchwork: https://patchwork.linux-mips.org/patch/21077/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/configs/cavium_octeon_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index 490b12af103c..c52d0efacd14 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -140,6 +140,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_STAGING=y CONFIG_OCTEON_ETHERNET=y +CONFIG_OCTEON_USB=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_RAS=y CONFIG_EXT4_FS=y From f6a6ae4e0f345aa481535bfe2046cd33f4dc37b8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Oct 2018 10:19:51 +0300 Subject: [PATCH 0594/2608] uio: Fix an Oops on load commit 432798195bbce1f8cd33d1c0284d0538835e25fb upstream. I was trying to solve a double free but I introduced a more serious NULL dereference bug. The problem is that if there is an IRQ which triggers immediately, then we need "info->uio_dev" but it's not set yet. This patch puts the original initialization back to how it was and just sets info->uio_dev to NULL on the error path so it should solve both the Oops and the double free. Fixes: f019f07ecf6a ("uio: potential double frees if __uio_register_device() fails") Reported-by: Mathias Thore Signed-off-by: Dan Carpenter Cc: stable Tested-by: Mathias Thore Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 0a730136646d..654579bc1e54 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -850,6 +850,8 @@ int __uio_register_device(struct module *owner, if (ret) goto err_uio_dev_add_attributes; + info->uio_dev = idev; + if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) { /* * Note that we deliberately don't use devm_request_irq @@ -861,11 +863,12 @@ int __uio_register_device(struct module *owner, */ ret = request_irq(info->irq, uio_interrupt, info->irq_flags, info->name, idev); - if (ret) + if (ret) { + info->uio_dev = NULL; goto err_request_irq; + } } - info->uio_dev = idev; return 0; err_request_irq: From 578f05a757bf76e828fe244994436947187bcfe3 Mon Sep 17 00:00:00 2001 From: Maarten Jacobs Date: Mon, 19 Nov 2018 23:18:49 +0000 Subject: [PATCH 0595/2608] usb: cdc-acm: add entry for Hiro (Conexant) modem commit 63529eaa6164ef7ab4b907b25ac3648177e5e78f upstream. The cdc-acm kernel module currently does not support the Hiro (Conexant) H05228 USB modem. The patch below adds the device specific information: idVendor 0x0572 idProduct 0x1349 Signed-off-by: Maarten Jacobs Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index e41d00bc7e97..5a8ef83a5c5c 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1724,6 +1724,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x0572, 0x1349), /* Hiro (Conexant) USB MODEM H50228 */ + .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ + }, { USB_DEVICE(0x20df, 0x0001), /* Simtec Electronics Entropy Key */ .driver_info = QUIRK_CONTROL_LINE_STATE, }, { USB_DEVICE(0x2184, 0x001c) }, /* GW Instek AFG-2225 */ From d7385bcf1887642ac0f0c7c6b82efc0d3c3c5f45 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 26 Oct 2018 13:33:15 +0800 Subject: [PATCH 0596/2608] USB: quirks: Add no-lpm quirk for Raydium touchscreens commit deefd24228a172d1b27d4a9adbfd2cdacd60ae64 upstream. Raydium USB touchscreen fails to set config if LPM is enabled: [ 2.030658] usb 1-8: New USB device found, idVendor=2386, idProduct=3119 [ 2.030659] usb 1-8: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 2.030660] usb 1-8: Product: Raydium Touch System [ 2.030661] usb 1-8: Manufacturer: Raydium Corporation [ 7.132209] usb 1-8: can't set config #1, error -110 Same behavior can be observed on 2386:3114. Raydium claims the touchscreen supports LPM under Windows, so I used Microsoft USB Test Tools (MUTT) [1] to check its LPM status. MUTT shows that the LPM doesn't work under Windows, either. So let's just disable LPM for Raydium touchscreens. [1] https://docs.microsoft.com/en-us/windows-hardware/drivers/usbcon/usb-test-tools Signed-off-by: Kai-Heng Feng Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 37a5e07b3488..dc753a8b2418 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -263,6 +263,11 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Raydium Touchscreen */ + { USB_DEVICE(0x2386, 0x3114), .driver_info = USB_QUIRK_NO_LPM }, + + { USB_DEVICE(0x2386, 0x3119), .driver_info = USB_QUIRK_NO_LPM }, + /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, From 041fa1f61997142e52331eee0eba84ffdbf85bd2 Mon Sep 17 00:00:00 2001 From: Emmanuel Pescosta Date: Fri, 26 Oct 2018 14:48:09 +0200 Subject: [PATCH 0597/2608] usb: quirks: Add delay-init quirk for Corsair K70 LUX RGB commit a77112577667cbda7c6292c52d909636aef31fd9 upstream. Following on from this patch: https://lkml.org/lkml/2017/11/3/516, Corsair K70 LUX RGB keyboards also require the DELAY_INIT quirk to start correctly at boot. Dmesg output: usb 1-6: string descriptor 0 read error: -110 usb 1-6: New USB device found, idVendor=1b1c, idProduct=1b33 usb 1-6: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-6: can't set config #1, error -110 Signed-off-by: Emmanuel Pescosta Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index dc753a8b2418..1e8f68960014 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -243,6 +243,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, + /* Corsair K70 LUX RGB */ + { USB_DEVICE(0x1b1c, 0x1b33), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Corsair K70 LUX */ { USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT }, From 939936e6015d007f6ef2e344f5e2ec809074c987 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 10:09:02 -0700 Subject: [PATCH 0598/2608] misc: atmel-ssc: Fix section annotation on atmel_ssc_get_driver_data commit 7c97301285b62a41d6bceded7d964085fc8cc50f upstream. After building the kernel with Clang, the following section mismatch warning appears: WARNING: vmlinux.o(.text+0x3bf19a6): Section mismatch in reference from the function ssc_probe() to the function .init.text:atmel_ssc_get_driver_data() The function ssc_probe() references the function __init atmel_ssc_get_driver_data(). This is often because ssc_probe lacks a __init annotation or the annotation of atmel_ssc_get_driver_data is wrong. Remove __init from atmel_ssc_get_driver_data to get rid of the mismatch. Signed-off-by: Nathan Chancellor Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/atmel-ssc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index b2a0340f277e..d8e3cc2dc747 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -132,7 +132,7 @@ static const struct of_device_id atmel_ssc_dt_ids[] = { MODULE_DEVICE_TABLE(of, atmel_ssc_dt_ids); #endif -static inline const struct atmel_ssc_platform_data * __init +static inline const struct atmel_ssc_platform_data * atmel_ssc_get_driver_data(struct platform_device *pdev) { if (pdev->dev.of_node) { From fb37c765e28d778f53c66c7727254f8ac2f047ea Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Sun, 21 Oct 2018 11:25:37 +0200 Subject: [PATCH 0599/2608] USB: misc: appledisplay: add 20" Apple Cinema Display commit f6501f49199097b99e4e263644d88c90d1ec1060 upstream. Add another Apple Cinema Display to the list of supported displays Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/appledisplay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index 8efdc500e790..288fe3e69d52 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -63,6 +63,7 @@ static const struct usb_device_id appledisplay_table[] = { { APPLEDISPLAY_DEVICE(0x9219) }, { APPLEDISPLAY_DEVICE(0x921c) }, { APPLEDISPLAY_DEVICE(0x921d) }, + { APPLEDISPLAY_DEVICE(0x9222) }, { APPLEDISPLAY_DEVICE(0x9236) }, /* Terminating entry */ From 16c233b9b6c8758207f0079737bf36bda9cd9094 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 12:59:44 +0200 Subject: [PATCH 0600/2608] drivers/misc/sgi-gru: fix Spectre v1 vulnerability commit fee05f455ceb5c670cbe48e2f9454ebc4a388554 upstream. req.gid can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: vers/misc/sgi-gru/grukdump.c:200 gru_dump_chiplet_request() warn: potential spectre issue 'gru_base' [w] Fix this by sanitizing req.gid before calling macro GID_TO_GRU, which uses it to index gru_base. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/misc/sgi-gru/grukdump.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c index 313da3150262..1540a7785e14 100644 --- a/drivers/misc/sgi-gru/grukdump.c +++ b/drivers/misc/sgi-gru/grukdump.c @@ -27,6 +27,9 @@ #include #include #include + +#include + #include "gru.h" #include "grutables.h" #include "gruhandles.h" @@ -196,6 +199,7 @@ int gru_dump_chiplet_request(unsigned long arg) /* Currently, only dump by gid is implemented */ if (req.gid >= gru_max_gids) return -EINVAL; + req.gid = array_index_nospec(req.gid, gru_max_gids); gru = GID_TO_GRU(req.gid); ubuf = req.buf; From 3cf1a2b4e0b3eae21abd25d535142e7b6ac3209c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Nov 2018 19:06:01 +0100 Subject: [PATCH 0601/2608] ACPI / platform: Add SMB0001 HID to forbidden_id_list commit 2bbb5fa37475d7aa5fa62f34db1623f3da2dfdfa upstream. Many HP AMD based laptops contain an SMB0001 device like this: Device (SMBD) { Name (_HID, "SMB0001") // _HID: Hardware ID Name (_CRS, ResourceTemplate () // _CRS: Current Resource Settings { IO (Decode16, 0x0B20, // Range Minimum 0x0B20, // Range Maximum 0x20, // Alignment 0x20, // Length ) IRQ (Level, ActiveLow, Shared, ) {7} }) } The legacy style IRQ resource here causes acpi_dev_get_irqresource() to be called with legacy=true and this message to show in dmesg: ACPI: IRQ 7 override to edge, high This causes issues when later on the AMD0030 GPIO device gets enumerated: Device (GPIO) { Name (_HID, "AMDI0030") // _HID: Hardware ID Name (_CID, "AMDI0030") // _CID: Compatible ID Name (_UID, Zero) // _UID: Unique ID Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings { Name (RBUF, ResourceTemplate () { Interrupt (ResourceConsumer, Level, ActiveLow, Shared, ,, ) { 0x00000007, } Memory32Fixed (ReadWrite, 0xFED81500, // Address Base 0x00000400, // Address Length ) }) Return (RBUF) /* \_SB_.GPIO._CRS.RBUF */ } } Now acpi_dev_get_irqresource() gets called with legacy=false, but because of the earlier override of the trigger-type acpi_register_gsi() returns -EBUSY (because we try to register the same interrupt with a different trigger-type) and we end up setting IORESOURCE_DISABLED in the flags. The setting of IORESOURCE_DISABLED causes platform_get_irq() to call acpi_irq_get() which is not implemented on x86 and returns -EINVAL. resulting in the following in dmesg: amd_gpio AMDI0030:00: Failed to get gpio IRQ: -22 amd_gpio: probe of AMDI0030:00 failed with error -22 The SMB0001 is a "virtual" device in the sense that the only way the OS interacts with it is through calling a couple of methods to do SMBus transfers. As such it is weird that it has IO and IRQ resources at all, because the driver for it is not expected to ever access the hardware directly. The Linux driver for the SMB0001 device directly binds to the acpi_device through the acpi_bus, so we do not need to instantiate a platform_device for this ACPI device. This commit adds the SMB0001 HID to the forbidden_id_list, avoiding the instantiating of a platform_device for it. Not instantiating a platform_device means we will no longer call acpi_dev_get_irqresource() for the legacy IRQ resource fixing the probe of the AMDI0030 device failing. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1644013 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=198715 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=199523 Reported-by: Lukas Kahnert Tested-by: Marc Cc: All applicable Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 88cd949003f3..ecd84d910ed2 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -30,6 +30,7 @@ static const struct acpi_device_id forbidden_id_list[] = { {"PNP0200", 0}, /* AT DMA Controller */ {"ACPI0009", 0}, /* IOxAPIC */ {"ACPI000A", 0}, /* IOAPIC */ + {"SMB0001", 0}, /* ACPI SMBUS virtual device */ {"", 0}, }; From 540f89376ba2949f53ed6f057fea321be848cd43 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 14 Nov 2018 13:55:09 -0800 Subject: [PATCH 0602/2608] HID: uhid: forbid UHID_CREATE under KERNEL_DS or elevated privileges commit 8c01db7619f07c85c5cd81ec5eb83608b56c88f5 upstream. When a UHID_CREATE command is written to the uhid char device, a copy_from_user() is done from a user pointer embedded in the command. When the address limit is KERNEL_DS, e.g. as is the case during sys_sendfile(), this can read from kernel memory. Alternatively, information can be leaked from a setuid binary that is tricked to write to the file descriptor. Therefore, forbid UHID_CREATE in these cases. No other commands in uhid_char_write() are affected by this bug and UHID_CREATE is marked as "obsolete", so apply the restriction to UHID_CREATE only rather than to uhid_char_write() entirely. Thanks to Dmitry Vyukov for adding uhid definitions to syzkaller and to Jann Horn for commit 9da3f2b740544 ("x86/fault: BUG() when uaccess helpers fault on kernel addresses"), allowing this bug to be found. Reported-by: syzbot+72473edc9bf4eb1c6556@syzkaller.appspotmail.com Fixes: d365c6cfd337 ("HID: uhid: add UHID_CREATE and UHID_DESTROY events") Cc: # v3.6+ Cc: Jann Horn Cc: Andy Lutomirski Signed-off-by: Eric Biggers Reviewed-by: Jann Horn Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/uhid.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 6f819f144cb4..6f67d73b184e 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -722,6 +723,17 @@ static ssize_t uhid_char_write(struct file *file, const char __user *buffer, switch (uhid->input_buf.type) { case UHID_CREATE: + /* + * 'struct uhid_create_req' contains a __user pointer which is + * copied from, so it's unsafe to allow this with elevated + * privileges (e.g. from a setuid binary) or via kernel_write(). + */ + if (file->f_cred != current_cred() || uaccess_kernel()) { + pr_err_once("UHID_CREATE from different security context by process %d (%s), this is not allowed.\n", + task_tgid_vnr(current), current->comm); + ret = -EACCES; + goto unlock; + } ret = uhid_dev_create(uhid, &uhid->input_buf); break; case UHID_CREATE2: From 3c7f1671af5f1bef6fac4e9ad7416e7a5a4ed295 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 8 Nov 2018 15:55:37 +0100 Subject: [PATCH 0603/2608] libceph: fall back to sendmsg for slab pages commit 7e241f647dc7087a0401418a187f3f5b527cc690 upstream. skb_can_coalesce() allows coalescing neighboring slab objects into a single frag: return page == skb_frag_page(frag) && off == frag->page_offset + skb_frag_size(frag); ceph_tcp_sendpage() can be handed slab pages. One example of this is XFS: it passes down sector sized slab objects for its metadata I/O. If the kernel client is co-located on the OSD node, the skb may go through loopback and pop on the receive side with the exact same set of frags. When tcp_recvmsg() attempts to copy out such a frag, hardened usercopy complains because the size exceeds the object's allocated size: usercopy: kernel memory exposure attempt detected from ffff9ba917f20a00 (kmalloc-512) (1024 bytes) Although skb_can_coalesce() could be taught to return false if the resulting frag would cross a slab object boundary, we already have a fallback for non-refcounted pages. Utilize it for slab pages too. Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5c4e85296cf6..5281da82371a 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -594,9 +594,15 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, struct bio_vec bvec; int ret; - /* sendpage cannot properly handle pages with page_count == 0, - * we need to fallback to sendmsg if that's the case */ - if (page_count(page) >= 1) + /* + * sendpage cannot properly handle pages with page_count == 0, + * we need to fall back to sendmsg if that's the case. + * + * Same goes for slab pages: skb_can_coalesce() allows + * coalescing neighboring slab objects into a single frag which + * triggers one of hardened usercopy checks. + */ + if (page_count(page) >= 1 && !PageSlab(page)) return __ceph_tcp_sendpage(sock, page, offset, size, more); bvec.bv_page = page; From 4201a586f1fa63de5a965a1ebc3b99e0e1a4912c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Nov 2018 16:10:52 +0100 Subject: [PATCH 0604/2608] Linux 4.14.84 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f42814095a4..874d72a3e6a7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 83 +SUBLEVEL = 84 EXTRAVERSION = NAME = Petit Gorille From ed482390c8dbf51484bfe69d0f4952cf29a4dc6b Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Tue, 27 Nov 2018 15:33:17 -0800 Subject: [PATCH 0605/2608] efi/libstub: arm: support building with clang (commit 41f1c48420709470c51ee0e54b6fb28b956bb4e0 upstream) When building with CONFIG_EFI and CONFIG_EFI_STUB on ARM, the libstub Makefile would use -mno-single-pic-base without checking it was supported by the compiler. As the ARM (32-bit) clang backend does not support this flag, the build would fail. This changes the Makefile to check the compiler's support for -mno-single-pic-base before using it, similar to c1c386681bd7 ("ARM: 8767/1: add support for building ARM kernel with clang"). Signed-off-by: Alistair Strachan Reviewed-by: Stefan Agner Signed-off-by: Ard Biesheuvel [ND: adjusted due to missing commit ce279d374ff3 ("efi/libstub: Only disable stackleak plugin for arm64")] Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index adaa4a964f0c..678bc910e080 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -13,7 +13,8 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -O2 \ cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ - -fno-builtin -fpic -mno-single-pic-base + -fno-builtin -fpic \ + $(call cc-option,-mno-single-pic-base) cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt From f1222c5f7f6036cbab74abefdbce0bffe85d3eea Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 22:49:49 +0100 Subject: [PATCH 0606/2608] ARM: 8766/1: drop no-thumb-interwork in EABI mode (commit 22905a24306c8c312c2d66da9f90d09af0414f81 upstream) According to GCC documentation -m(no-)thumb-interwork is meaningless in AAPCS configurations. Also clang does not support the flag: clang-5.0: error: unknown argument: '-mno-thumb-interwork' Just drop -mno-thumb-interwork in AEABI configuration. Signed-off-by: Stefan Agner Signed-off-by: Russell King Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 36ae4454554c..17e80f483281 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -106,7 +106,7 @@ tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) tune-y := $(tune-y) ifeq ($(CONFIG_AEABI),y) -CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork -mfpu=vfp +CFLAGS_ABI :=-mabi=aapcs-linux -mfpu=vfp else CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) endif From 764ecc201217f676fdc7f0878e74ca034716d9ee Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 22:50:38 +0100 Subject: [PATCH 0607/2608] ARM: 8767/1: add support for building ARM kernel with clang (commit c1c386681bd73c4fc28eb5cc91cf8b7be9b409ba upstream) Use cc-options call for compiler options which are not available in clang. With this patch an ARMv7 multi platform kernel can be successfully build using clang (tested with version 5.0.1). Based-on-patches-by: Behan Webster Signed-off-by: Stefan Agner Signed-off-by: Russell King Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index a5889238fc9f..746c8c575f98 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -113,7 +113,7 @@ CFLAGS_fdt_ro.o := $(nossp_flags) CFLAGS_fdt_rw.o := $(nossp_flags) CFLAGS_fdt_wip.o := $(nossp_flags) -ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj) +ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. From 8d7925ad96c682dd1a0b15943a5d545c5191107e Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 16:27:26 +0200 Subject: [PATCH 0608/2608] bus: arm-cci: remove unnecessary unreachable() (commit 10d8713429d345867fc8998d6193b233c0cab28c upstream) Mixing asm and C code is not recommended in a naked function by gcc and leads to an error when using clang: drivers/bus/arm-cci.c:2107:2: error: non-ASM statement in naked function is not supported unreachable(); ^ While the function is marked __naked it actually properly return in asm. There is no need for the unreachable() call. GCC 7.2 generates identical object files before and after, other than (for obvious reasons) the line numbers generated by WANT_WARN_ON_SLOWPATH for all the WARN()s appearing later in the file. Suggested-by: Russell King Signed-off-by: Stefan Agner Acked-by: Nicolas Pitre Reviewed-by: Robin Murphy Signed-off-by: Olof Johansson Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-cci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 5426c04fe24b..fc2da3a617ac 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -2103,8 +2103,6 @@ asmlinkage void __naked cci_enable_port_for_self(void) [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)), [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)), [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) ); - - unreachable(); } /** From 5d47e129a7107d6f35ac4ce7f32ff9376b555bfe Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 25 Mar 2018 20:09:56 +0200 Subject: [PATCH 0609/2608] ARM: trusted_foundations: do not use naked function (commit 4ea7bdc6b5b33427bbd3f41c333e21c1825462a3 upstream) As documented in GCC naked functions should only use basic ASM syntax. The extended ASM or mixture of basic ASM and "C" code is not guaranteed. Currently this works because it was hard coded to follow and check GCC behavior for arguments and register placement. Furthermore with clang using parameters in Extended asm in a naked function is not supported: arch/arm/firmware/trusted_foundations.c:47:10: error: parameter references not allowed in naked functions : "r" (type), "r" (arg1), "r" (arg2) ^ Use a regular function to be more portable. This aligns also with the other SMC call implementations e.g. in qcom_scm-32.c and bcm_kona_smc.c. Cc: Dmitry Osipenko Cc: Stephen Warren Cc: Thierry Reding Signed-off-by: Stefan Agner Signed-off-by: Thierry Reding Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/firmware/trusted_foundations.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/firmware/trusted_foundations.c b/arch/arm/firmware/trusted_foundations.c index 3fb1b5a1dce9..689e6565abfc 100644 --- a/arch/arm/firmware/trusted_foundations.c +++ b/arch/arm/firmware/trusted_foundations.c @@ -31,21 +31,25 @@ static unsigned long cpu_boot_addr; -static void __naked tf_generic_smc(u32 type, u32 arg1, u32 arg2) +static void tf_generic_smc(u32 type, u32 arg1, u32 arg2) { + register u32 r0 asm("r0") = type; + register u32 r1 asm("r1") = arg1; + register u32 r2 asm("r2") = arg2; + asm volatile( ".arch_extension sec\n\t" - "stmfd sp!, {r4 - r11, lr}\n\t" + "stmfd sp!, {r4 - r11}\n\t" __asmeq("%0", "r0") __asmeq("%1", "r1") __asmeq("%2", "r2") "mov r3, #0\n\t" "mov r4, #0\n\t" "smc #0\n\t" - "ldmfd sp!, {r4 - r11, pc}" + "ldmfd sp!, {r4 - r11}\n\t" : - : "r" (type), "r" (arg1), "r" (arg2) - : "memory"); + : "r" (r0), "r" (r1), "r" (r2) + : "memory", "r3", "r12", "lr"); } static int tf_set_cpu_boot_addr(int cpu, unsigned long boot_addr) From 427d4e4c27442ed2329a9672f65af49cd5a56389 Mon Sep 17 00:00:00 2001 From: Dennis Wassenberg Date: Tue, 13 Nov 2018 14:40:34 +0100 Subject: [PATCH 0610/2608] usb: core: Fix hub port connection events lost commit 22454b79e6de05fa61a2a72d00d2eed798abbb75 upstream. This will clear the USB_PORT_FEAT_C_CONNECTION bit in case of a hub port reset only if a device is was attached to the hub port before resetting the hub port. Using a Lenovo T480s attached to the ultra dock it was not possible to detect some usb-c devices at the dock usb-c ports because the hub_port_reset code will clear the USB_PORT_FEAT_C_CONNECTION bit after the actual hub port reset. Using this device combo the USB_PORT_FEAT_C_CONNECTION bit was set between the actual hub port reset and the clear of the USB_PORT_FEAT_C_CONNECTION bit. This ends up with clearing the USB_PORT_FEAT_C_CONNECTION bit after the new device was attached such that it was not detected. This patch will not clear the USB_PORT_FEAT_C_CONNECTION bit if there is currently no device attached to the port before the hub port reset. This will avoid clearing the connection bit for new attached devices. Signed-off-by: Dennis Wassenberg Acked-by: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a9db0887edca..638dc6f66d70 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2815,7 +2815,9 @@ static int hub_port_reset(struct usb_hub *hub, int port1, USB_PORT_FEAT_C_BH_PORT_RESET); usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_PORT_LINK_STATE); - usb_clear_port_feature(hub->hdev, port1, + + if (udev) + usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); /* From 47631d837acccf5532a32b47943621ce62a3a477 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 1 Aug 2018 09:37:34 +0300 Subject: [PATCH 0611/2608] usb: dwc3: gadget: fix ISOC TRB type on unaligned transfers commit 2fc6d4be35fb1e262f209758e25bfe2b7a113a7f upstream. When chaining ISOC TRBs together, only the first ISOC TRB should be of type ISOC_FIRST, all others should be of type ISOC. This patch fixes that. Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize") Cc: # v4.11+ Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d7fae66a0681..e28d2904e3a3 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1088,7 +1088,7 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, /* Now prepare one extra TRB to align transfer size */ trb = &dep->trb_pool[dep->trb_enqueue]; __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, - maxp - rem, false, 0, + maxp - rem, false, 1, req->request.stream_id, req->request.short_not_ok, req->request.no_interrupt); @@ -1120,7 +1120,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep, /* Now prepare one extra TRB to align transfer size */ trb = &dep->trb_pool[dep->trb_enqueue]; __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, maxp - rem, - false, 0, req->request.stream_id, + false, 1, req->request.stream_id, req->request.short_not_ok, req->request.no_interrupt); } else if (req->request.zero && req->request.length && @@ -1136,7 +1136,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep, /* Now prepare one extra TRB to handle ZLP */ trb = &dep->trb_pool[dep->trb_enqueue]; __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, 0, - false, 0, req->request.stream_id, + false, 1, req->request.stream_id, req->request.short_not_ok, req->request.no_interrupt); } else { From 474e39f53816ef70224cdb6fae27893d679fb65f Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 2 Aug 2018 20:17:16 -0700 Subject: [PATCH 0612/2608] usb: dwc3: gadget: Properly check last unaligned/zero chain TRB commit ba3a51ac32ebcf8d0a54b37f1af268ad8a31c52f upstream. Current check for the last extra TRB for zero and unaligned transfers does not account for isoc OUT. The last TRB of the Buffer Descriptor for isoc OUT transfers will be retired with HWO=0. As a result, we won't return early. The req->remaining will be updated to include the BUFSIZ count of the extra TRB, and the actual number of transferred bytes calculation will be wrong. To fix this, check whether it's a short or zero packet and the last TRB chain bit to return early. Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize") Cc: Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index e28d2904e3a3..ac8d619ff887 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2249,7 +2249,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, * with one TRB pending in the ring. We need to manually clear HWO bit * from that TRB. */ - if ((req->zero || req->unaligned) && (trb->ctrl & DWC3_TRB_CTRL_HWO)) { + if ((req->zero || req->unaligned) && !(trb->ctrl & DWC3_TRB_CTRL_CHN)) { trb->ctrl &= ~DWC3_TRB_CTRL_HWO; return 1; } From 2c3e97edbb19a4a47e1aa47101b1e2553e3bdad0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 27 Aug 2018 18:30:16 +0300 Subject: [PATCH 0613/2608] usb: dwc3: core: Clean up ULPI device commit 08fd9a82fda86529bb2f2af3c2f7cb657b4d3066 upstream. If dwc3_core_init_mode() fails with deferred probe, next probe fails on sysfs with sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:11.0/dwc3.0.auto/dwc3.0.auto.ulpi' To avoid this failure, clean up ULPI device. Cc: Signed-off-by: Andy Shevchenko Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 8b323a360e03..783d16a53466 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1276,6 +1276,7 @@ static int dwc3_probe(struct platform_device *pdev) err5: dwc3_event_buffers_cleanup(dwc); + dwc3_ulpi_exit(dwc); err4: dwc3_free_scratch_buffers(dwc); From 0902c4e7d6dbdeaa73fa6fc635391624de6e9a5f Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 9 Nov 2018 17:21:19 +0200 Subject: [PATCH 0614/2608] xhci: Add check for invalid byte size error when UAS devices are connected. commit d9193efba84fe4c4aa22a569fade5e6ca971f8af upstream. Observed "TRB completion code (27)" error which corresponds to Stopped - Length Invalid error(xhci spec section 4.17.4) while connecting USB to SATA bridge. Looks like this case was not considered when the following patch[1] was committed. Hence adding this new check which can prevent the invalid byte size error. [1] ade2e3a xhci: handle transfer events without TRB pointer Cc: Signed-off-by: Sandeep Singh cc: Nehal Shah cc: Shyam Sundar S K Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 6996235e34a9..2d64cb024d00 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2335,6 +2335,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, goto cleanup; case COMP_RING_UNDERRUN: case COMP_RING_OVERRUN: + case COMP_STOPPED_LENGTH_INVALID: goto cleanup; default: xhci_err(xhci, "ERROR Transfer event for unknown stream ring slot %u ep %u\n", From 494427532654fc2b3c6791bdd997587643717684 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 9 Nov 2018 17:21:21 +0200 Subject: [PATCH 0615/2608] usb: xhci: fix timeout for transition from RExit to U0 commit a5baeaeabcca3244782a9b6382ebab6f8a58f583 upstream. This definition is used by msecs_to_jiffies in milliseconds. According to the comments, max rexit timeout should be 20ms. Align with the comments to properly calculate the delay. Verified on Sunrise Point-LP and Cannon Lake. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 4 ++-- drivers/usb/host/xhci.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index d2a9767a8e9c..4f612b5d0f94 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -941,7 +941,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, time_left = wait_for_completion_timeout( &bus_state->rexit_done[wIndex], msecs_to_jiffies( - XHCI_MAX_REXIT_TIMEOUT)); + XHCI_MAX_REXIT_TIMEOUT_MS)); spin_lock_irqsave(&xhci->lock, flags); if (time_left) { @@ -955,7 +955,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, } else { int port_status = readl(port_array[wIndex]); xhci_warn(xhci, "Port resume took longer than %i msec, port status = 0x%x\n", - XHCI_MAX_REXIT_TIMEOUT, + XHCI_MAX_REXIT_TIMEOUT_MS, port_status); status |= USB_PORT_STAT_SUSPEND; clear_bit(wIndex, &bus_state->rexit_ports); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 11232e62b898..29dc6160c9eb 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1684,7 +1684,7 @@ struct xhci_bus_state { * It can take up to 20 ms to transition from RExit to U0 on the * Intel Lynx Point LP xHCI host. */ -#define XHCI_MAX_REXIT_TIMEOUT (20 * 1000) +#define XHCI_MAX_REXIT_TIMEOUT_MS 20 static inline unsigned int hcd_index(struct usb_hcd *hcd) { From 1129534f73336addfcea53a65c7847d1aa1d2175 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Nov 2018 11:59:45 +0100 Subject: [PATCH 0616/2608] ALSA: oss: Use kvzalloc() for local buffer allocations commit 65766ee0bf7fe8b3be80e2e1c3ef54ad59b29476 upstream. PCM OSS layer may allocate a few temporary buffers, one for the core read/write and another for the conversions via plugins. Currently both are allocated via vmalloc(). But as the allocation size is equivalent with the PCM period size, the required size might be quite small, depending on the application. This patch replaces these vmalloc() calls with kvzalloc() for covering small period sizes better. Also, we use "z"-alloc variant here for addressing the possible uninitialized access reported by syzkaller. Reported-by: syzbot+1cb36954e127c98dd037@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 6 +++--- sound/core/oss/pcm_plugin.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index b4f954e6d2db..df358e838b5b 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1062,8 +1062,8 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) runtime->oss.channels = params_channels(params); runtime->oss.rate = params_rate(params); - vfree(runtime->oss.buffer); - runtime->oss.buffer = vmalloc(runtime->oss.period_bytes); + kvfree(runtime->oss.buffer); + runtime->oss.buffer = kvzalloc(runtime->oss.period_bytes, GFP_KERNEL); if (!runtime->oss.buffer) { err = -ENOMEM; goto failure; @@ -2328,7 +2328,7 @@ static void snd_pcm_oss_release_substream(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; runtime = substream->runtime; - vfree(runtime->oss.buffer); + kvfree(runtime->oss.buffer); runtime->oss.buffer = NULL; #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 85a56af104bd..617845d4a811 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -66,8 +66,8 @@ static int snd_pcm_plugin_alloc(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t return -ENXIO; size /= 8; if (plugin->buf_frames < frames) { - vfree(plugin->buf); - plugin->buf = vmalloc(size); + kvfree(plugin->buf); + plugin->buf = kvzalloc(size, GFP_KERNEL); plugin->buf_frames = frames; } if (!plugin->buf) { @@ -191,7 +191,7 @@ int snd_pcm_plugin_free(struct snd_pcm_plugin *plugin) if (plugin->private_free) plugin->private_free(plugin); kfree(plugin->buf_channels); - vfree(plugin->buf); + kvfree(plugin->buf); kfree(plugin); return 0; } From d34af066f084defffd77f33778217fef8889e063 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 15 Nov 2018 15:03:24 -0800 Subject: [PATCH 0617/2608] MAINTAINERS: Add Sasha as a stable branch maintainer commit cb5d21946d2a2f4687c482ab4604af1d29dac35a upstream. Sasha has somehow been convinced into helping me with the stable kernel maintenance. Codify this slip in good judgement before he realizes what he really signed up for :) Signed-off-by: Greg Kroah-Hartman Acked-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546beb6b0176..6cb70b853323 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12662,6 +12662,7 @@ F: arch/alpha/kernel/srm_env.c STABLE BRANCH M: Greg Kroah-Hartman +M: Sasha Levin L: stable@vger.kernel.org S: Supported F: Documentation/process/stable-kernel-rules.rst From 7c9f55c5a58a8a7131e1f5a82a7f85bf06c62a63 Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Mon, 29 Oct 2018 15:17:01 -0700 Subject: [PATCH 0618/2608] mmc: sdhci-pci: Try "cd" for card-detect lookup before using NULL commit cdcefe6bd9df754f528ffc339d3cc143cea4ddf6 upstream. Problem: The card detect IRQ does not work with modern BIOS (that want to use _DSD to provide the card detect GPIO to the driver). Details: The mmc core provides the mmc_gpiod_request_cd() API to let host drivers request the gpio descriptor for the "card detect" pin. This pin is specified in the ACPI for the SDHC device: * Either as a resource using _CRS. This is a method used by legacy BIOS. (The driver needs to tell which resource index). * Or as a named property ("cd-gpios"/"cd-gpio") in _DSD (which internally points to an entry in _CRS). This way, the driver can lookup using a string. This is what modern BIOS prefer to use. This API finally results in a call to the following code: struct gpio_desc *acpi_find_gpio(..., const char *con_id,...) { ... /* Lookup gpio (using "-gpio") in the _DSD */ ... if (!acpi_can_fallback_to_crs(adev, con_id)) return ERR_PTR(-ENOENT); ... /* Falling back to _CRS is allowed, Lookup gpio in the _CRS */ ... } Note that this means that if the ACPI has _DSD properties, the kernel will never use _CRS for the lookup (Because acpi_can_fallback_to_crs() will always be false for any device hat has _DSD entries). The SDHCI driver is thus currently broken on a modern BIOS, even if BIOS provides both _CRS (for index based lookup) and _DSD entries (for string based lookup). Ironically, none of these will be used for the lookup currently because: * Since the con_id is NULL, acpi_find_gpio() does not find a matching entry in DSDT. (The _DSDT entry has the property name = "cd-gpios") * Because ACPI contains DSDT entries, thus acpi_can_fallback_to_crs() returns false (because device properties have been populated from _DSD), thus the _CRS is never used for the lookup. Fix: Try "cd" for lookup in the _DSD before falling back to using NULL so as to try looking up in the _CRS. I've tested this patch successfully with both Legacy BIOS (that provide only _CRS method) as well as modern BIOS (that provide both _CRS and _DSD). Also the use of "cd" appears to be fairly consistent across other users of this API (other MMC host controller drivers). Link: https://lkml.org/lkml/2018/9/25/1113 Signed-off-by: Rajat Jain Acked-by: Adrian Hunter Fixes: f10e4bf6632b ("gpio: acpi: Even more tighten up ACPI GPIO lookups") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 44da037b13ba..0e386f5cc836 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1607,8 +1607,13 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot( host->mmc->caps2 |= MMC_CAP2_NO_PRESCAN_POWERUP; if (slot->cd_idx >= 0) { - ret = mmc_gpiod_request_cd(host->mmc, NULL, slot->cd_idx, + ret = mmc_gpiod_request_cd(host->mmc, "cd", slot->cd_idx, slot->cd_override_level, 0, NULL); + if (ret && ret != -EPROBE_DEFER) + ret = mmc_gpiod_request_cd(host->mmc, NULL, + slot->cd_idx, + slot->cd_override_level, + 0, NULL); if (ret == -EPROBE_DEFER) goto remove; From 6a5f25e30045bb2c3622136807d76be3bd4fb0ba Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 2 Nov 2018 15:39:43 +0200 Subject: [PATCH 0619/2608] gpio: don't free unallocated ida on gpiochip_add_data_with_key() error path commit a05a14049999598a3bb6fab12db6b768a0215522 upstream. The change corrects the error path in gpiochip_add_data_with_key() by avoiding to call ida_simple_remove(), if ida_simple_get() returns an error. Note that ida_simple_remove()/ida_free() throws a BUG(), if id argument is negative, it allows to easily check the correctness of the fix by fuzzing the return value from ida_simple_get(). Fixes: ff2b13592299 ("gpio: make the gpiochip a real device") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Vladimir Zapolskiy Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7d5de4ef4f22..21062cb6b85f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1166,7 +1166,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) gdev->descs = kcalloc(chip->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL); if (!gdev->descs) { status = -ENOMEM; - goto err_free_gdev; + goto err_free_ida; } if (chip->ngpio == 0) { @@ -1298,8 +1298,9 @@ err_free_label: kfree(gdev->label); err_free_descs: kfree(gdev->descs); -err_free_gdev: +err_free_ida: ida_simple_remove(&gpio_ida, gdev->id); +err_free_gdev: /* failures here can mean systems won't boot... */ pr_err("%s: GPIOs %d..%d (%s) failed to register\n", __func__, gdev->base, gdev->base + gdev->ngpio - 1, From bc176d7a68bef23e1db1b0ec6e4ad4d75aa63958 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 16 Aug 2018 13:25:48 +0300 Subject: [PATCH 0620/2608] iwlwifi: mvm: support sta_statistics() even on older firmware commit ec484d03ef0df8d34086b95710e355a259cbe1f2 upstream. The oldest firmware supported by iwlmvm do support getting the average beacon RSSI. Enable the sta_statistics() call from mac80211 even on older firmware versions. Fixes: 33cef9256342 ("iwlwifi: mvm: support beacon statistics for BSS client") Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 80a653950e86..f161fa55d202 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4189,10 +4189,6 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw, sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); } - if (!fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) - return; - /* if beacon filtering isn't on mac80211 does it anyway */ if (!(vif->driver_flags & IEEE80211_VIF_BEACON_FILTER)) return; From b58f0659f7c6c9957a528aa15cb0189055570672 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 3 Oct 2018 11:16:54 +0300 Subject: [PATCH 0621/2608] iwlwifi: mvm: fix regulatory domain update when the firmware starts commit 82715ac71e6b94a2c2136e31f3a8e6748e33aa8c upstream. When the firmware starts, it doesn't have any regulatory information, hence it uses the world wide limitations. The driver can feed the firmware with previous knowledge that was kept in the driver, but the firmware may still not update its internal tables. This happens when we start a BSS interface, and then the firmware can change the regulatory tables based on our location and it'll use more lenient, location specific rules. Then, if the firmware is shut down (when the interface is brought down), and then an AP interface is created, the firmware will forget the country specific rules. The host will think that we are in a certain country that may allow channels and will try to teach the firmware about our location, but the firmware may still not allow to drop the world wide limitations and apply country specific rules because it was just re-started. In this case, the firmware will reply with MCC_RESP_ILLEGAL to the MCC_UPDATE_CMD. In that case, iwlwifi needs to let the upper layers (cfg80211 / hostapd) know that the channel list they know about has been updated. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=201105 Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 8 ++++++-- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 5 ++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index f161fa55d202..77ed6ecf5ee5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -328,8 +328,12 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy, goto out; } - if (changed) - *changed = (resp->status == MCC_RESP_NEW_CHAN_PROFILE); + if (changed) { + u32 status = le32_to_cpu(resp->status); + + *changed = (status == MCC_RESP_NEW_CHAN_PROFILE || + status == MCC_RESP_ILLEGAL); + } regd = iwl_parse_nvm_mcc_info(mvm->trans->dev, mvm->cfg, __le32_to_cpu(resp->n_channels), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index fb25b6f29323..ca2d66ce8424 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -732,9 +732,8 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2, } IWL_DEBUG_LAR(mvm, - "MCC response status: 0x%x. new MCC: 0x%x ('%c%c') change: %d n_chans: %d\n", - status, mcc, mcc >> 8, mcc & 0xff, - !!(status == MCC_RESP_NEW_CHAN_PROFILE), n_channels); + "MCC response status: 0x%x. new MCC: 0x%x ('%c%c') n_chans: %d\n", + status, mcc, mcc >> 8, mcc & 0xff, n_channels); exit: iwl_free_resp(&cmd); From bf02eaf57a328b7213520396f4920902286e8cd8 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 17 Oct 2018 08:35:15 +0300 Subject: [PATCH 0622/2608] iwlwifi: mvm: don't use SAR Geo if basic SAR is not used commit 5d041c46ccb9b48acc110e214beff5e2789311df upstream. We can't use SAR Geo if basic SAR is not enabled, since the SAR Geo tables define offsets in relation to the basic SAR table in use. To fix this, make iwl_mvm_sar_init() return one in case WRDS is not available, so we can skip reading WGDS entirely. Fixes: a6bff3cb19b7 ("iwlwifi: mvm: add GEO_TX_POWER_LIMIT cmd for geographic tx power table") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 36 ++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index b71a9d11a50f..11319675f2d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -997,6 +997,11 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm) return -ENOENT; } +static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) +{ + return -ENOENT; +} + static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) { return 0; @@ -1023,8 +1028,11 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm) IWL_DEBUG_RADIO(mvm, "WRDS SAR BIOS table invalid or unavailable. (%d)\n", ret); - /* if not available, don't fail and don't bother with EWRD */ - return 0; + /* + * If not available, don't fail and don't bother with EWRD. + * Return 1 to tell that we can't use WGDS either. + */ + return 1; } ret = iwl_mvm_sar_get_ewrd_table(mvm); @@ -1037,9 +1045,13 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm) /* choose profile 1 (WRDS) as default for both chains */ ret = iwl_mvm_sar_select_profile(mvm, 1, 1); - /* if we don't have profile 0 from BIOS, just skip it */ + /* + * If we don't have profile 0 from BIOS, just skip it. This + * means that SAR Geo will not be enabled either, even if we + * have other valid profiles. + */ if (ret == -ENOENT) - return 0; + return 1; return ret; } @@ -1229,11 +1241,19 @@ int iwl_mvm_up(struct iwl_mvm *mvm) iwl_mvm_unref(mvm, IWL_MVM_REF_UCODE_DOWN); ret = iwl_mvm_sar_init(mvm); - if (ret) - goto error; + if (ret == 0) { + ret = iwl_mvm_sar_geo_init(mvm); + } else if (ret > 0 && !iwl_mvm_sar_get_wgds_table(mvm)) { + /* + * If basic SAR is not available, we check for WGDS, + * which should *not* be available either. If it is + * available, issue an error, because we can't use SAR + * Geo without basic SAR. + */ + IWL_ERR(mvm, "BIOS contains WGDS but no WRDS\n"); + } - ret = iwl_mvm_sar_geo_init(mvm); - if (ret) + if (ret < 0) goto error; iwl_mvm_leds_sync(mvm); From a0b9ddf4906e6340c4ce920c9450de05021e73ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 8 Nov 2018 16:08:29 +0100 Subject: [PATCH 0623/2608] brcmfmac: fix reporting support for 160 MHz channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d1fe6ad6f6bd61c84788d3a7b11e459a439c6169 upstream. Driver can report IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ so it's important to provide valid & complete info about supported bands for each channel. By default no support for 160 MHz should be assumed unless firmware reports it for a given channel later. This fixes info passed to the userspace. Without that change userspace could try to use invalid channel and fail to start an interface. Signed-off-by: Rafał Miłecki Cc: stable@vger.kernel.org Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 083e5ce7eac7..cd6c5ece9a5d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -6098,7 +6098,8 @@ static int brcmf_construct_chaninfo(struct brcmf_cfg80211_info *cfg, * for subsequent chanspecs. */ channel->flags = IEEE80211_CHAN_NO_HT40 | - IEEE80211_CHAN_NO_80MHZ; + IEEE80211_CHAN_NO_80MHZ | + IEEE80211_CHAN_NO_160MHZ; ch.bw = BRCMU_CHAN_BW_20; cfg->d11inf.encchspec(&ch); chaninfo = ch.chspec; From 3b99dcd4026a08fc9d1fd5cc0a9fc50529803fca Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 16 Oct 2018 11:56:26 +0300 Subject: [PATCH 0624/2608] tools/power/cpupower: fix compilation with STATIC=true commit 9de9aa45e9bd67232e000cca42ceb134b8ae51b6 upstream. Rename duplicate sysfs_read_file into cpupower_read_sysfs and fix linking. Signed-off-by: Konstantin Khlebnikov Acked-by: Thomas Renninger Cc: Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/bench/Makefile | 2 +- tools/power/cpupower/lib/cpufreq.c | 2 +- tools/power/cpupower/lib/cpuidle.c | 2 +- tools/power/cpupower/lib/cpupower.c | 4 ++-- tools/power/cpupower/lib/cpupower_intern.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index d79ab161cc75..f68b4bc55273 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -9,7 +9,7 @@ endif ifeq ($(strip $(STATIC)),true) LIBS = -L../ -L$(OUTPUT) -lm OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o \ - $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/sysfs.o + $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/cpupower.o else LIBS = -L../ -L$(OUTPUT) -lm -lcpupower OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 1b993fe1ce23..0c0f3e3f0d80 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -28,7 +28,7 @@ static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname, snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s", cpu, fname); - return sysfs_read_file(path, buf, buflen); + return cpupower_read_sysfs(path, buf, buflen); } /* helper function to write a new value to a /sys file */ diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 9bd4c7655fdb..852d25462388 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -319,7 +319,7 @@ static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf, snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname); - return sysfs_read_file(path, buf, buflen); + return cpupower_read_sysfs(path, buf, buflen); } diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index 9c395ec924de..9711d628b0f4 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -15,7 +15,7 @@ #include "cpupower.h" #include "cpupower_intern.h" -unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen) { int fd; ssize_t numread; @@ -95,7 +95,7 @@ static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *re snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", cpu, fname); - if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) + if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0) return -1; *result = strtol(linebuf, &endp, 0); if (endp == linebuf || errno == ERANGE) diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h index 92affdfbe417..4887c76d23f8 100644 --- a/tools/power/cpupower/lib/cpupower_intern.h +++ b/tools/power/cpupower/lib/cpupower_intern.h @@ -3,4 +3,4 @@ #define MAX_LINE_LEN 4096 #define SYSFS_PATH_MAX 255 -unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); From 335f31342699f9015f0ab5a8d41d6c911085f033 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 27 Aug 2018 15:12:05 +0900 Subject: [PATCH 0625/2608] v9fs_dir_readdir: fix double-free on p9stat_read error commit 81c99089bce693b94b775b6eb888115d2d540086 upstream. p9stat_read will call p9stat_free on error, we should only free the struct content on success. There also is no need to "p9stat_init" st as the read function will zero the whole struct for us anyway, so clean up the code a bit while we are here. Link: http://lkml.kernel.org/r/1535410108-20650-1-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Reported-by: syzbot+d4252148d198410b864f@syzkaller.appspotmail.com Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_dir.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index b0405d6aac85..48db9a9f13f9 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -76,15 +76,6 @@ static inline int dt_type(struct p9_wstat *mistat) return rettype; } -static void p9stat_init(struct p9_wstat *stbuf) -{ - stbuf->name = NULL; - stbuf->uid = NULL; - stbuf->gid = NULL; - stbuf->muid = NULL; - stbuf->extension = NULL; -} - /** * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir * @filp: opened file structure @@ -145,12 +136,10 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) rdir->tail = n; } while (rdir->head < rdir->tail) { - p9stat_init(&st); err = p9stat_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &st); if (err) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); - p9stat_free(&st); return -EIO; } reclen = st.size+2; From 9520db16756e24873677d867a6a0f9cab3b6726d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 8 Sep 2018 01:42:58 +0900 Subject: [PATCH 0626/2608] selinux: Add __GFP_NOWARN to allocation at str_read() commit 4458bba09788e70e8fb39ad003f087cd9dfbd6ac upstream. syzbot is hitting warning at str_read() [1] because len parameter can become larger than KMALLOC_MAX_SIZE. We don't need to emit warning for this case. [1] https://syzkaller.appspot.com/bug?id=7f2f5aad79ea8663c296a2eedb81978401a908f0 Signed-off-by: Tetsuo Handa Reported-by: syzbot Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/policydb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 6e8c8056d7ad..6688ac5b991e 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -1099,7 +1099,7 @@ static int str_read(char **strp, gfp_t flags, void *fp, u32 len) if ((len == 0) || (len == (u32)-1)) return -EINVAL; - str = kmalloc(len + 1, flags); + str = kmalloc(len + 1, flags | __GFP_NOWARN); if (!str) return -ENOMEM; From 922fad66a961a34d970f9d0d0e961813489c556c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 16 Oct 2018 17:07:35 -0700 Subject: [PATCH 0627/2608] Input: synaptics - avoid using uninitialized variable when probing commit f39f8688888ae74fa8deae2d01289b69b4727394 upstream. synaptics_detect() does not check whether sending commands to the device succeeds and instead relies on getting unique data from the device. Let's make sure we seed entire buffer with zeroes to make sure we will not use garbage on stack that just happen to be 0x47. Reported-by: syzbot+13cb3b01d0784e4ffc3f@syzkaller.appspotmail.com Reviewed-by: Benjamin Tissoires Reviewed-by: Peter Hutterer Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 6c4bbd38700e..6f36e2d01e2e 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -99,9 +99,7 @@ static int synaptics_mode_cmd(struct psmouse *psmouse, u8 mode) int synaptics_detect(struct psmouse *psmouse, bool set_properties) { struct ps2dev *ps2dev = &psmouse->ps2dev; - u8 param[4]; - - param[0] = 0; + u8 param[4] = { 0 }; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); From bc94cd6d869ea0ac78936b14321d37bdea657ba4 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 2 Nov 2018 15:48:42 -0700 Subject: [PATCH 0628/2608] bfs: add sanity check at bfs_fill_super() commit 9f2df09a33aa2c76ce6385d382693f98d7f2f07e upstream. syzbot is reporting too large memory allocation at bfs_fill_super() [1]. Since file system image is corrupted such that bfs_sb->s_start == 0, bfs_fill_super() is trying to allocate 8MB of continuous memory. Fix this by adding a sanity check on bfs_sb->s_start, __GFP_NOWARN and printf(). [1] https://syzkaller.appspot.com/bug?id=16a87c236b951351374a84c8a32f40edbc034e96 Link: http://lkml.kernel.org/r/1525862104-3407-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: syzbot Reviewed-by: Andrew Morton Cc: Tigran Aivazian Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/bfs/inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9a69392f1fb3..d81c148682e7 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = BFS_MAGIC; - if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { + if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) || + le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) { printf("Superblock is corrupted\n"); goto out1; } @@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; imap_len = (info->si_lasti / 8) + 1; - info->si_imap = kzalloc(imap_len, GFP_KERNEL); - if (!info->si_imap) + info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN); + if (!info->si_imap) { + printf("Cannot allocate %u bytes\n", imap_len); goto out1; + } for (i = 0; i < BFS_ROOT_INO; i++) set_bit(i, info->si_imap); From 8376fdc999be008f0e9918db52f1ed8c08f5a1c9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 29 Oct 2018 23:10:29 +0800 Subject: [PATCH 0629/2608] sctp: clear the transport of some out_chunk_list chunks in sctp_assoc_rm_peer commit df132eff463873e14e019a07f387b4d577d6d1f9 upstream. If a transport is removed by asconf but there still are some chunks with this transport queuing on out_chunk_list, later an use-after-free issue will be caused when accessing this transport from these chunks in sctp_outq_flush(). This is an old bug, we fix it by clearing the transport of these chunks in out_chunk_list when removing a transport in sctp_assoc_rm_peer(). Reported-by: syzbot+56a40ceee5fb35932f4d@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/associola.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 58f7d8cfd748..4982b31fec8e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -497,8 +497,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, void sctp_assoc_rm_peer(struct sctp_association *asoc, struct sctp_transport *peer) { - struct list_head *pos; - struct sctp_transport *transport; + struct sctp_transport *transport; + struct list_head *pos; + struct sctp_chunk *ch; pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc, &peer->ipaddr.sa); @@ -562,7 +563,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, */ if (!list_empty(&peer->transmitted)) { struct sctp_transport *active = asoc->peer.active_path; - struct sctp_chunk *ch; /* Reset the transport of each chunk on this list */ list_for_each_entry(ch, &peer->transmitted, @@ -584,6 +584,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, sctp_transport_hold(active); } + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) + if (ch->transport == peer) + ch->transport = NULL; + asoc->peer.transport_count--; sctp_transport_free(peer); From 49ee6220be1b9c2dfd00a4a7adc55c2d7ffc7faf Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Mon, 8 Oct 2018 07:52:43 -0500 Subject: [PATCH 0630/2608] gfs2: Don't leave s_fs_info pointing to freed memory in init_sbd commit 4c62bd9cea7bcf10292f7e4c57a2bca332942697 upstream. When alloc_percpu() fails, sdp gets freed but sb->s_fs_info still points to the same address. Move the assignment after that error check so that s_fs_info can only point to a valid sdp or NULL, which is checked for later in the error path, in gfs2_kill_super(). Reported-by: syzbot+dcb8b3587445007f5808@syzkaller.appspotmail.com Signed-off-by: Andrew Price Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 28d6c65c8bb3..057be88eb1b4 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -72,13 +72,13 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) if (!sdp) return NULL; - sb->s_fs_info = sdp; sdp->sd_vfs = sb; sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats); if (!sdp->sd_lkstats) { kfree(sdp); return NULL; } + sb->s_fs_info = sdp; set_bit(SDF_NOJOURNALID, &sdp->sd_flags); gfs2_tune_init(&sdp->sd_tune); From 8a37895d1e35884c160fbdd2bb90aeab8daafd87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Oct 2018 09:24:27 -0700 Subject: [PATCH 0631/2608] llc: do not use sk_eat_skb() commit 604d415e2bd642b7e02c80e719e0396b9d4a77a6 upstream. syzkaller triggered a use-after-free [1], caused by a combination of skb_get() in llc_conn_state_process() and usage of sk_eat_skb() sk_eat_skb() is assuming the skb about to be freed is only used by the current thread. TCP/DCCP stacks enforce this because current thread holds the socket lock. llc_conn_state_process() wants to make sure skb does not disappear, and holds a reference on the skb it manipulates. But as soon as this skb is added to socket receive queue, another thread can consume it. This means that llc must use regular skb_unlink() and kfree_skb() so that both producer and consumer can safely work on the same skb. [1] BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] BUG: KASAN: use-after-free in refcount_read include/linux/refcount.h:43 [inline] BUG: KASAN: use-after-free in skb_unref include/linux/skbuff.h:967 [inline] BUG: KASAN: use-after-free in kfree_skb+0xb7/0x580 net/core/skbuff.c:655 Read of size 4 at addr ffff8801d1f6fba4 by task ksoftirqd/1/18 CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.19.0-rc8+ #295 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b6 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] refcount_read include/linux/refcount.h:43 [inline] skb_unref include/linux/skbuff.h:967 [inline] kfree_skb+0xb7/0x580 net/core/skbuff.c:655 llc_sap_state_process+0x9b/0x550 net/llc/llc_sap.c:224 llc_sap_rcv+0x156/0x1f0 net/llc/llc_sap.c:297 llc_sap_handler+0x65e/0xf80 net/llc/llc_sap.c:438 llc_rcv+0x79e/0xe20 net/llc/llc_input.c:208 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 process_backlog+0x218/0x6f0 net/core/dev.c:5829 napi_poll net/core/dev.c:6249 [inline] net_rx_action+0x7c5/0x1950 net/core/dev.c:6315 __do_softirq+0x30c/0xb03 kernel/softirq.c:292 run_ksoftirqd+0x94/0x100 kernel/softirq.c:653 smpboot_thread_fn+0x68b/0xa00 kernel/smpboot.c:164 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413 Allocated by task 18: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc_node+0x144/0x730 mm/slab.c:3644 __alloc_skb+0x119/0x770 net/core/skbuff.c:193 alloc_skb include/linux/skbuff.h:995 [inline] llc_alloc_frame+0xbc/0x370 net/llc/llc_sap.c:54 llc_station_ac_send_xid_r net/llc/llc_station.c:52 [inline] llc_station_rcv+0x1dc/0x1420 net/llc/llc_station.c:111 llc_rcv+0xc32/0xe20 net/llc/llc_input.c:220 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 process_backlog+0x218/0x6f0 net/core/dev.c:5829 napi_poll net/core/dev.c:6249 [inline] net_rx_action+0x7c5/0x1950 net/core/dev.c:6315 __do_softirq+0x30c/0xb03 kernel/softirq.c:292 Freed by task 16383: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x83/0x290 mm/slab.c:3756 kfree_skbmem+0x154/0x230 net/core/skbuff.c:582 __kfree_skb+0x1d/0x20 net/core/skbuff.c:642 sk_eat_skb include/net/sock.h:2366 [inline] llc_ui_recvmsg+0xec2/0x1610 net/llc/af_llc.c:882 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg+0xd0/0x110 net/socket.c:801 ___sys_recvmsg+0x2b6/0x680 net/socket.c:2278 __sys_recvmmsg+0x303/0xb90 net/socket.c:2390 do_sys_recvmmsg+0x181/0x1a0 net/socket.c:2466 __do_sys_recvmmsg net/socket.c:2484 [inline] __se_sys_recvmmsg net/socket.c:2480 [inline] __x64_sys_recvmmsg+0xbe/0x150 net/socket.c:2480 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801d1f6fac0 which belongs to the cache skbuff_head_cache of size 232 The buggy address is located 228 bytes inside of 232-byte region [ffff8801d1f6fac0, ffff8801d1f6fba8) The buggy address belongs to the page: page:ffffea000747dbc0 count:1 mapcount:0 mapping:ffff8801d9be7680 index:0xffff8801d1f6fe80 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0007346e88 ffffea000705b108 ffff8801d9be7680 raw: ffff8801d1f6fe80 ffff8801d1f6f0c0 000000010000000b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801d1f6fa80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8801d1f6fb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801d1f6fb80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc ^ ffff8801d1f6fc00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801d1f6fc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/af_llc.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b49f5afab405..2e472d5c3ea4 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -730,7 +730,6 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct sk_buff *skb = NULL; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - unsigned long cpu_flags; size_t copied = 0; u32 peek_seq = 0; u32 *seq, skb_len; @@ -855,9 +854,8 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, goto copy_uaddr; if (!(flags & MSG_PEEK)) { - spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - sk_eat_skb(sk, skb); - spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); + skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); *seq = 0; } @@ -878,9 +876,8 @@ copy_uaddr: llc_cmsg_rcv(msg, skb); if (!(flags & MSG_PEEK)) { - spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - sk_eat_skb(sk, skb); - spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); + skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); *seq = 0; } From 97764043885cd2aa6017891948a6705e4f1086e8 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 26 Oct 2018 15:03:12 -0700 Subject: [PATCH 0632/2608] mm: don't warn about large allocations for slab commit 61448479a9f2c954cde0cfe778cb6bec5d0a748d upstream. Slub does not call kmalloc_slab() for sizes > KMALLOC_MAX_CACHE_SIZE, instead it falls back to kmalloc_large(). For slab KMALLOC_MAX_CACHE_SIZE == KMALLOC_MAX_SIZE and it calls kmalloc_slab() for all allocations relying on NULL return value for over-sized allocations. This inconsistency leads to unwanted warnings from kmalloc_slab() for over-sized allocations for slab. Returning NULL for failed allocations is the expected behavior. Make slub and slab code consistent by checking size > KMALLOC_MAX_CACHE_SIZE in slab before calling kmalloc_slab(). While we are here also fix the check in kmalloc_slab(). We should check against KMALLOC_MAX_CACHE_SIZE rather than KMALLOC_MAX_SIZE. It all kinda worked because for slab the constants are the same, and slub always checks the size against KMALLOC_MAX_CACHE_SIZE before kmalloc_slab(). But if we get there with size > KMALLOC_MAX_CACHE_SIZE anyhow bad things will happen. For example, in case of a newly introduced bug in slub code. Also move the check in kmalloc_slab() from function entry to the size > 192 case. This partially compensates for the additional check in slab code and makes slub code a bit faster (at least theoretically). Also drop __GFP_NOWARN in the warning check. This warning means a bug in slab code itself, user-passed flags have nothing to do with it. Nothing of this affects slob. Link: http://lkml.kernel.org/r/20180927171502.226522-1-dvyukov@gmail.com Signed-off-by: Dmitry Vyukov Reported-by: syzbot+87829a10073277282ad1@syzkaller.appspotmail.com Reported-by: syzbot+ef4e8fc3a06e9019bb40@syzkaller.appspotmail.com Reported-by: syzbot+6e438f4036df52cbb863@syzkaller.appspotmail.com Reported-by: syzbot+8574471d8734457d98aa@syzkaller.appspotmail.com Reported-by: syzbot+af1504df0807a083dbd9@syzkaller.appspotmail.com Acked-by: Christoph Lameter Acked-by: Vlastimil Babka Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 4 ++++ mm/slab_common.c | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 198c1e2c5358..68ab88e2920e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3670,6 +3670,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) struct kmem_cache *cachep; void *ret; + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) + return NULL; cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; @@ -3705,6 +3707,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, struct kmem_cache *cachep; void *ret; + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) + return NULL; cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; diff --git a/mm/slab_common.c b/mm/slab_common.c index 91d271b90600..f6764cf162b8 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -971,18 +971,18 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) { int index; - if (unlikely(size > KMALLOC_MAX_SIZE)) { - WARN_ON_ONCE(!(flags & __GFP_NOWARN)); - return NULL; - } - if (size <= 192) { if (!size) return ZERO_SIZE_PTR; index = size_index[size_index_elem(size)]; - } else + } else { + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { + WARN_ON(1); + return NULL; + } index = fls(size - 1); + } #ifdef CONFIG_ZONE_DMA if (unlikely((flags & GFP_DMA))) From 6b43a9978a6b000cf05d19f8765eba0434817f94 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 26 Oct 2018 15:09:01 -0700 Subject: [PATCH 0633/2608] mm/memory.c: recheck page table entry with page table lock held commit ff09d7ec9786be4ad7589aa987d7dc66e2dd9160 upstream. We clear the pte temporarily during read/modify/write update of the pte. If we take a page fault while the pte is cleared, the application can get SIGBUS. One such case is with remap_pfn_range without a backing vm_ops->fault callback. do_fault will return SIGBUS in that case. cpu 0 cpu1 mprotect() ptep_modify_prot_start()/pte cleared. . . page fault. . . prep_modify_prot_commit() Fix this by taking page table lock and rechecking for pte_none. [aneesh.kumar@linux.ibm.com: fix crash observed with syzkaller run] Link: http://lkml.kernel.org/r/87va6bwlfg.fsf@linux.ibm.com Link: http://lkml.kernel.org/r/20180926031858.9692-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Cc: Willem de Bruijn Cc: Eric Dumazet Cc: Ido Schimmel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 93d5d324904b..b6cfe0cf0ead 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3697,10 +3697,36 @@ static int do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; int ret; - /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ - if (!vma->vm_ops->fault) - ret = VM_FAULT_SIGBUS; - else if (!(vmf->flags & FAULT_FLAG_WRITE)) + /* + * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND + */ + if (!vma->vm_ops->fault) { + /* + * If we find a migration pmd entry or a none pmd entry, which + * should never happen, return SIGBUS + */ + if (unlikely(!pmd_present(*vmf->pmd))) + ret = VM_FAULT_SIGBUS; + else { + vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, + vmf->pmd, + vmf->address, + &vmf->ptl); + /* + * Make sure this is not a temporary clearing of pte + * by holding ptl and checking again. A R/M/W update + * of pte involves: take ptl, clearing the pte so that + * we don't have concurrent modification by hardware + * followed by an update. + */ + if (unlikely(pte_none(*vmf->pte))) + ret = VM_FAULT_SIGBUS; + else + ret = VM_FAULT_NOPAGE; + + pte_unmap_unlock(vmf->pte, vmf->ptl); + } + } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ret = do_read_fault(vmf); else if (!(vma->vm_flags & VM_SHARED)) ret = do_cow_fault(vmf); From e6ddc2c3d89c8ed3266d83254a0c11798f33502e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Oct 2018 23:24:26 -0700 Subject: [PATCH 0634/2608] tcp: do not release socket ownership in tcp_close() commit 8873c064d1de579ea23412a6d3eee972593f142b upstream. syzkaller was able to hit the WARN_ON(sock_owned_by_user(sk)); in tcp_close() While a socket is being closed, it is very possible other threads find it in rtnetlink dump. tcp_get_info() will acquire the socket lock for a short amount of time (slow = lock_sock_fast(sk)/unlock_sock_fast(sk, slow);), enough to trigger the warning. Fixes: 67db3e4bfbc9 ("tcp: no longer hold ehash lock while calling tcp_get_info()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 1 + net/core/sock.c | 2 +- net/ipv4/tcp.c | 11 +++-------- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 9bd5d68076d9..64a330544dad 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1452,6 +1452,7 @@ static inline void lock_sock(struct sock *sk) lock_sock_nested(sk, 0); } +void __release_sock(struct sock *sk); void release_sock(struct sock *sk); /* BH context may only use the following locking interface. */ diff --git a/net/core/sock.c b/net/core/sock.c index 68d08ed5521e..36f19458e2fe 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2242,7 +2242,7 @@ static void __lock_sock(struct sock *sk) finish_wait(&sk->sk_lock.wq, &wait); } -static void __release_sock(struct sock *sk) +void __release_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f9c985460faa..8109985e78a1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2217,16 +2217,10 @@ adjudge_to_death: sock_hold(sk); sock_orphan(sk); - /* It is the last release_sock in its life. It will remove backlog. */ - release_sock(sk); - - - /* Now socket is owned by kernel and we acquire BH lock - * to finish close. No need to check for user refs. - */ local_bh_disable(); bh_lock_sock(sk); - WARN_ON(sock_owned_by_user(sk)); + /* remove backlog if any, without releasing ownership. */ + __release_sock(sk); percpu_counter_inc(sk->sk_prot->orphan_count); @@ -2295,6 +2289,7 @@ adjudge_to_death: out: bh_unlock_sock(sk); local_bh_enable(); + release_sock(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); From c271b660b798998c926d2d62b47d0cd4d97d8d2e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 9 Jan 2018 15:24:50 +0200 Subject: [PATCH 0635/2608] IB/core: Perform modify QP on real one commit b2bedfb39541a7e14798d066b6f8685d84c8fcf5 upstream. Currently qp->port stores the port number whenever IB_QP_PORT QP attribute mask is set (during QP state transition to INIT state). This port number should be stored for the real QP when XRC target QP is used. Follow the ib_modify_qp() implementation and hide the access to ->real_qp. Fixes: a512c2fbef9c ("IB/core: Introduce modify QP operation with udata") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/verbs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index feb80dbb5948..6d59af07d338 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1285,7 +1285,7 @@ EXPORT_SYMBOL(ib_resolve_eth_dmac); /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. - * @qp: The QP to modify. + * @ib_qp: The QP to modify. * @attr: On input, specifies the QP attributes to modify. On output, * the current values of selected QP attributes are returned. * @attr_mask: A bit-mask used to specify which attributes of the QP @@ -1294,9 +1294,10 @@ EXPORT_SYMBOL(ib_resolve_eth_dmac); * are being modified. * It returns 0 on success and returns appropriate error code on error. */ -int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, +int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + struct ib_qp *qp = ib_qp->real_qp; int ret; if (attr_mask & IB_QP_AV) { From d84bb18ed7a29a3a571feed8ed36ee2c3e58d1bc Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Nov 2018 11:38:41 +0200 Subject: [PATCH 0636/2608] usb: xhci: Prevent bus suspend if a port connect change or polling state is detected commit 2f31a67f01a8beb22cae754c53522cb61a005750 upstream. USB3 roothub might autosuspend before a plugged USB3 device is detected, causing USB3 device enumeration failure. USB3 devices don't show up as connected and enabled until USB3 link trainig completes. On a fast booting platform with a slow USB3 link training the link might reach the connected enabled state just as the bus is suspending. If this device is discovered first time by the xhci_bus_suspend() routine it will be put to U3 suspended state like the other ports which failed to suspend earlier. The hub thread will notice the connect change and resume the bus, moving the port back to U0 This U0 -> U3 -> U0 transition right after being connected seems to be too much for some devices, causing them to first go to SS.Inactive state, and finally end up stuck in a polling state with reset asserted Fix this by failing the bus suspend if a port has a connect change or is in a polling state in xhci_bus_suspend(). Don't do any port changes until all ports are checked, buffer all port changes and only write them in the end if suspend can proceed Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 60 ++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 4f612b5d0f94..a2e350d28c05 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1481,13 +1481,16 @@ int xhci_bus_suspend(struct usb_hcd *hcd) __le32 __iomem **port_array; struct xhci_bus_state *bus_state; unsigned long flags; + u32 portsc_buf[USB_MAXCHILDREN]; + bool wake_enabled; max_ports = xhci_get_ports(hcd, &port_array); bus_state = &xhci->bus_state[hcd_index(hcd)]; + wake_enabled = hcd->self.root_hub->do_remote_wakeup; spin_lock_irqsave(&xhci->lock, flags); - if (hcd->self.root_hub->do_remote_wakeup) { + if (wake_enabled) { if (bus_state->resuming_ports || /* USB2 */ bus_state->port_remote_wakeup) { /* USB3 */ spin_unlock_irqrestore(&xhci->lock, flags); @@ -1495,26 +1498,36 @@ int xhci_bus_suspend(struct usb_hcd *hcd) return -EBUSY; } } - - port_index = max_ports; + /* + * Prepare ports for suspend, but don't write anything before all ports + * are checked and we know bus suspend can proceed + */ bus_state->bus_suspended = 0; + port_index = max_ports; while (port_index--) { - /* suspend the port if the port is not suspended */ u32 t1, t2; - int slot_id; t1 = readl(port_array[port_index]); t2 = xhci_port_state_to_neutral(t1); + portsc_buf[port_index] = 0; - if ((t1 & PORT_PE) && !(t1 & PORT_PLS_MASK)) { - xhci_dbg(xhci, "port %d not suspended\n", port_index); - slot_id = xhci_find_slot_id_by_port(hcd, xhci, - port_index + 1); - if (slot_id) { + /* Bail out if a USB3 port has a new device in link training */ + if ((t1 & PORT_PLS_MASK) == XDEV_POLLING) { + bus_state->bus_suspended = 0; + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_dbg(xhci, "Bus suspend bailout, port in polling\n"); + return -EBUSY; + } + + /* suspend ports in U0, or bail out for new connect changes */ + if ((t1 & PORT_PE) && (t1 & PORT_PLS_MASK) == XDEV_U0) { + if ((t1 & PORT_CSC) && wake_enabled) { + bus_state->bus_suspended = 0; spin_unlock_irqrestore(&xhci->lock, flags); - xhci_stop_device(xhci, slot_id, 1); - spin_lock_irqsave(&xhci->lock, flags); + xhci_dbg(xhci, "Bus suspend bailout, port connect change\n"); + return -EBUSY; } + xhci_dbg(xhci, "port %d not suspended\n", port_index); t2 &= ~PORT_PLS_MASK; t2 |= PORT_LINK_STROBE | XDEV_U3; set_bit(port_index, &bus_state->bus_suspended); @@ -1523,7 +1536,7 @@ int xhci_bus_suspend(struct usb_hcd *hcd) * including the USB 3.0 roothub, but only if CONFIG_PM * is enabled, so also enable remote wake here. */ - if (hcd->self.root_hub->do_remote_wakeup) { + if (wake_enabled) { if (t1 & PORT_CONNECT) { t2 |= PORT_WKOC_E | PORT_WKDISC_E; t2 &= ~PORT_WKCONN_E; @@ -1543,7 +1556,26 @@ int xhci_bus_suspend(struct usb_hcd *hcd) t1 = xhci_port_state_to_neutral(t1); if (t1 != t2) - writel(t2, port_array[port_index]); + portsc_buf[port_index] = t2; + } + + /* write port settings, stopping and suspending ports if needed */ + port_index = max_ports; + while (port_index--) { + if (!portsc_buf[port_index]) + continue; + if (test_bit(port_index, &bus_state->bus_suspended)) { + int slot_id; + + slot_id = xhci_find_slot_id_by_port(hcd, xhci, + port_index + 1); + if (slot_id) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_stop_device(xhci, slot_id, 1); + spin_lock_irqsave(&xhci->lock, flags); + } + } + writel(portsc_buf[port_index], port_array[port_index]); } hcd->state = HC_STATE_SUSPENDED; bus_state->next_statechange = jiffies + msecs_to_jiffies(10); From e51b0b1c1f015c72315a2706e043cd9665af73cc Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Wed, 3 Oct 2018 14:57:47 +0800 Subject: [PATCH 0637/2608] drm/ast: change resolution may cause screen blurred commit 1a37bd823891568f8721989aed0615835632d81a upstream. The value of pitches is not correct while calling mode_set. The issue we found so far on following system: - Debian8 with XFCE Desktop - Ubuntu with KDE Desktop - SUSE15 with KDE Desktop Signed-off-by: Y.C. Chen Cc: Tested-by: Jean Delvare Reviewed-by: Jean Delvare Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_mode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index e9f1e6fe7b94..604ff42121f8 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -568,6 +568,7 @@ static int ast_crtc_do_set_base(struct drm_crtc *crtc, } ast_bo_unreserve(bo); + ast_set_offset_reg(crtc); ast_set_start_address_crt1(crtc, (u32)gpu_addr); return 0; From ffc3c0ffbb93be9b1b75f0a71ba38501039f5180 Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Tue, 30 Oct 2018 11:34:46 +0800 Subject: [PATCH 0638/2608] drm/ast: fixed cursor may disappear sometimes commit 7989b9ee8bafe5cc625381dd0c3c4586de27ca26 upstream. Signed-off-by: Y.C. Chen Cc: Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_mode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 604ff42121f8..fae1176b2472 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1255,7 +1255,7 @@ static int ast_cursor_move(struct drm_crtc *crtc, ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc7, ((y >> 8) & 0x07)); /* dummy write to fire HWC */ - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xCB, 0xFF, 0x00); + ast_show_cursor(crtc); return 0; } From dca24b512763930963408ea5c78a2f223a73a2a8 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 15 Nov 2018 11:42:16 +0100 Subject: [PATCH 0639/2608] drm/ast: Remove existing framebuffers before loading driver commit 5478ad10e7850ce3d8b7056db05ddfa3c9ddad9a upstream. If vesafb attaches to the AST device, it configures the framebuffer memory for uncached access by default. When ast.ko later tries to attach itself to the device, it wants to use write-combining on the framebuffer memory, but vesefb's existing configuration for uncached access takes precedence. This results in reduced performance. Removing the framebuffer's configuration before loding the AST driver fixes the problem. Other DRM drivers already contain equivalent code. Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1112963 Signed-off-by: Thomas Zimmermann Cc: Tested-by: Y.C. Chen Reviewed-by: Jean Delvare Tested-by: Jean Delvare Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_drv.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 69dab82a3771..bf589c53b908 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -60,8 +60,29 @@ static const struct pci_device_id pciidlist[] = { MODULE_DEVICE_TABLE(pci, pciidlist); +static void ast_kick_out_firmware_fb(struct pci_dev *pdev) +{ + struct apertures_struct *ap; + bool primary = false; + + ap = alloc_apertures(1); + if (!ap) + return; + + ap->ranges[0].base = pci_resource_start(pdev, 0); + ap->ranges[0].size = pci_resource_len(pdev, 0); + +#ifdef CONFIG_X86 + primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; +#endif + drm_fb_helper_remove_conflicting_framebuffers(ap, "astdrmfb", primary); + kfree(ap); +} + static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + ast_kick_out_firmware_fb(pdev); + return drm_get_pci_dev(pdev, ent, &driver); } From b3f1f493529e060fd3e9d4fe5b406f8379b24d71 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 10:37:46 +0100 Subject: [PATCH 0640/2608] can: dev: can_get_echo_skb(): factor out non sending code to __can_get_echo_skb() commit a4310fa2f24687888ce80fdb0e88583561a23700 upstream. This patch factors out all non sending parts of can_get_echo_skb() into a seperate function __can_get_echo_skb(), so that it can be re-used in an upcoming patch. Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 36 +++++++++++++++++++++++++----------- include/linux/can/dev.h | 1 + 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index b6a681bce400..ae08bc6e7267 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -476,14 +476,7 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL_GPL(can_put_echo_skb); -/* - * Get the skb from the stack and loop it back locally - * - * The function is typically called when the TX done interrupt - * is handled in the device driver. The driver must protect - * access to priv->echo_skb, if necessary. - */ -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); @@ -494,13 +487,34 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; - netif_rx(priv->echo_skb[idx]); + *len_ptr = dlc; priv->echo_skb[idx] = NULL; - return dlc; + return skb; } - return 0; + return NULL; +} + +/* + * Get the skb from the stack and loop it back locally + * + * The function is typically called when the TX done interrupt + * is handled in the device driver. The driver must protect + * access to priv->echo_skb, if necessary. + */ +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) +{ + struct sk_buff *skb; + u8 len; + + skb = __can_get_echo_skb(dev, idx, &len); + if (!skb) + return 0; + + netif_rx(skb); + + return len; } EXPORT_SYMBOL_GPL(can_get_echo_skb); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 61f1cf2d9f44..c0c0b992210e 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -163,6 +163,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); From 3f6b2bbbee20a5aa52eed1aaea65cdab98aed8c6 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 11:08:21 +0100 Subject: [PATCH 0641/2608] can: dev: __can_get_echo_skb(): replace struct can_frame by canfd_frame to access frame length commit 200f5c49f7a2cd694436bfc6cb0662b794c96736 upstream. This patch replaces the use of "struct can_frame::can_dlc" by "struct canfd_frame::len" to access the frame's length. As it is ensured that both structures have a compatible memory layout for this member this is no functional change. Futher, this compatibility is documented in a comment. Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index ae08bc6e7267..7900d5a39989 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -483,11 +483,14 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 BUG_ON(idx >= priv->echo_skb_max); if (priv->echo_skb[idx]) { + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ struct sk_buff *skb = priv->echo_skb[idx]; - struct can_frame *cf = (struct can_frame *)skb->data; - u8 dlc = cf->can_dlc; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u8 len = cf->len; - *len_ptr = dlc; + *len_ptr = len; priv->echo_skb[idx] = NULL; return skb; From bf991335b02d62e067e09471b15cf2216d54d5cf Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 14:05:26 +0100 Subject: [PATCH 0642/2608] can: dev: __can_get_echo_skb(): Don't crash the kernel if can_priv::echo_skb is accessed out of bounds commit e7a6994d043a1e31d5b17706a22ce33d2a3e4cdc upstream. If the "struct can_priv::echo_skb" is accessed out of bounds would lead to a kernel crash. Better print a sensible warning message instead and try to recover. Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 7900d5a39989..7d61d8801220 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -480,7 +480,11 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 { struct can_priv *priv = netdev_priv(dev); - BUG_ON(idx >= priv->echo_skb_max); + if (idx >= priv->echo_skb_max) { + netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", + __func__, idx, priv->echo_skb_max); + return NULL; + } if (priv->echo_skb[idx]) { /* Using "struct canfd_frame::len" for the frame From 906ed1bdf850851b3433e4f5521b406651643b27 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 14:15:13 +0100 Subject: [PATCH 0643/2608] can: dev: __can_get_echo_skb(): print error message, if trying to echo non existing skb commit 7da11ba5c5066dadc2e96835a6233d56d7b7764a upstream. Prior to echoing a successfully transmitted CAN frame (by calling can_get_echo_skb()), CAN drivers have to put the CAN frame (by calling can_put_echo_skb() in the transmit function). These put and get function take an index as parameter, which is used to identify the CAN frame. A driver calling can_get_echo_skb() with a index not pointing to a skb is a BUG, so add an appropriate error message. Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 7d61d8801220..035daca63168 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -479,6 +479,8 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf; if (idx >= priv->echo_skb_max) { netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", @@ -486,21 +488,20 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 return NULL; } - if (priv->echo_skb[idx]) { - /* Using "struct canfd_frame::len" for the frame - * length is supported on both CAN and CANFD frames. - */ - struct sk_buff *skb = priv->echo_skb[idx]; - struct canfd_frame *cf = (struct canfd_frame *)skb->data; - u8 len = cf->len; - - *len_ptr = len; - priv->echo_skb[idx] = NULL; - - return skb; + if (!skb) { + netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n", + __func__, idx); + return NULL; } - return NULL; + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ + cf = (struct canfd_frame *)skb->data; + *len_ptr = cf->len; + priv->echo_skb[idx] = NULL; + + return skb; } /* From bb8813be5cc7a0134e5bd8584005b189223f237c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:38 +0200 Subject: [PATCH 0644/2608] can: rx-offload: introduce can_rx_offload_get_echo_skb() and can_rx_offload_queue_sorted() functions commit 55059f2b7f868cd43b3ad30e28e18347e1b46ace upstream. Current CAN framework can't guarantee proper/chronological order of RX and TX-ECHO messages. To make this possible, drivers should use this functions instead of can_get_echo_skb(). Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/rx-offload.c | 46 ++++++++++++++++++++++++++++++++++ include/linux/can/rx-offload.h | 4 +++ 2 files changed, 50 insertions(+) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index f394f77d7528..61b30bfffacc 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -209,6 +209,52 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) } EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo); +int can_rx_offload_queue_sorted(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp) +{ + struct can_rx_offload_cb *cb; + unsigned long flags; + + if (skb_queue_len(&offload->skb_queue) > + offload->skb_queue_len_max) + return -ENOMEM; + + cb = can_rx_offload_get_cb(skb); + cb->timestamp = timestamp; + + spin_lock_irqsave(&offload->skb_queue.lock, flags); + __skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare); + spin_unlock_irqrestore(&offload->skb_queue.lock, flags); + + can_rx_offload_schedule(offload); + + return 0; +} +EXPORT_SYMBOL_GPL(can_rx_offload_queue_sorted); + +unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp) +{ + struct net_device *dev = offload->dev; + struct net_device_stats *stats = &dev->stats; + struct sk_buff *skb; + u8 len; + int err; + + skb = __can_get_echo_skb(dev, idx, &len); + if (!skb) + return 0; + + err = can_rx_offload_queue_sorted(offload, skb, timestamp); + if (err) { + stats->rx_errors++; + stats->tx_fifo_errors++; + } + + return len; +} +EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb); + int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index cb31683bbe15..01a7c9e5d8d8 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -41,6 +41,10 @@ int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload * int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); +int can_rx_offload_queue_sorted(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp); +unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp); int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); From 30b996ac6e61e9a39d6bec70ec4e39daeeeff8b6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:40 +0200 Subject: [PATCH 0645/2608] can: rx-offload: rename can_rx_offload_irq_queue_err_skb() to can_rx_offload_queue_tail() commit 4530ec36bb1e0d24f41c33229694adacda3d5d89 upstream. This function has nothing todo with error. Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/flexcan.c | 4 ++-- drivers/net/can/rx-offload.c | 5 +++-- include/linux/can/rx-offload.h | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ed8a2a7ce500..9ef501fd153f 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -599,7 +599,7 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) if (tx_errors) dev->stats.tx_errors++; - can_rx_offload_irq_queue_err_skb(&priv->offload, skb); + can_rx_offload_queue_tail(&priv->offload, skb); } static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) @@ -639,7 +639,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) if (unlikely(new_state == CAN_STATE_BUS_OFF)) can_bus_off(dev); - can_rx_offload_irq_queue_err_skb(&priv->offload, skb); + can_rx_offload_queue_tail(&priv->offload, skb); } static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 61b30bfffacc..d227db45fec9 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -255,7 +255,8 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, } EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb); -int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb) +int can_rx_offload_queue_tail(struct can_rx_offload *offload, + struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) @@ -266,7 +267,7 @@ int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_b return 0; } -EXPORT_SYMBOL_GPL(can_rx_offload_irq_queue_err_skb); +EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail); static int can_rx_offload_init_queue(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight) { diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 01a7c9e5d8d8..8268811a697e 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -45,7 +45,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int idx, u32 timestamp); -int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); +int can_rx_offload_queue_tail(struct can_rx_offload *offload, + struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); void can_rx_offload_enable(struct can_rx_offload *offload); From 9af977aeef27001698133948fd44fa7e59a5901d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 24 Oct 2018 10:27:12 +0200 Subject: [PATCH 0646/2608] can: raw: check for CAN FD capable netdev in raw_sendmsg() commit a43608fa77213ad5ac5f75994254b9f65d57cfa0 upstream. When the socket is CAN FD enabled it can handle CAN FD frame transmissions. Add an additional check in raw_sendmsg() as a CAN2.0 CAN driver (non CAN FD) should never see a CAN FD frame. Due to the commonly used can_dropped_invalid_skb() function the CAN 2.0 driver would drop that CAN FD frame anyway - but with this patch the user gets a proper -EINVAL return code. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/raw.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index 864c80dbdb72..e1f26441b49a 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -745,18 +745,19 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } else ifindex = ro->ifindex; - if (ro->fd_frames) { - if (unlikely(size != CANFD_MTU && size != CAN_MTU)) - return -EINVAL; - } else { - if (unlikely(size != CAN_MTU)) - return -EINVAL; - } - dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENXIO; + err = -EINVAL; + if (ro->fd_frames && dev->mtu == CANFD_MTU) { + if (unlikely(size != CANFD_MTU && size != CAN_MTU)) + goto put_dev; + } else { + if (unlikely(size != CAN_MTU)) + goto put_dev; + } + skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) From 7d0724cecb0ebfcf161277fc4c44616a2f9d1e0c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 27 Oct 2018 10:36:54 +0200 Subject: [PATCH 0647/2608] can: hi311x: Use level-triggered interrupt commit f164d0204b1156a7e0d8d1622c1a8d25752befec upstream. If the hi3110 shares the SPI bus with another traffic-intensive device and packets are received in high volume (by a separate machine sending with "cangen -g 0 -i -x"), reception stops after a few minutes and the counter in /proc/interrupts stops incrementing. Bus state is "active". Bringing the interface down and back up reconvenes the reception. The issue is not observed when the hi3110 is the sole device on the SPI bus. Using a level-triggered interrupt makes the issue go away and lets the hi3110 successfully receive 2 GByte over the course of 5 days while a ks8851 Ethernet chip on the same SPI bus handles 6 GByte of traffic. Unfortunately the hi3110 datasheet is mum on the trigger type. The pin description on page 3 only specifies the polarity (active high): http://www.holtic.com/documents/371-hi-3110_v-rev-kpdf.do Cc: Mathias Duckeck Cc: Akshay Bhat Cc: Casey Fitzpatrick Signed-off-by: Lukas Wunner Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/can/holt_hi311x.txt | 2 +- drivers/net/can/spi/hi311x.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt index 23aa94eab207..4e0ec14f7abf 100644 --- a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt +++ b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt @@ -18,7 +18,7 @@ Example: reg = <1>; clocks = <&clk32m>; interrupt-parent = <&gpio4>; - interrupts = <13 IRQ_TYPE_EDGE_RISING>; + interrupts = <13 IRQ_TYPE_LEVEL_HIGH>; vdd-supply = <®5v0>; xceiver-supply = <®5v0>; }; diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 53e320c92a8b..ddaf46239e39 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -760,7 +760,7 @@ static int hi3110_open(struct net_device *net) { struct hi3110_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; - unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_RISING; + unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_HIGH; int ret; ret = open_candev(net); From 240ec6ca4b9a2f05bf1190e37c637820fcfbeecd Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 10 Sep 2018 09:39:03 -0700 Subject: [PATCH 0648/2608] IB/hfi1: Eliminate races in the SDMA send error path commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream. pq_update() can only be called in two places: from the completion function when the complete (npkts) sequence of packets has been submitted and processed, or from setup function if a subset of the packets were submitted (i.e. the error path). Currently both paths can call pq_update() if an error occurrs. This race will cause the n_req value to go negative, hanging file_close(), or cause a crash by freeing the txlist more than once. Several variables are used to determine SDMA send state. Most of these are unnecessary, and have code inspectible races between the setup function and the completion function, in both the send path and the error path. The request 'status' value can be set by the setup or by the completion function. This is code inspectibly racy. Since the status is not needed in the completion code or by the caller it has been removed. The request 'done' value races between usage by the setup and the completion function. The completion function does not need this. When the number of processed packets matches npkts, it is done. The 'has_error' value races between usage of the setup and the completion function. This can cause incorrect error handling and leave the n_req in an incorrect value (i.e. negative). Simplify the code by removing all of the unneeded state checks and variables. Clean up iovs node when it is freed. Eliminate race conditions in the error path: If all packets are submitted, the completion handler will set the completion status correctly (ok or aborted). If all packets are not submitted, the caller must wait until the submitted packets have completed, and then set the completion status. These two change eliminate the race condition in the error path. Reviewed-by: Mitko Haralanov Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/user_sdma.c | 85 ++++++++++++-------------- drivers/infiniband/hw/hfi1/user_sdma.h | 3 - 2 files changed, 38 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8c954a0ae3b6..c14ec04f2a89 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -328,7 +328,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, u8 opcode, sc, vl; u16 pkey; u32 slid; - int req_queued = 0; u16 dlid; u32 selector; @@ -392,7 +391,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->data_len = 0; req->pq = pq; req->cq = cq; - req->status = -1; req->ahg_idx = -1; req->iov_idx = 0; req->sent = 0; @@ -400,12 +398,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqcomp = 0; req->seqsubmitted = 0; req->tids = NULL; - req->done = 0; req->has_error = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); + /* The request is initialized, count it */ + atomic_inc(&pq->n_reqs); + if (req_opcode(info.ctrl) == EXPECTED) { /* expected must have a TID info and at least one data vector */ if (req->data_iovs < 2) { @@ -500,7 +500,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { req->data_iovs = i; - req->status = ret; goto free_req; } req->data_len += req->iovs[i].iov.iov_len; @@ -561,14 +560,10 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->ahg_idx = sdma_ahg_alloc(req->sde); set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); - atomic_inc(&pq->n_reqs); - req_queued = 1; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); - if (unlikely(ret < 0 && ret != -EBUSY)) { - req->status = ret; + if (unlikely(ret < 0 && ret != -EBUSY)) goto free_req; - } /* * It is possible that the SDMA engine would have processed all the @@ -588,14 +583,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, while (req->seqsubmitted != req->info.npkts) { ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { - if (ret != -EBUSY) { - req->status = ret; - WRITE_ONCE(req->has_error, 1); - if (ACCESS_ONCE(req->seqcomp) == - req->seqsubmitted - 1) - goto free_req; - return ret; - } + if (ret != -EBUSY) + goto free_req; wait_event_interruptible_timeout( pq->busy.wait_dma, (pq->state == SDMA_PKT_Q_ACTIVE), @@ -606,10 +595,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, *count += idx; return 0; free_req: - user_sdma_free_request(req, true); - if (req_queued) + /* + * If the submitted seqsubmitted == npkts, the completion routine + * controls the final state. If sequbmitted < npkts, wait for any + * outstanding packets to finish before cleaning up. + */ + if (req->seqsubmitted < req->info.npkts) { + if (req->seqsubmitted) + wait_event(pq->busy.wait_dma, + (req->seqcomp == req->seqsubmitted - 1)); + user_sdma_free_request(req, true); pq_update(pq); - set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); + set_comp_state(pq, cq, info.comp_idx, ERROR, ret); + } return ret; } @@ -917,7 +915,6 @@ dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not @@ -1347,11 +1344,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, return diff; } -/* - * SDMA tx request completion callback. Called when the SDMA progress - * state machine gets notification that the SDMA descriptors for this - * tx request have been processed by the DMA engine. Called in - * interrupt context. +/** + * user_sdma_txreq_cb() - SDMA tx request completion callback. + * @txreq: valid sdma tx request + * @status: success/failure of request + * + * Called when the SDMA progress state machine gets notification that + * the SDMA descriptors for this tx request have been processed by the + * DMA engine. Called in interrupt context. + * Only do work on completed sequences. */ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) { @@ -1360,7 +1361,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) struct user_sdma_request *req; struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq; - u16 idx; + enum hfi1_sdma_comp_state state = COMPLETE; if (!tx->req) return; @@ -1373,31 +1374,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) SDMA_DBG(req, "SDMA completion with error %d", status); WRITE_ONCE(req->has_error, 1); + state = ERROR; } req->seqcomp = tx->seqnum; kmem_cache_free(pq->txreq_cache, tx); - tx = NULL; - idx = req->info.comp_idx; - if (req->status == -1 && status == SDMA_TXREQ_S_OK) { - if (req->seqcomp == req->info.npkts - 1) { - req->status = 0; - user_sdma_free_request(req, false); - pq_update(pq); - set_comp_state(pq, cq, idx, COMPLETE, 0); - } - } else { - if (status != SDMA_TXREQ_S_OK) - req->status = status; - if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && - (READ_ONCE(req->done) || - READ_ONCE(req->has_error))) { - user_sdma_free_request(req, false); - pq_update(pq); - set_comp_state(pq, cq, idx, ERROR, req->status); - } - } + /* sequence isn't complete? We are done */ + if (req->seqcomp != req->info.npkts - 1) + return; + + user_sdma_free_request(req, false); + set_comp_state(pq, cq, req->info.comp_idx, state, status); + pq_update(pq); } static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) @@ -1430,6 +1419,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) if (!node) continue; + req->iovs[i].node = NULL; + if (unpin) hfi1_mmu_rb_remove(req->pq->handler, &node->rb); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 9b8bb5634c0d..5af52334b7dc 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -196,8 +196,6 @@ struct user_sdma_request { /* Writeable fields shared with interrupt */ u64 seqcomp ____cacheline_aligned_in_smp; u64 seqsubmitted; - /* status of the last txreq completed */ - int status; /* Send side fields */ struct list_head txps ____cacheline_aligned_in_smp; @@ -219,7 +217,6 @@ struct user_sdma_request { u16 tididx; /* progress index moving along the iovs array */ u8 iov_idx; - u8 done; u8 has_error; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; From 6dce186b48bbf6abbe207a9f18deaaa6b6363d22 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 23 Oct 2018 18:03:19 +0200 Subject: [PATCH 0649/2608] pinctrl: meson: fix pinconf bias disable [ Upstream commit e39f9dd8206ad66992ac0e6218ef1ba746f2cce9 ] If a bias is enabled on a pin of an Amlogic SoC, calling .pin_config_set() with PIN_CONFIG_BIAS_DISABLE will not disable the bias. Instead it will force a pull-down bias on the pin. Instead of the pull type register bank, the driver should access the pull enable register bank. Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs") Signed-off-by: Jerome Brunet Acked-by: Neil Armstrong Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 66ed70c12733..6c43322dbb97 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -273,7 +273,7 @@ static int meson_pinconf_set(struct pinctrl_dev *pcdev, unsigned int pin, dev_dbg(pc->dev, "pin %u: disable bias\n", pin); meson_calc_reg_and_bit(bank, pin, REG_PULL, ®, &bit); - ret = regmap_update_bits(pc->reg_pull, reg, + ret = regmap_update_bits(pc->reg_pullen, reg, BIT(bit), 0); if (ret) return ret; From 6af32ab13852790718ed3fb63cd09fc4d3e8afa3 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 6 Nov 2018 19:49:34 -0600 Subject: [PATCH 0650/2608] KVM: PPC: Move and undef TRACE_INCLUDE_PATH/FILE [ Upstream commit 28c5bcf74fa07c25d5bd118d1271920f51ce2a98 ] TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE are used by , so like that #include, they should be outside #ifdef protection. They also need to be #undefed before defining, in case multiple trace headers are included by the same C file. This became the case on book3e after commit cf4a6085151a ("powerpc/mm: Add missing tracepoint for tlbie"), leading to the following build error: CC arch/powerpc/kvm/powerpc.o In file included from arch/powerpc/kvm/powerpc.c:51:0: arch/powerpc/kvm/trace.h:9:0: error: "TRACE_INCLUDE_PATH" redefined [-Werror] #define TRACE_INCLUDE_PATH . ^ In file included from arch/powerpc/kvm/../mm/mmu_decl.h:25:0, from arch/powerpc/kvm/powerpc.c:48: ./arch/powerpc/include/asm/trace.h:224:0: note: this is the location of the previous definition #define TRACE_INCLUDE_PATH asm ^ cc1: all warnings being treated as errors Reported-by: Christian Zigotzky Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kvm/trace.h | 8 ++++++-- arch/powerpc/kvm/trace_booke.h | 9 +++++++-- arch/powerpc/kvm/trace_hv.h | 9 +++++++-- arch/powerpc/kvm/trace_pr.h | 9 +++++++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 491b0f715d6b..ea1d7c808319 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace /* * Tracepoint for guest mode entry. @@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + #include diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index ac640e81fdc5..3837842986aa 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_booke -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_booke #define kvm_trace_symbol_exit \ {0, "CRITICAL"}, \ @@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio, #endif /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + #include diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index bcfe8a987f6a..8a1e3b0047f1 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -9,8 +9,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_hv -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_hv #define kvm_trace_symbol_hcall \ {H_REMOVE, "H_REMOVE"}, \ @@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, #endif /* _TRACE_KVM_HV_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + #include diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 85785a370c0e..256530eb1354 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -8,8 +8,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_pr TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), @@ -272,4 +270,11 @@ TRACE_EVENT(kvm_unmap_hva, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_pr + #include From abaf1eb8132120f80071137cf5421a1c1eb7d720 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 5 Nov 2018 00:59:28 +0000 Subject: [PATCH 0651/2608] cpufreq: imx6q: add return value check for voltage scale [ Upstream commit 6ef28a04d1ccf718eee069b72132ce4aa1e52ab9 ] Add return value check for voltage scale when ARM clock rate change fail. Signed-off-by: Anson Huang Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/imx6q-cpufreq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 14466a9b01c0..63d28323a29c 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -135,8 +135,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) /* Ensure the arm clock divider is what we expect */ ret = clk_set_rate(arm_clk, new_freq * 1000); if (ret) { + int ret1; + dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - regulator_set_voltage_tol(arm_reg, volt_old, 0); + ret1 = regulator_set_voltage_tol(arm_reg, volt_old, 0); + if (ret1) + dev_warn(cpu_dev, + "failed to restore vddarm voltage: %d\n", ret1); return ret; } From 6e4fbdc7a308957c98d4599255a7a700ec92ca1c Mon Sep 17 00:00:00 2001 From: Xulin Sun Date: Tue, 6 Nov 2018 16:42:19 +0800 Subject: [PATCH 0652/2608] rtc: pcf2127: fix a kmemleak caused in pcf2127_i2c_gather_write [ Upstream commit 9bde0afb7a906f1dabdba37162551565740b862d ] pcf2127_i2c_gather_write() allocates memory as local variable for i2c_master_send(), after finishing the master transfer, the allocated memory should be freed. The kmemleak is reported: unreferenced object 0xffff80231e7dba80 (size 64): comm "hwclock", pid 27762, jiffies 4296880075 (age 356.944s) hex dump (first 32 bytes): 03 00 12 03 19 02 11 13 00 80 98 18 00 00 ff ff ................ 00 50 00 00 00 00 00 00 02 00 00 00 00 00 00 00 .P.............. backtrace: [] create_object+0xf8/0x278 [] kmemleak_alloc+0x74/0xa0 [] __kmalloc+0x1ac/0x348 [] pcf2127_i2c_gather_write+0x54/0xf8 [] _regmap_raw_write+0x464/0x850 [] regmap_bulk_write+0x1a4/0x348 [] pcf2127_rtc_set_time+0xac/0xe8 [] rtc_set_time+0x80/0x138 [] rtc_dev_ioctl+0x398/0x610 [] do_vfs_ioctl+0xb0/0x848 [] SyS_ioctl+0x8c/0xa8 [] el0_svc_naked+0x34/0x38 [] 0xffffffffffffffff Signed-off-by: Xulin Sun Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pcf2127.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index f33447c5db85..9f1b14bf91ae 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -248,6 +248,9 @@ static int pcf2127_i2c_gather_write(void *context, memcpy(buf + 1, val, val_size); ret = i2c_master_send(client, buf, val_size + 1); + + kfree(buf); + if (ret != val_size + 1) return ret < 0 ? ret : -EIO; From 66da887d8732b75b9dccc92b75d3972381fe62c7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Nov 2018 23:55:16 +0100 Subject: [PATCH 0653/2608] crypto: simd - correctly take reqsize of wrapped skcipher into account [ Upstream commit 508a1c4df085a547187eed346f1bfe5e381797f1 ] The simd wrapper's skcipher request context structure consists of a single subrequest whose size is taken from the subordinate skcipher. However, in simd_skcipher_init(), the reqsize that is retrieved is not from the subordinate skcipher but from the cryptd request structure, whose size is completely unrelated to the actual wrapped skcipher. Reported-by: Qian Cai Signed-off-by: Ard Biesheuvel Tested-by: Qian Cai Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/simd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/simd.c b/crypto/simd.c index 88203370a62f..894c62944106 100644 --- a/crypto/simd.c +++ b/crypto/simd.c @@ -126,8 +126,9 @@ static int simd_skcipher_init(struct crypto_skcipher *tfm) ctx->cryptd_tfm = cryptd_tfm; - reqsize = sizeof(struct skcipher_request); - reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base); + reqsize = crypto_skcipher_reqsize(cryptd_skcipher_child(cryptd_tfm)); + reqsize = max(reqsize, crypto_skcipher_reqsize(&cryptd_tfm->base)); + reqsize += sizeof(struct skcipher_request); crypto_skcipher_set_reqsize(tfm, reqsize); From 5565c30a0bc770764cd085db881bc7a9f5e9d63c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Nov 2018 15:58:40 -0700 Subject: [PATCH 0654/2608] floppy: fix race condition in __floppy_read_block_0() [ Upstream commit de7b75d82f70c5469675b99ad632983c50b6f7e7 ] LKP recently reported a hang at bootup in the floppy code: [ 245.678853] INFO: task mount:580 blocked for more than 120 seconds. [ 245.679906] Tainted: G T 4.19.0-rc6-00172-ga9f38e1 #1 [ 245.680959] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 245.682181] mount D 6372 580 1 0x00000004 [ 245.683023] Call Trace: [ 245.683425] __schedule+0x2df/0x570 [ 245.683975] schedule+0x2d/0x80 [ 245.684476] schedule_timeout+0x19d/0x330 [ 245.685090] ? wait_for_common+0xa5/0x170 [ 245.685735] wait_for_common+0xac/0x170 [ 245.686339] ? do_sched_yield+0x90/0x90 [ 245.686935] wait_for_completion+0x12/0x20 [ 245.687571] __floppy_read_block_0+0xfb/0x150 [ 245.688244] ? floppy_resume+0x40/0x40 [ 245.688844] floppy_revalidate+0x20f/0x240 [ 245.689486] check_disk_change+0x43/0x60 [ 245.690087] floppy_open+0x1ea/0x360 [ 245.690653] __blkdev_get+0xb4/0x4d0 [ 245.691212] ? blkdev_get+0x1db/0x370 [ 245.691777] blkdev_get+0x1f3/0x370 [ 245.692351] ? path_put+0x15/0x20 [ 245.692871] ? lookup_bdev+0x4b/0x90 [ 245.693539] blkdev_get_by_path+0x3d/0x80 [ 245.694165] mount_bdev+0x2a/0x190 [ 245.694695] squashfs_mount+0x10/0x20 [ 245.695271] ? squashfs_alloc_inode+0x30/0x30 [ 245.695960] mount_fs+0xf/0x90 [ 245.696451] vfs_kern_mount+0x43/0x130 [ 245.697036] do_mount+0x187/0xc40 [ 245.697563] ? memdup_user+0x28/0x50 [ 245.698124] ksys_mount+0x60/0xc0 [ 245.698639] sys_mount+0x19/0x20 [ 245.699167] do_int80_syscall_32+0x61/0x130 [ 245.699813] entry_INT80_32+0xc7/0xc7 showing that we never complete that read request. The reason is that the completion setup is racy - it initializes the completion event AFTER submitting the IO, which means that the IO could complete before/during the init. If it does, we are passing garbage to complete() and we may sleep forever waiting for the event to occur. Fixes: 7b7b68bba5ef ("floppy: bail out in open() if drive is not responding to block0 read") Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/floppy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 3d0287e212fe..a7f212ea17bf 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4146,10 +4146,11 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) bio.bi_end_io = floppy_rb0_cb; bio_set_op_attrs(&bio, REQ_OP_READ, 0); + init_completion(&cbdata.complete); + submit_bio(&bio); process_fd_request(); - init_completion(&cbdata.complete); wait_for_completion(&cbdata.complete); __free_page(page); From d5e236ba5bcdd95751980558b7af5519d05e19d1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 23:37:58 +1100 Subject: [PATCH 0655/2608] powerpc/io: Fix the IO workarounds code to work with Radix [ Upstream commit 43c6494fa1499912c8177e71450c0279041152a6 ] Back in 2006 Ben added some workarounds for a misbehaviour in the Spider IO bridge used on early Cell machines, see commit 014da7ff47b5 ("[POWERPC] Cell "Spider" MMIO workarounds"). Later these were made to be generic, ie. not tied specifically to Spider. The code stashes a token in the high bits (59-48) of virtual addresses used for IO (eg. returned from ioremap()). This works fine when using the Hash MMU, but when we're using the Radix MMU the bits used for the token overlap with some of the bits of the virtual address. This is because the maximum virtual address is larger with Radix, up to c00fffffffffffff, and in fact we use that high part of the address range for ioremap(), see RADIX_KERN_IO_START. As it happens the bits that are used overlap with the bits that differentiate an IO address vs a linear map address. If the resulting address lies outside the linear mapping we will crash (see below), if not we just corrupt memory. virtio-pci 0000:00:00.0: Using 64-bit direct DMA at offset 800000000000000 Unable to handle kernel paging request for data at address 0xc000000080000014 ... CFAR: c000000000626b98 DAR: c000000080000014 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000006c54fc c00000003e523378 c0000000016de600 0000000000000000 GPR04: c00c000080000014 0000000000000007 0fffffff000affff 0000000000000030 ^^^^ ... NIP [c000000000626c5c] .iowrite8+0xec/0x100 LR [c0000000006c992c] .vp_reset+0x2c/0x90 Call Trace: .pci_bus_read_config_dword+0xc4/0x120 (unreliable) .register_virtio_device+0x13c/0x1c0 .virtio_pci_probe+0x148/0x1f0 .local_pci_probe+0x68/0x140 .pci_device_probe+0x164/0x220 .really_probe+0x274/0x3b0 .driver_probe_device+0x80/0x170 .__driver_attach+0x14c/0x150 .bus_for_each_dev+0xb8/0x130 .driver_attach+0x34/0x50 .bus_add_driver+0x178/0x2f0 .driver_register+0x90/0x1a0 .__pci_register_driver+0x6c/0x90 .virtio_pci_driver_init+0x2c/0x40 .do_one_initcall+0x64/0x280 .kernel_init_freeable+0x36c/0x474 .kernel_init+0x24/0x160 .ret_from_kernel_thread+0x58/0x7c This hasn't been a problem because CONFIG_PPC_IO_WORKAROUNDS which enables this code is usually not enabled. It is only enabled when it's selected by PPC_CELL_NATIVE which is only selected by PPC_IBM_CELL_BLADE and that in turn depends on BIG_ENDIAN. So in order to hit the bug you need to build a big endian kernel, with IBM Cell Blade support enabled, as well as Radix MMU support, and then boot that on Power9 using Radix MMU. Still we can fix the bug, so let's do that. We simply use fewer bits for the token, taking the union of the restrictions on the address from both Hash and Radix, we end up with 8 bits we can use for the token. The only user of the token is iowa_mem_find_bus() which only supports 8 token values, so 8 bits is plenty for that. Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/io.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 422f99cf9924..e6d33eed8202 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -287,19 +287,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * - * This bit field is 12 bits and is at the top of the IO virtual - * addresses PCI_IO_INDIRECT_TOKEN_MASK. + * The highest address in the kernel virtual space are: * - * The kernel virtual space is thus: + * d0003fffffffffff # with Hash MMU + * c00fffffffffffff # with Radix MMU * - * 0xD000000000000000 : vmalloc - * 0xD000080000000000 : PCI PHB IO space - * 0xD000080080000000 : ioremap - * 0xD0000fffffffffff : end of ioremap region - * - * Since the top 4 bits are reserved as the region ID, we use thus - * the next 12 bits and keep 4 bits available for the future if the - * virtual address space is ever to be extended. + * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits + * that can be used for the field. * * The direct IO mapping operations will then mask off those bits * before doing the actual access, though that only happen when @@ -311,8 +305,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, */ #ifdef CONFIG_PPC_INDIRECT_MMIO -#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul -#define PCI_IO_IND_TOKEN_SHIFT 48 +#define PCI_IO_IND_TOKEN_SHIFT 52 +#define PCI_IO_IND_TOKEN_MASK (0xfful << PCI_IO_IND_TOKEN_SHIFT) #define PCI_FIX_ADDR(addr) \ ((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK)) #define PCI_GET_ADDR_TOKEN(addr) \ From e1f0f55f27883820021a6cbc69deba0576098d0d Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Oct 2018 10:04:18 -0700 Subject: [PATCH 0656/2608] perf/x86/intel/uncore: Add more IMC PCI IDs for KabyLake and CoffeeLake CPUs [ Upstream commit c10a8de0d32e95b0b8c7c17b6dc09baea5a5a899 ] KabyLake and CoffeeLake CPUs have the same client uncore events as SkyLake. Add the PCI IDs for the KabyLake Y, U, S processor lines and CoffeeLake U, H, S processor lines. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181019170419.378-1-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/intel/uncore_snb.c | 115 ++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index aee5e8496be4..aa4e6f4e6a01 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,25 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f +#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f +#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 +#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30 +#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18 +#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6 +#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31 +#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 +#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca +#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -632,7 +651,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, - + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -681,6 +775,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ + IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ + IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ + IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ + IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ + IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ + IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ + IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */ + IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */ + IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */ + IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */ + IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */ + IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */ + IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */ + IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */ + IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ + IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ + IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ { /* end marker */ } }; From 677805a95934eb37bcc2482766d6160e6d216d98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Nov 2018 16:06:51 -0500 Subject: [PATCH 0657/2608] SUNRPC: Fix a bogus get/put in generic_key_to_expire() [ Upstream commit e3d5e573a54dabdc0f9f3cb039d799323372b251 ] Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/auth_generic.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index f1df9837f1ac..1ac08dcbf85d 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred) { struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - bool ret; - - get_rpccred(cred); - ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); - put_rpccred(cred); - - return ret; + return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); } static const struct rpc_credops generic_credops = { From 281a4f41c6be4f6c09859f1877ab7a8b294ead62 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 20 Sep 2018 08:59:14 -0400 Subject: [PATCH 0658/2608] kdb: Use strscpy with destination buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c2b94c72d93d0929f48157eef128c4f9d2e603ce ] gcc 8.1.0 warns with: kernel/debug/kdb/kdb_support.c: In function ‘kallsyms_symbol_next’: kernel/debug/kdb/kdb_support.c:239:4: warning: ‘strncpy’ specified bound depends on the length of the source argument [-Wstringop-overflow=] strncpy(prefix_name, name, strlen(name)+1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/debug/kdb/kdb_support.c:239:31: note: length computed here Use strscpy() with the destination buffer size, and use ellipses when displaying truncated symbols. v2: Use strscpy() Signed-off-by: Prarit Bhargava Cc: Jonathan Toppins Cc: Jason Wessel Cc: Daniel Thompson Cc: kgdb-bugreport@lists.sourceforge.net Reviewed-by: Daniel Thompson Signed-off-by: Daniel Thompson Signed-off-by: Sasha Levin --- kernel/debug/kdb/kdb_io.c | 15 +++++++++------ kernel/debug/kdb/kdb_private.h | 2 +- kernel/debug/kdb/kdb_support.c | 10 +++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index ed5d34925ad0..6a4b41484afe 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize) int count; int i; int diag, dtab_count; - int key; + int key, buf_size, ret; diag = kdbgetintenv("DTABCOUNT", &dtab_count); @@ -336,9 +336,8 @@ poll_again: else p_tmp = tmpbuffer; len = strlen(p_tmp); - count = kallsyms_symbol_complete(p_tmp, - sizeof(tmpbuffer) - - (p_tmp - tmpbuffer)); + buf_size = sizeof(tmpbuffer) - (p_tmp - tmpbuffer); + count = kallsyms_symbol_complete(p_tmp, buf_size); if (tab == 2 && count > 0) { kdb_printf("\n%d symbols are found.", count); if (count > dtab_count) { @@ -350,9 +349,13 @@ poll_again: } kdb_printf("\n"); for (i = 0; i < count; i++) { - if (WARN_ON(!kallsyms_symbol_next(p_tmp, i))) + ret = kallsyms_symbol_next(p_tmp, i, buf_size); + if (WARN_ON(!ret)) break; - kdb_printf("%s ", p_tmp); + if (ret != -E2BIG) + kdb_printf("%s ", p_tmp); + else + kdb_printf("%s... ", p_tmp); *(p_tmp + len) = '\0'; } if (i >= dtab_count) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index fc224fbcf954..f2158e463a0f 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -83,7 +83,7 @@ typedef struct __ksymtab { unsigned long sym_start; unsigned long sym_end; } kdb_symtab_t; -extern int kallsyms_symbol_next(char *prefix_name, int flag); +extern int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size); extern int kallsyms_symbol_complete(char *prefix_name, int max_len); /* Exported Symbols for kernel loadable modules to use. */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 84422d2b95c0..014f6fbb3832 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -221,11 +221,13 @@ int kallsyms_symbol_complete(char *prefix_name, int max_len) * Parameters: * prefix_name prefix of a symbol name to lookup * flag 0 means search from the head, 1 means continue search. + * buf_size maximum length that can be written to prefix_name + * buffer * Returns: * 1 if a symbol matches the given prefix. * 0 if no string found */ -int kallsyms_symbol_next(char *prefix_name, int flag) +int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size) { int prefix_len = strlen(prefix_name); static loff_t pos; @@ -235,10 +237,8 @@ int kallsyms_symbol_next(char *prefix_name, int flag) pos = 0; while ((name = kdb_walk_kallsyms(&pos))) { - if (strncmp(name, prefix_name, prefix_len) == 0) { - strncpy(prefix_name, name, strlen(name)+1); - return 1; - } + if (!strncmp(name, prefix_name, prefix_len)) + return strscpy(prefix_name, name, buf_size); } return 0; } From 2a968a024116dc75b9488744a4711b0166ac3172 Mon Sep 17 00:00:00 2001 From: Satheesh Rajendran Date: Thu, 8 Nov 2018 10:47:56 +0530 Subject: [PATCH 0659/2608] powerpc/numa: Suppress "VPHN is not supported" messages [ Upstream commit 437ccdc8ce629470babdda1a7086e2f477048cbd ] When VPHN function is not supported and during cpu hotplug event, kernel prints message 'VPHN function not supported. Disabling polling...'. Currently it prints on every hotplug event, it floods dmesg when a KVM guest tries to hotplug huge number of vcpus, let's just print once and suppress further kernel prints. Signed-off-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9fead0796364..40fb9a8835fe 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1261,7 +1261,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_FUNCTION: - printk(KERN_INFO + printk_once(KERN_INFO "VPHN is not supported. Disabling polling...\n"); stop_topology_update(); break; From d6bfb89267efe3ae5eef1fc11984fae3dcd95137 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:41 -0800 Subject: [PATCH 0660/2608] efi/arm: Revert deferred unmap of early memmap mapping [ Upstream commit 33412b8673135b18ea42beb7f5117ed0091798b6 ] Commit: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory map longer for BGRT") deferred the unmap of the early mapping of the UEFI memory map to accommodate the ACPI BGRT code, which looks up the memory type that backs the BGRT table to validate it against the requirements of the UEFI spec. Unfortunately, this causes problems on ARM, which does not permit early mappings to persist after paging_init() is called, resulting in a WARN() splat. Since we don't support the BGRT table on ARM anway, let's revert ARM to the old behaviour, which is to take down the early mapping at the end of efi_init(). Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory ...") Link: http://lkml.kernel.org/r/20181114175544.12860-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/firmware/efi/arm-init.c | 4 ++++ drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/memmap.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index a7c522eac640..312f9f32e168 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -265,6 +265,10 @@ void __init efi_init(void) (params.mmap & ~PAGE_MASK))); init_screen_info(); + + /* ARM does not permit early mappings to persist across paging_init() */ + if (IS_ENABLED(CONFIG_ARM)) + efi_memmap_unmap(); } static int __init register_gop_device(void) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 8995a48bd067..ad1530aff633 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -122,7 +122,7 @@ static int __init arm_enable_runtime_services(void) { u64 mapsize; - if (!efi_enabled(EFI_BOOT) || !efi_enabled(EFI_MEMMAP)) { + if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); return 0; } diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 5fc70520e04c..1907db2b38d8 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -118,6 +118,9 @@ int __init efi_memmap_init_early(struct efi_memory_map_data *data) void __init efi_memmap_unmap(void) { + if (!efi_enabled(EFI_MEMMAP)) + return; + if (!efi.memmap.late) { unsigned long size; From e28ae7aaa9920570820c1af795663df317946ad7 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Fri, 16 Nov 2018 15:07:56 -0800 Subject: [PATCH 0661/2608] z3fold: fix possible reclaim races [ Upstream commit ca0246bb97c23da9d267c2107c07fb77e38205c9 ] Reclaim and free can race on an object which is basically fine but in order for reclaim to be able to map "freed" object we need to encode object length in the handle. handle_to_chunks() is then introduced to extract object length from a handle and use it during mapping. Moreover, to avoid racing on a z3fold "headless" page release, we should not try to free that page in z3fold_free() if the reclaim bit is set. Also, in the unlikely case of trying to reclaim a page being freed, we should not proceed with that page. While at it, fix the page accounting in reclaim function. This patch supersedes "[PATCH] z3fold: fix reclaim lock-ups". Link: http://lkml.kernel.org/r/20181105162225.74e8837d03583a9b707cf559@gmail.com Signed-off-by: Vitaly Wool Signed-off-by: Jongseok Kim Reported-by-by: Jongseok Kim Reviewed-by: Snild Dolkow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/z3fold.c | 103 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 40 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index f33403d718ac..2813cdfa46b9 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -99,6 +99,7 @@ struct z3fold_header { #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) #define BUDDY_MASK (0x3) +#define BUDDY_SHIFT 2 /** * struct z3fold_pool - stores metadata for each z3fold pool @@ -145,7 +146,7 @@ enum z3fold_page_flags { MIDDLE_CHUNK_MAPPED, NEEDS_COMPACTING, PAGE_STALE, - UNDER_RECLAIM + PAGE_CLAIMED, /* by either reclaim or free */ }; /***************** @@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); clear_bit(NEEDS_COMPACTING, &page->private); clear_bit(PAGE_STALE, &page->private); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); spin_lock_init(&zhdr->page_lock); kref_init(&zhdr->refcount); @@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) unsigned long handle; handle = (unsigned long)zhdr; - if (bud != HEADLESS) - handle += (bud + zhdr->first_num) & BUDDY_MASK; + if (bud != HEADLESS) { + handle |= (bud + zhdr->first_num) & BUDDY_MASK; + if (bud == LAST) + handle |= (zhdr->last_chunks << BUDDY_SHIFT); + } return handle; } @@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) return (struct z3fold_header *)(handle & PAGE_MASK); } +/* only for LAST bud, returns zero otherwise */ +static unsigned short handle_to_chunks(unsigned long handle) +{ + return (handle & ~PAGE_MASK) >> BUDDY_SHIFT; +} + /* * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle * but that doesn't matter. because the masking will result in the @@ -717,37 +727,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) page = virt_to_page(zhdr); if (test_bit(PAGE_HEADLESS, &page->private)) { - /* HEADLESS page stored */ - bud = HEADLESS; - } else { - z3fold_page_lock(zhdr); - bud = handle_to_buddy(handle); - - switch (bud) { - case FIRST: - zhdr->first_chunks = 0; - break; - case MIDDLE: - zhdr->middle_chunks = 0; - zhdr->start_middle = 0; - break; - case LAST: - zhdr->last_chunks = 0; - break; - default: - pr_err("%s: unknown bud %d\n", __func__, bud); - WARN_ON(1); - z3fold_page_unlock(zhdr); - return; + /* if a headless page is under reclaim, just leave. + * NB: we use test_and_set_bit for a reason: if the bit + * has not been set before, we release this page + * immediately so we don't care about its value any more. + */ + if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) { + spin_lock(&pool->lock); + list_del(&page->lru); + spin_unlock(&pool->lock); + free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); } + return; } - if (bud == HEADLESS) { - spin_lock(&pool->lock); - list_del(&page->lru); - spin_unlock(&pool->lock); - free_z3fold_page(page); - atomic64_dec(&pool->pages_nr); + /* Non-headless case */ + z3fold_page_lock(zhdr); + bud = handle_to_buddy(handle); + + switch (bud) { + case FIRST: + zhdr->first_chunks = 0; + break; + case MIDDLE: + zhdr->middle_chunks = 0; + break; + case LAST: + zhdr->last_chunks = 0; + break; + default: + pr_err("%s: unknown bud %d\n", __func__, bud); + WARN_ON(1); + z3fold_page_unlock(zhdr); return; } @@ -755,7 +767,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) atomic64_dec(&pool->pages_nr); return; } - if (test_bit(UNDER_RECLAIM, &page->private)) { + if (test_bit(PAGE_CLAIMED, &page->private)) { z3fold_page_unlock(zhdr); return; } @@ -833,20 +845,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) } list_for_each_prev(pos, &pool->lru) { page = list_entry(pos, struct page, lru); - if (test_bit(PAGE_HEADLESS, &page->private)) - /* candidate found */ - break; + + /* this bit could have been set by free, in which case + * we pass over to the next page in the pool. + */ + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + continue; zhdr = page_address(page); - if (!z3fold_page_trylock(zhdr)) + if (test_bit(PAGE_HEADLESS, &page->private)) + break; + + if (!z3fold_page_trylock(zhdr)) { + zhdr = NULL; continue; /* can't evict at this point */ + } kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); zhdr->cpu = -1; - set_bit(UNDER_RECLAIM, &page->private); break; } + if (!zhdr) + break; + list_del_init(&page->lru); spin_unlock(&pool->lock); @@ -895,6 +917,7 @@ next: if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); return 0; } spin_lock(&pool->lock); @@ -902,7 +925,7 @@ next: spin_unlock(&pool->lock); } else { z3fold_page_lock(zhdr); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); @@ -961,7 +984,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) set_bit(MIDDLE_CHUNK_MAPPED, &page->private); break; case LAST: - addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); + addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); break; default: pr_err("unknown buddy id %d\n", buddy); From a84c872c8b306e11c95c4912f4bed7f567e901b4 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 16 Nov 2018 15:08:39 -0800 Subject: [PATCH 0662/2608] tmpfs: make lseek(SEEK_DATA/SEK_HOLE) return ENXIO with a negative offset [ Upstream commit 1a413646931cb14442065cfc17561e50f5b5bb44 ] Other filesystems such as ext4, f2fs and ubifs all return ENXIO when lseek (SEEK_DATA or SEEK_HOLE) requests a negative offset. man 2 lseek says : EINVAL whence is not valid. Or: the resulting file offset would be : negative, or beyond the end of a seekable device. : : ENXIO whence is SEEK_DATA or SEEK_HOLE, and the file offset is beyond : the end of the file. Make tmpfs return ENXIO under these circumstances as well. After this, tmpfs also passes xfstests's generic/448. [akpm@linux-foundation.org: rewrite changelog] Link: http://lkml.kernel.org/r/1540434176-14349-1-git-send-email-yuyufen@huawei.com Signed-off-by: Yufen Yu Reviewed-by: Andrew Morton Cc: Al Viro Cc: Hugh Dickins Cc: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/shmem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index ea786a504e1b..fa08f56fd5e5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2590,9 +2590,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) inode_lock(inode); /* We're holding i_mutex so we can access i_size directly */ - if (offset < 0) - offset = -EINVAL; - else if (offset >= inode->i_size) + if (offset < 0 || offset >= inode->i_size) offset = -ENXIO; else { start = offset >> PAGE_SHIFT; From c6a4b3c3b81b818dc0138c813fd7656aeb78aa11 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 16 Nov 2018 15:08:53 -0800 Subject: [PATCH 0663/2608] mm, page_alloc: check for max order in hot path [ Upstream commit c63ae43ba53bc432b414fd73dd5f4b01fcb1ab43 ] Konstantin has noticed that kvmalloc might trigger the following warning: WARNING: CPU: 0 PID: 6676 at mm/vmstat.c:986 __fragmentation_index+0x54/0x60 [...] Call Trace: fragmentation_index+0x76/0x90 compaction_suitable+0x4f/0xf0 shrink_node+0x295/0x310 node_reclaim+0x205/0x250 get_page_from_freelist+0x649/0xad0 __alloc_pages_nodemask+0x12a/0x2a0 kmalloc_large_node+0x47/0x90 __kmalloc_node+0x22b/0x2e0 kvmalloc_node+0x3e/0x70 xt_alloc_table_info+0x3a/0x80 [x_tables] do_ip6t_set_ctl+0xcd/0x1c0 [ip6_tables] nf_setsockopt+0x44/0x60 SyS_setsockopt+0x6f/0xc0 do_syscall_64+0x67/0x120 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 the problem is that we only check for an out of bound order in the slow path and the node reclaim might happen from the fast path already. This is fixable by making sure that kvmalloc doesn't ever use kmalloc for requests that are larger than KMALLOC_MAX_SIZE but this also shows that the code is rather fragile. A recent UBSAN report just underlines that by the following report UBSAN: Undefined behaviour in mm/page_alloc.c:3117:19 shift exponent 51 is too large for 32-bit type 'int' CPU: 0 PID: 6520 Comm: syz-executor1 Not tainted 4.19.0-rc2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xd2/0x148 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x94 lib/ubsan.c:159 __ubsan_handle_shift_out_of_bounds+0x2b6/0x30b lib/ubsan.c:425 __zone_watermark_ok+0x2c7/0x400 mm/page_alloc.c:3117 zone_watermark_fast mm/page_alloc.c:3216 [inline] get_page_from_freelist+0xc49/0x44c0 mm/page_alloc.c:3300 __alloc_pages_nodemask+0x21e/0x640 mm/page_alloc.c:4370 alloc_pages_current+0xcc/0x210 mm/mempolicy.c:2093 alloc_pages include/linux/gfp.h:509 [inline] __get_free_pages+0x12/0x60 mm/page_alloc.c:4414 dma_mem_alloc+0x36/0x50 arch/x86/include/asm/floppy.h:156 raw_cmd_copyin drivers/block/floppy.c:3159 [inline] raw_cmd_ioctl drivers/block/floppy.c:3206 [inline] fd_locked_ioctl+0xa00/0x2c10 drivers/block/floppy.c:3544 fd_ioctl+0x40/0x60 drivers/block/floppy.c:3571 __blkdev_driver_ioctl block/ioctl.c:303 [inline] blkdev_ioctl+0xb3c/0x1a30 block/ioctl.c:601 block_ioctl+0x105/0x150 fs/block_dev.c:1883 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x1c0/0x1150 fs/ioctl.c:687 ksys_ioctl+0x9e/0xb0 fs/ioctl.c:702 __do_sys_ioctl fs/ioctl.c:709 [inline] __se_sys_ioctl fs/ioctl.c:707 [inline] __x64_sys_ioctl+0x7e/0xc0 fs/ioctl.c:707 do_syscall_64+0xc4/0x510 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Note that this is not a kvmalloc path. It is just that the fast path really depends on having sanitzed order as well. Therefore move the order check to the fast path. Link: http://lkml.kernel.org/r/20181113094305.GM15120@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Konstantin Khlebnikov Reported-by: Kyungtae Kim Acked-by: Vlastimil Babka Cc: Balbir Singh Cc: Mel Gorman Cc: Pavel Tatashin Cc: Oscar Salvador Cc: Mike Rapoport Cc: Aaron Lu Cc: Joonsoo Kim Cc: Byoungyoung Lee Cc: "Dae R. Jeong" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page_alloc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a604b5da6755..2074f424dabf 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3867,17 +3867,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned int cpuset_mems_cookie; int reserve_flags; - /* - * In the slowpath, we sanity check order to avoid ever trying to - * reclaim >= MAX_ORDER areas which will never succeed. Callers may - * be using allocators in order of preference for an area that is - * too large. - */ - if (order >= MAX_ORDER) { - WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); - return NULL; - } - /* * We also sanity check to catch abuse of atomic reserves being used by * callers that are not in atomic context. @@ -4179,6 +4168,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { }; + /* + * There are several places where we assume that the order value is sane + * so bail out early if the request is out of bound. + */ + if (unlikely(order >= MAX_ORDER)) { + WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); + return NULL; + } + gfp_mask &= gfp_allowed_mask; alloc_mask = gfp_mask; if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) From c1ede7043d74de0022872921b453d106341fcf93 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:45 +0200 Subject: [PATCH 0664/2608] of: add helper to lookup compatible child node [ Upstream commit 36156f9241cb0f9e37d998052873ca7501ad4b36 ] Add of_get_compatible_child() helper that can be used to lookup compatible child nodes. Several drivers currently use of_find_compatible_node() to lookup child nodes while failing to notice that the of_find_ functions search the entire tree depth-first (from a given start node) and therefore can match unrelated nodes. The fact that these functions also drop a reference to the node they start searching from (e.g. the parent node) is typically also overlooked, something which can lead to use-after-free bugs. Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/base.c | 25 +++++++++++++++++++++++++ include/linux/of.h | 8 ++++++++ 2 files changed, 33 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index 63897531cd75..ce8a6e0c9b6a 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -737,6 +737,31 @@ struct device_node *of_get_next_available_child(const struct device_node *node, } EXPORT_SYMBOL(of_get_next_available_child); +/** + * of_get_compatible_child - Find compatible child node + * @parent: parent node + * @compatible: compatible string + * + * Lookup child node whose compatible property contains the given compatible + * string. + * + * Returns a node pointer with refcount incremented, use of_node_put() on it + * when done; or NULL if not found. + */ +struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible) +{ + struct device_node *child; + + for_each_child_of_node(parent, child) { + if (of_device_is_compatible(child, compatible)) + break; + } + + return child; +} +EXPORT_SYMBOL(of_get_compatible_child); + /** * of_get_child_by_name - Find the child node by name for a given parent * @node: parent node diff --git a/include/linux/of.h b/include/linux/of.h index b240ed69dc96..70b7dacf9238 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -288,6 +288,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node, extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); +extern struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); @@ -625,6 +627,12 @@ static inline bool of_have_populated_dt(void) return false; } +static inline struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible) +{ + return NULL; +} + static inline struct device_node *of_get_child_by_name( const struct device_node *node, const char *name) From 2ae0ac647a773fc3db57770a202b1c62a66fe3db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:52 +0200 Subject: [PATCH 0665/2608] NFC: nfcmrvl_uart: fix OF child-node lookup [ Upstream commit 5bf59773aaf36dd62117dc83d50e1bbf9ef432da ] Use the new of_get_compatible_child() helper to lookup the nfc child node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-child) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the parent node). Fixes: e097dc624f78 ("NFC: nfcmrvl: add UART driver") Fixes: d8e018c0b321 ("NFC: nfcmrvl: update device tree bindings for Marvell NFC") Cc: stable # 4.2 Cc: Vincent Cuissard Cc: Samuel Ortiz Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/nfc/nfcmrvl/uart.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c index 91162f8e0366..9a22056e8d9e 100644 --- a/drivers/nfc/nfcmrvl/uart.c +++ b/drivers/nfc/nfcmrvl/uart.c @@ -73,10 +73,9 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node, struct device_node *matched_node; int ret; - matched_node = of_find_compatible_node(node, NULL, "marvell,nfc-uart"); + matched_node = of_get_compatible_child(node, "marvell,nfc-uart"); if (!matched_node) { - matched_node = of_find_compatible_node(node, NULL, - "mrvl,nfc-uart"); + matched_node = of_get_compatible_child(node, "mrvl,nfc-uart"); if (!matched_node) return -ENODEV; } From 6f95f0734968025278b6d8b4d0c589b7afeb9c96 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:50 +0200 Subject: [PATCH 0666/2608] net: bcmgenet: fix OF child-node lookup [ Upstream commit d397dbe606120a1ea1b11b0020c3f7a3852da5ac ] Use the new of_get_compatible_child() helper to lookup the mdio child node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-child) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the node of the device being probed). Fixes: aa09677cba42 ("net: bcmgenet: add MDIO routines") Cc: stable # 3.15 Cc: David S. Miller Reviewed-by: Florian Fainelli Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index abbd2894f870..c421e2753c8c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -360,7 +360,7 @@ static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv) if (!compat) return NULL; - priv->mdio_dn = of_find_compatible_node(dn, NULL, compat); + priv->mdio_dn = of_get_compatible_child(dn, compat); kfree(compat); if (!priv->mdio_dn) { dev_err(kdev, "unable to find MDIO bus node\n"); From 279eb9ce4cbc20ae82f73fd3b2d05184f8f40d53 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:46 +0200 Subject: [PATCH 0667/2608] drm/mediatek: fix OF sibling-node lookup [ Upstream commit ceff2f4dcd44abf35864d9a99f85ac619e89a01d ] Use the new of_get_compatible_child() helper to lookup the sibling instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-sibling) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the parent device node). While at it, also fix the related cec-node reference leak. Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support") Cc: stable # 4.8 Cc: Junzhi Zhao Cc: Philipp Zabel Cc: CK Hu Cc: David Airlie Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 690c67507cbc..aba27ea9cea5 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1446,8 +1446,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, } /* The CEC module handles HDMI hotplug detection */ - cec_np = of_find_compatible_node(np->parent, NULL, - "mediatek,mt8173-cec"); + cec_np = of_get_compatible_child(np->parent, "mediatek,mt8173-cec"); if (!cec_np) { dev_err(dev, "Failed to find CEC node\n"); return -EINVAL; @@ -1457,8 +1456,10 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, if (!cec_pdev) { dev_err(hdmi->dev, "Waiting for CEC device %pOF\n", cec_np); + of_node_put(cec_np); return -EPROBE_DEFER; } + of_node_put(cec_np); hdmi->cec_dev = &cec_pdev->dev; /* From 5f2b2c591fc8fd84c70fd591aa13d903e32c2df7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:53 +0200 Subject: [PATCH 0668/2608] power: supply: twl4030-charger: fix OF sibling-node lookup [ Upstream commit 9844fb2e351311210e6660a9a1c62d17424a6145 ] Use the new of_get_compatible_child() helper to lookup the usb sibling node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (non-sibling) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the parent device node). While at it, also fix the related phy-node reference leak. Fixes: f5e4edb8c888 ("power: twl4030_charger: find associated phy by more reliable means.") Cc: stable # 4.2 Cc: NeilBrown Cc: Felipe Balbi Cc: Sebastian Reichel Reviewed-by: Sebastian Reichel Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/power/supply/twl4030_charger.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index a5915f498eea..0cc12bfe7b02 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -996,12 +996,13 @@ static int twl4030_bci_probe(struct platform_device *pdev) if (bci->dev->of_node) { struct device_node *phynode; - phynode = of_find_compatible_node(bci->dev->of_node->parent, - NULL, "ti,twl4030-usb"); + phynode = of_get_compatible_child(bci->dev->of_node->parent, + "ti,twl4030-usb"); if (phynode) { bci->usb_nb.notifier_call = twl4030_bci_usb_ncb; bci->transceiver = devm_usb_get_phy_by_node( bci->dev, phynode, &bci->usb_nb); + of_node_put(phynode); if (IS_ERR(bci->transceiver)) { ret = PTR_ERR(bci->transceiver); if (ret == -EPROBE_DEFER) From 38f94eca31d47db6429edb1ba6a560db1f34d298 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 27 Nov 2018 11:15:20 -0800 Subject: [PATCH 0669/2608] arm64: remove no-op -p linker flag (commit 1a381d4a0a9a0f999a13faaba22bf6b3fc80dcb9 upstream) Linking the ARM64 defconfig kernel with LLVM lld fails with the error: ld.lld: error: unknown argument: -p Makefile:1015: recipe for target 'vmlinux' failed Without this flag, the ARM64 defconfig kernel successfully links with lld and boots on Dragonboard 410c. After digging through binutils source and changelogs, it turns out that -p is only relevant to ancient binutils installations targeting 32-bit ARM. binutils accepts -p for AArch64 too, but it's always been undocumented and silently ignored. A comment in ld/emultempl/aarch64elf.em explains that it's "Only here for backwards compatibility". Since this flag is a no-op on ARM64, we can safely drop it. Acked-by: Will Deacon Reviewed-by: Nick Desaulniers Signed-off-by: Greg Hackmann Signed-off-by: Catalin Marinas Signed-off-by: Nick Desaulniers Signed-off-by: Sasha Levin --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7318165cfc90..48f2b3657507 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=-p --no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 From cd4f18da5ecaf08fbbb61e74434468b63519ecf3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 May 2018 18:41:36 +0100 Subject: [PATCH 0670/2608] xhci: Allow more than 32 quirks commit 36b6857932f380fcb55c31ac75857e3e81dd583a upstream. We now have 32 different quirks, and the field that holds them is full. Let's bump it up to the next stage so that we can handle some more... The type is now an unsigned long long, which is 64bit on most architectures. We take this opportunity to change the quirks from using (1 << x) to BIT_ULL(x). Tested-by: Domenico Andreoli Signed-off-by: Marc Zyngier Tested-by: Faiz Abbas Tested-by: Domenico Andreoli Acked-by: Mathias Nyman Cc: "Cherian, George" Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 ++-- drivers/usb/host/xhci.h | 64 +++++++++++++++++++++-------------------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 64ddba3f79a9..faf048682194 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -43,8 +43,8 @@ static int link_quirk; module_param(link_quirk, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(link_quirk, "Don't clear the chain bit on a link TRB"); -static unsigned int quirks; -module_param(quirks, uint, S_IRUGO); +static unsigned long long quirks; +module_param(quirks, ullong, S_IRUGO); MODULE_PARM_DESC(quirks, "Bit flags for quirks to be enabled as default"); static bool td_on_ring(struct xhci_td *td, struct xhci_ring *ring) @@ -4956,7 +4956,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) return retval; xhci_dbg(xhci, "Called HCD init\n"); - xhci_info(xhci, "hcc params 0x%08x hci version 0x%x quirks 0x%08x\n", + xhci_info(xhci, "hcc params 0x%08x hci version 0x%x quirks 0x%016llx\n", xhci->hcc_params, xhci->hci_version, xhci->quirks); return 0; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 29dc6160c9eb..ef7f171d6e97 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1794,12 +1794,12 @@ struct xhci_hcd { #define XHCI_STATE_DYING (1 << 0) #define XHCI_STATE_HALTED (1 << 1) #define XHCI_STATE_REMOVING (1 << 2) - unsigned int quirks; -#define XHCI_LINK_TRB_QUIRK (1 << 0) -#define XHCI_RESET_EP_QUIRK (1 << 1) -#define XHCI_NEC_HOST (1 << 2) -#define XHCI_AMD_PLL_FIX (1 << 3) -#define XHCI_SPURIOUS_SUCCESS (1 << 4) + unsigned long long quirks; +#define XHCI_LINK_TRB_QUIRK BIT_ULL(0) +#define XHCI_RESET_EP_QUIRK BIT_ULL(1) +#define XHCI_NEC_HOST BIT_ULL(2) +#define XHCI_AMD_PLL_FIX BIT_ULL(3) +#define XHCI_SPURIOUS_SUCCESS BIT_ULL(4) /* * Certain Intel host controllers have a limit to the number of endpoint * contexts they can handle. Ideally, they would signal that they can't handle @@ -1809,33 +1809,35 @@ struct xhci_hcd { * commands, reset device commands, disable slot commands, and address device * commands. */ -#define XHCI_EP_LIMIT_QUIRK (1 << 5) -#define XHCI_BROKEN_MSI (1 << 6) -#define XHCI_RESET_ON_RESUME (1 << 7) -#define XHCI_SW_BW_CHECKING (1 << 8) -#define XHCI_AMD_0x96_HOST (1 << 9) -#define XHCI_TRUST_TX_LENGTH (1 << 10) -#define XHCI_LPM_SUPPORT (1 << 11) -#define XHCI_INTEL_HOST (1 << 12) -#define XHCI_SPURIOUS_REBOOT (1 << 13) -#define XHCI_COMP_MODE_QUIRK (1 << 14) -#define XHCI_AVOID_BEI (1 << 15) -#define XHCI_PLAT (1 << 16) -#define XHCI_SLOW_SUSPEND (1 << 17) -#define XHCI_SPURIOUS_WAKEUP (1 << 18) +#define XHCI_EP_LIMIT_QUIRK BIT_ULL(5) +#define XHCI_BROKEN_MSI BIT_ULL(6) +#define XHCI_RESET_ON_RESUME BIT_ULL(7) +#define XHCI_SW_BW_CHECKING BIT_ULL(8) +#define XHCI_AMD_0x96_HOST BIT_ULL(9) +#define XHCI_TRUST_TX_LENGTH BIT_ULL(10) +#define XHCI_LPM_SUPPORT BIT_ULL(11) +#define XHCI_INTEL_HOST BIT_ULL(12) +#define XHCI_SPURIOUS_REBOOT BIT_ULL(13) +#define XHCI_COMP_MODE_QUIRK BIT_ULL(14) +#define XHCI_AVOID_BEI BIT_ULL(15) +#define XHCI_PLAT BIT_ULL(16) +#define XHCI_SLOW_SUSPEND BIT_ULL(17) +#define XHCI_SPURIOUS_WAKEUP BIT_ULL(18) /* For controllers with a broken beyond repair streams implementation */ -#define XHCI_BROKEN_STREAMS (1 << 19) -#define XHCI_PME_STUCK_QUIRK (1 << 20) -#define XHCI_MTK_HOST (1 << 21) -#define XHCI_SSIC_PORT_UNUSED (1 << 22) -#define XHCI_NO_64BIT_SUPPORT (1 << 23) -#define XHCI_MISSING_CAS (1 << 24) +#define XHCI_BROKEN_STREAMS BIT_ULL(19) +#define XHCI_PME_STUCK_QUIRK BIT_ULL(20) +#define XHCI_MTK_HOST BIT_ULL(21) +#define XHCI_SSIC_PORT_UNUSED BIT_ULL(22) +#define XHCI_NO_64BIT_SUPPORT BIT_ULL(23) +#define XHCI_MISSING_CAS BIT_ULL(24) /* For controller with a broken Port Disable implementation */ -#define XHCI_BROKEN_PORT_PED (1 << 25) -#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) -#define XHCI_U2_DISABLE_WAKE (1 << 27) -#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28) -#define XHCI_SUSPEND_DELAY (1 << 30) +#define XHCI_BROKEN_PORT_PED BIT_ULL(25) +#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 BIT_ULL(26) +#define XHCI_U2_DISABLE_WAKE BIT_ULL(27) +#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL BIT_ULL(28) +#define XHCI_HW_LPM_DISABLE BIT_ULL(29) +#define XHCI_SUSPEND_DELAY BIT_ULL(30) +#define XHCI_INTEL_USB_ROLE_SW BIT_ULL(31) unsigned int num_active_eps; unsigned int limit_active_eps; From 649594a678c2aa9bea0693a22753034c35ab42c6 Mon Sep 17 00:00:00 2001 From: "Cherian, George" Date: Fri, 9 Nov 2018 17:21:22 +0200 Subject: [PATCH 0671/2608] xhci: Add quirk to workaround the errata seen on Cavium Thunder-X2 Soc commit 11644a7659529730eaf2f166efaabe7c3dc7af8c upstream. Implement workaround for ThunderX2 Errata-129 (documented in CN99XX Known Issues" available at Cavium support site). As per ThunderX2errata-129, USB 2 device may come up as USB 1 if a connection to a USB 1 device is followed by another connection to a USB 2 device, the link will come up as USB 1 for the USB 2 device. Resolution: Reset the PHY after the USB 1 device is disconnected. The PHY reset sequence is done using private registers in XHCI register space. After the PHY is reset we check for the PLL lock status and retry the operation if it fails. From our tests, retrying 4 times is sufficient. Add a new quirk flag XHCI_RESET_PLL_ON_DISCONNECT to invoke the workaround in handle_xhci_port_status(). Cc: stable@vger.kernel.org Signed-off-by: George Cherian Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 +++++ drivers/usb/host/xhci-ring.c | 35 ++++++++++++++++++++++++++++++++++- drivers/usb/host/xhci.h | 1 + 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 9218f506f8e3..4b07b6859b4c 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -236,6 +236,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7; + if ((pdev->vendor == PCI_VENDOR_ID_BROADCOM || + pdev->vendor == PCI_VENDOR_ID_CAVIUM) && + pdev->device == 0x9026) + xhci->quirks |= XHCI_RESET_PLL_ON_DISCONNECT; + if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "QUIRK: Resetting on resume"); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 2d64cb024d00..998f61d82689 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1568,6 +1568,35 @@ static void handle_device_notification(struct xhci_hcd *xhci, usb_wakeup_notification(udev->parent, udev->portnum); } +/* + * Quirk hanlder for errata seen on Cavium ThunderX2 processor XHCI + * Controller. + * As per ThunderX2errata-129 USB 2 device may come up as USB 1 + * If a connection to a USB 1 device is followed by another connection + * to a USB 2 device. + * + * Reset the PHY after the USB device is disconnected if device speed + * is less than HCD_USB3. + * Retry the reset sequence max of 4 times checking the PLL lock status. + * + */ +static void xhci_cavium_reset_phy_quirk(struct xhci_hcd *xhci) +{ + struct usb_hcd *hcd = xhci_to_hcd(xhci); + u32 pll_lock_check; + u32 retry_count = 4; + + do { + /* Assert PHY reset */ + writel(0x6F, hcd->regs + 0x1048); + udelay(10); + /* De-assert the PHY reset */ + writel(0x7F, hcd->regs + 0x1048); + udelay(200); + pll_lock_check = readl(hcd->regs + 0x1070); + } while (!(pll_lock_check & 0x1) && --retry_count); +} + static void handle_port_status(struct xhci_hcd *xhci, union xhci_trb *event) { @@ -1725,9 +1754,13 @@ static void handle_port_status(struct xhci_hcd *xhci, goto cleanup; } - if (hcd->speed < HCD_USB3) + if (hcd->speed < HCD_USB3) { xhci_test_and_clear_bit(xhci, port_array, faked_port_index, PORT_PLC); + if ((xhci->quirks & XHCI_RESET_PLL_ON_DISCONNECT) && + (portsc & PORT_CSC) && !(portsc & PORT_CONNECT)) + xhci_cavium_reset_phy_quirk(xhci); + } cleanup: /* Update event ring dequeue pointer before dropping the lock */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index ef7f171d6e97..74ba20556020 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1838,6 +1838,7 @@ struct xhci_hcd { #define XHCI_HW_LPM_DISABLE BIT_ULL(29) #define XHCI_SUSPEND_DELAY BIT_ULL(30) #define XHCI_INTEL_USB_ROLE_SW BIT_ULL(31) +#define XHCI_RESET_PLL_ON_DISCONNECT BIT_ULL(34) unsigned int num_active_eps; unsigned int limit_active_eps; From 0aa6b111cc44f9aef0be1715e2d8ac8c2e9023cb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:49 +0200 Subject: [PATCH 0672/2608] mtd: rawnand: atmel: fix OF child-node lookup commit 5d1e9c2212ea6b4dd735e4fc3dd6279a365d5d10 upstream. Use the new of_get_compatible_child() helper to lookup the nfc child node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-child) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the node of the device being probed). While at it, also fix a related nfc-node reference leak. Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Cc: stable # 4.11 Cc: Nicolas Ferre Cc: Josh Wu Cc: Boris Brezillon Signed-off-by: Johan Hovold Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/atmel/nand-controller.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 32a2f947a454..0b93f152d993 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -2077,8 +2077,7 @@ atmel_hsmc_nand_controller_legacy_init(struct atmel_hsmc_nand_controller *nc) int ret; nand_np = dev->of_node; - nfc_np = of_find_compatible_node(dev->of_node, NULL, - "atmel,sama5d3-nfc"); + nfc_np = of_get_compatible_child(dev->of_node, "atmel,sama5d3-nfc"); if (!nfc_np) { dev_err(dev, "Could not find device node for sama5d3-nfc\n"); return -ENODEV; @@ -2492,15 +2491,19 @@ static int atmel_nand_controller_probe(struct platform_device *pdev) } if (caps->legacy_of_bindings) { + struct device_node *nfc_node; u32 ale_offs = 21; /* * If we are parsing legacy DT props and the DT contains a * valid NFC node, forward the request to the sama5 logic. */ - if (of_find_compatible_node(pdev->dev.of_node, NULL, - "atmel,sama5d3-nfc")) + nfc_node = of_get_compatible_child(pdev->dev.of_node, + "atmel,sama5d3-nfc"); + if (nfc_node) { caps = &atmel_sama5_nand_caps; + of_node_put(nfc_node); + } /* * Even if the compatible says we are dealing with an From da141471e77ba27b9c50f858ff866b4958d9e04c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 28 May 2018 22:04:33 +0200 Subject: [PATCH 0673/2608] ubi: fastmap: Check each mapping only once commit 34653fd8c46e771585fce5975e4243f8fd401914 upstream. Maintain a bitmap to keep track of which LEB->PEB mapping was checked already. That way we have to read back VID headers only once. Fixes: a23cf10d9abb ("ubi: fastmap: Correctly handle interrupted erasures in EBA") Signed-off-by: Richard Weinberger Signed-off-by: Martin Kepplinger --- drivers/mtd/ubi/build.c | 1 + drivers/mtd/ubi/eba.c | 4 ++++ drivers/mtd/ubi/fastmap.c | 20 ++++++++++++++++++++ drivers/mtd/ubi/ubi.h | 11 +++++++++++ drivers/mtd/ubi/vmt.c | 1 + drivers/mtd/ubi/vtbl.c | 16 +++++++++++++++- 6 files changed, 52 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 18a72da759a0..6445c693d935 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -526,6 +526,7 @@ void ubi_free_internal_volumes(struct ubi_device *ubi) for (i = ubi->vtbl_slots; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { ubi_eba_replace_table(ubi->volumes[i], NULL); + ubi_fastmap_destroy_checkmap(ubi->volumes[i]); kfree(ubi->volumes[i]); } } diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index d0884bd9d955..c4d4b8f07630 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -517,6 +517,9 @@ static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnu if (!ubi->fast_attach) return 0; + if (!vol->checkmap || test_bit(lnum, vol->checkmap)) + return 0; + vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; @@ -551,6 +554,7 @@ static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnu goto out_free; } + set_bit(lnum, vol->checkmap); err = 0; out_free: diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 5a832bc79b1b..63e8527f7b65 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1101,6 +1101,26 @@ free_fm_sb: goto out; } +int ubi_fastmap_init_checkmap(struct ubi_volume *vol, int leb_count) +{ + struct ubi_device *ubi = vol->ubi; + + if (!ubi->fast_attach) + return 0; + + vol->checkmap = kcalloc(BITS_TO_LONGS(leb_count), sizeof(unsigned long), + GFP_KERNEL); + if (!vol->checkmap) + return -ENOMEM; + + return 0; +} + +void ubi_fastmap_destroy_checkmap(struct ubi_volume *vol) +{ + kfree(vol->checkmap); +} + /** * ubi_write_fastmap - writes a fastmap. * @ubi: UBI device object diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 5fe62653995e..f5ba97c46160 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -334,6 +334,9 @@ struct ubi_eba_leb_desc { * @changing_leb: %1 if the atomic LEB change ioctl command is in progress * @direct_writes: %1 if direct writes are enabled for this volume * + * @checkmap: bitmap to remember which PEB->LEB mappings got checked, + * protected by UBI LEB lock tree. + * * The @corrupted field indicates that the volume's contents is corrupted. * Since UBI protects only static volumes, this field is not relevant to * dynamic volumes - it is user's responsibility to assure their data @@ -377,6 +380,10 @@ struct ubi_volume { unsigned int updating:1; unsigned int changing_leb:1; unsigned int direct_writes:1; + +#ifdef CONFIG_MTD_UBI_FASTMAP + unsigned long *checkmap; +#endif }; /** @@ -965,8 +972,12 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi); int ubi_update_fastmap(struct ubi_device *ubi); int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, struct ubi_attach_info *scan_ai); +int ubi_fastmap_init_checkmap(struct ubi_volume *vol, int leb_count); +void ubi_fastmap_destroy_checkmap(struct ubi_volume *vol); #else static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; } +int static inline ubi_fastmap_init_checkmap(struct ubi_volume *vol, int leb_count) { return 0; } +static inline void ubi_fastmap_destroy_checkmap(struct ubi_volume *vol) {} #endif /* block.c */ diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 3fd8d7ff7a02..0be516780e92 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -139,6 +139,7 @@ static void vol_release(struct device *dev) struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); ubi_eba_replace_table(vol, NULL); + ubi_fastmap_destroy_checkmap(vol); kfree(vol); } diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 263743e7b741..94d7a865b135 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -534,7 +534,7 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_attach_info *ai, const struct ubi_vtbl_record *vtbl) { - int i, reserved_pebs = 0; + int i, err, reserved_pebs = 0; struct ubi_ainf_volume *av; struct ubi_volume *vol; @@ -620,6 +620,16 @@ static int init_volumes(struct ubi_device *ubi, (long long)(vol->used_ebs - 1) * vol->usable_leb_size; vol->used_bytes += av->last_data_size; vol->last_eb_bytes = av->last_data_size; + + /* + * We use ubi->peb_count and not vol->reserved_pebs because + * we want to keep the code simple. Otherwise we'd have to + * resize/check the bitmap upon volume resize too. + * Allocating a few bytes more does not hurt. + */ + err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); + if (err) + return err; } /* And add the layout volume */ @@ -645,6 +655,9 @@ static int init_volumes(struct ubi_device *ubi, reserved_pebs += vol->reserved_pebs; ubi->vol_count += 1; vol->ubi = ubi; + err = ubi_fastmap_init_checkmap(vol, UBI_LAYOUT_VOLUME_EBS); + if (err) + return err; if (reserved_pebs > ubi->avail_pebs) { ubi_err(ubi, "not enough PEBs, required %d, available %d", @@ -849,6 +862,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) out_free: vfree(ubi->vtbl); for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + ubi_fastmap_destroy_checkmap(ubi->volumes[i]); kfree(ubi->volumes[i]); ubi->volumes[i] = NULL; } From f299ecb085117412b313094d075e573994c704a0 Mon Sep 17 00:00:00 2001 From: Francis Therien Date: Mon, 26 Mar 2018 15:59:00 -0700 Subject: [PATCH 0674/2608] Input: xpad - add PDP device id 0x02a4 [ Upstream commit c6c848572f4da0e34ffe0a35364b4db871e13e42 ] Adds support for a PDP Xbox One controller with device ID (0x06ef:0x02a4). The Product string for this device is "PDP Wired Controller for Xbox One - Stealth Series | Phantom Black". Signed-off-by: Francis Therien Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 53f775c41cd1..d2a1857fdae5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -231,6 +231,7 @@ static const struct xpad_device { { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -480,7 +481,8 @@ static const u8 xboxone_hori_init[] = { /* * This packet is required for some of the PDP pads to start - * sending input reports. One of those pads is (0x0e6f:0x02ab). + * sending input reports. These pads include: (0x0e6f:0x02ab), + * (0x0e6f:0x02a4). */ static const u8 xboxone_pdp_init1[] = { 0x0a, 0x20, 0x00, 0x03, 0x00, 0x01, 0x14 @@ -488,7 +490,8 @@ static const u8 xboxone_pdp_init1[] = { /* * This packet is required for some of the PDP pads to start - * sending input reports. One of those pads is (0x0e6f:0x02ab). + * sending input reports. These pads include: (0x0e6f:0x02ab), + * (0x0e6f:0x02a4). */ static const u8 xboxone_pdp_init2[] = { 0x06, 0x20, 0x00, 0x02, 0x01, 0x00 @@ -526,6 +529,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_fw2015_init), XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init1), XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), + XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), From 5d3bf6348692bb664da58a07510cd21c1439394e Mon Sep 17 00:00:00 2001 From: Leo Sperling Date: Wed, 3 Aug 2016 17:31:09 -0700 Subject: [PATCH 0675/2608] Input: xpad - fix some coding style issues [ Upstream commit 68c78d0155e37992268664e134996d2b140ddf38 ] Fix some coding style issues reported by checkpatch.pl. Mostly brackets in macros, spacing and comment style. Signed-off-by: Leo Sperling Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index d2a1857fdae5..7e812a8877bc 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -89,8 +89,10 @@ #define XPAD_PKT_LEN 64 -/* xbox d-pads should map to buttons, as is required for DDR pads - but we map them to axes when possible to simplify things */ +/* + * xbox d-pads should map to buttons, as is required for DDR pads + * but we map them to axes when possible to simplify things + */ #define MAP_DPAD_TO_BUTTONS (1 << 0) #define MAP_TRIGGERS_TO_BUTTONS (1 << 1) #define MAP_STICKS_TO_NULL (1 << 2) @@ -391,15 +393,15 @@ static const signed short xpad_abs_triggers[] = { * match against vendor id as well. Wired Xbox 360 devices have protocol 1, * wireless controllers have protocol 129. */ -#define XPAD_XBOX360_VENDOR_PROTOCOL(vend,pr) \ +#define XPAD_XBOX360_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 93, \ .bInterfaceProtocol = (pr) #define XPAD_XBOX360_VENDOR(vend) \ - { XPAD_XBOX360_VENDOR_PROTOCOL(vend,1) }, \ - { XPAD_XBOX360_VENDOR_PROTOCOL(vend,129) } + { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 1) }, \ + { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 129) } /* The Xbox One controller uses subclass 71 and protocol 208. */ #define XPAD_XBOXONE_VENDOR_PROTOCOL(vend, pr) \ @@ -409,7 +411,7 @@ static const signed short xpad_abs_triggers[] = { .bInterfaceSubClass = 71, \ .bInterfaceProtocol = (pr) #define XPAD_XBOXONE_VENDOR(vend) \ - { XPAD_XBOXONE_VENDOR_PROTOCOL(vend, 208) } + { XPAD_XBOXONE_VENDOR_PROTOCOL((vend), 208) } static const struct usb_device_id xpad_table[] = { { USB_INTERFACE_INFO('X', 'B', 0) }, /* X-Box USB-IF not approved class */ @@ -1578,6 +1580,7 @@ static void xpad_close(struct input_dev *dev) static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) { struct usb_xpad *xpad = input_get_drvdata(input_dev); + set_bit(abs, input_dev->absbit); switch (abs) { From 8d6a81009de5a184da9da76de27961d4abd62092 Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Tue, 8 May 2018 15:17:07 -0700 Subject: [PATCH 0676/2608] Input: xpad - avoid using __set_bit() for capabilities [ Upstream commit a01308031c2647ed5f1c845104b73a8820a958a9 ] input_set_capability() and input_set_abs_param() will do it for you. Signed-off-by: Marcus Folkesson Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 7e812a8877bc..69b44aebaf01 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1581,8 +1581,6 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) { struct usb_xpad *xpad = input_get_drvdata(input_dev); - set_bit(abs, input_dev->absbit); - switch (abs) { case ABS_X: case ABS_Y: @@ -1601,6 +1599,9 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) case ABS_HAT0Y: /* the d-pad (only if dpad is mapped to axes */ input_set_abs_params(input_dev, abs, -1, 1, 0, 0); break; + default: + input_set_abs_params(input_dev, abs, 0, 0, 0, 0); + break; } } @@ -1641,10 +1642,7 @@ static int xpad_init_input(struct usb_xpad *xpad) input_dev->close = xpad_close; } - __set_bit(EV_KEY, input_dev->evbit); - if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { - __set_bit(EV_ABS, input_dev->evbit); /* set up axes */ for (i = 0; xpad_abs[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs[i]); @@ -1652,21 +1650,22 @@ static int xpad_init_input(struct usb_xpad *xpad) /* set up standard buttons */ for (i = 0; xpad_common_btn[i] >= 0; i++) - __set_bit(xpad_common_btn[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, xpad_common_btn[i]); /* set up model-specific ones */ if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX360W || xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) - __set_bit(xpad360_btn[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); } else { for (i = 0; xpad_btn[i] >= 0; i++) - __set_bit(xpad_btn[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, xpad_btn[i]); } if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { for (i = 0; xpad_btn_pad[i] >= 0; i++) - __set_bit(xpad_btn_pad[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, + xpad_btn_pad[i]); } /* @@ -1683,7 +1682,8 @@ static int xpad_init_input(struct usb_xpad *xpad) if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { for (i = 0; xpad_btn_triggers[i] >= 0; i++) - __set_bit(xpad_btn_triggers[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, + xpad_btn_triggers[i]); } else { for (i = 0; xpad_abs_triggers[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_triggers[i]); From eb4b4647003688e3b3a4e20fef6ca33a029a1e58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ramses=20Ram=C3=ADrez?= Date: Fri, 28 Sep 2018 16:59:26 -0700 Subject: [PATCH 0677/2608] Input: xpad - add support for Xbox1 PDP Camo series gamepad MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9735082a7cbae572c2eabdc45acecc8c9fa0759b ] The "Xbox One PDP Wired Controller - Camo series" has a different product-id than the regular PDP controller and the PDP stealth series, but it uses the same initialization sequence. This patch adds the product-id of the camo series to the structures that handle the other PDP Xbox One controllers. Signed-off-by: Ramses Ramírez Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 69b44aebaf01..2e52015634f9 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -234,6 +234,7 @@ static const struct xpad_device { { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -533,6 +534,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1), XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2), + XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), From 1fe2316fc23aa6a23721bf97a4cdd8a88e8213f4 Mon Sep 17 00:00:00 2001 From: Matt Chen Date: Fri, 3 Aug 2018 14:29:20 +0800 Subject: [PATCH 0678/2608] iwlwifi: fix wrong WGDS_WIFI_DATA_SIZE commit 66e839030fd698586734e017fd55c4f2a89dba0b upstream. From coreboot/BIOS: Name ("WGDS", Package() { Revision, Package() { DomainType, // 0x7:WiFi ==> We miss this one. WgdsWiFiSarDeltaGroup1PowerMax1, // Group 1 FCC 2400 Max WgdsWiFiSarDeltaGroup1PowerChainA1, // Group 1 FCC 2400 A Offset WgdsWiFiSarDeltaGroup1PowerChainB1, // Group 1 FCC 2400 B Offset WgdsWiFiSarDeltaGroup1PowerMax2, // Group 1 FCC 5200 Max WgdsWiFiSarDeltaGroup1PowerChainA2, // Group 1 FCC 5200 A Offset WgdsWiFiSarDeltaGroup1PowerChainB2, // Group 1 FCC 5200 B Offset WgdsWiFiSarDeltaGroup2PowerMax1, // Group 2 EC Jap 2400 Max WgdsWiFiSarDeltaGroup2PowerChainA1, // Group 2 EC Jap 2400 A Offset WgdsWiFiSarDeltaGroup2PowerChainB1, // Group 2 EC Jap 2400 B Offset WgdsWiFiSarDeltaGroup2PowerMax2, // Group 2 EC Jap 5200 Max WgdsWiFiSarDeltaGroup2PowerChainA2, // Group 2 EC Jap 5200 A Offset WgdsWiFiSarDeltaGroup2PowerChainB2, // Group 2 EC Jap 5200 B Offset WgdsWiFiSarDeltaGroup3PowerMax1, // Group 3 ROW 2400 Max WgdsWiFiSarDeltaGroup3PowerChainA1, // Group 3 ROW 2400 A Offset WgdsWiFiSarDeltaGroup3PowerChainB1, // Group 3 ROW 2400 B Offset WgdsWiFiSarDeltaGroup3PowerMax2, // Group 3 ROW 5200 Max WgdsWiFiSarDeltaGroup3PowerChainA2, // Group 3 ROW 5200 A Offset WgdsWiFiSarDeltaGroup3PowerChainB2, // Group 3 ROW 5200 B Offset } }) When read the ACPI data to find out the WGDS, the DATA_SIZE is never matched. From the above format, it gives 19 numbers, but our driver is hardcode as 18. Fix it to pass then can parse the data into our wgds table. Then we will see: iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init Sending GEO_TX_POWER_LIMIT iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[0] Band[0]: chain A = 68 chain B = 69 max_tx_power = 54 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[0] Band[1]: chain A = 48 chain B = 49 max_tx_power = 70 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[1] Band[0]: chain A = 51 chain B = 67 max_tx_power = 50 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[1] Band[1]: chain A = 69 chain B = 70 max_tx_power = 68 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[2] Band[0]: chain A = 49 chain B = 50 max_tx_power = 48 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[2] Band[1]: chain A = 52 chain B = 53 max_tx_power = 51 Cc: stable@vger.kernel.org # 4.12+ Fixes: a6bff3cb19b7 ("iwlwifi: mvm: add GEO_TX_POWER_LIMIT cmd for geographic tx power table") Signed-off-by: Matt Chen Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 11319675f2d9..cebf0ce76d27 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -590,7 +590,7 @@ static int iwl_mvm_config_ltr(struct iwl_mvm *mvm) #define ACPI_WRDS_WIFI_DATA_SIZE (IWL_MVM_SAR_TABLE_SIZE + 2) #define ACPI_EWRD_WIFI_DATA_SIZE ((IWL_MVM_SAR_PROFILE_NUM - 1) * \ IWL_MVM_SAR_TABLE_SIZE + 3) -#define ACPI_WGDS_WIFI_DATA_SIZE 18 +#define ACPI_WGDS_WIFI_DATA_SIZE 19 #define ACPI_WGDS_NUM_BANDS 2 #define ACPI_WGDS_TABLE_SIZE 3 @@ -964,7 +964,7 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) IWL_DEBUG_RADIO(mvm, "Sending GEO_TX_POWER_LIMIT\n"); BUILD_BUG_ON(IWL_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS * - ACPI_WGDS_TABLE_SIZE != ACPI_WGDS_WIFI_DATA_SIZE); + ACPI_WGDS_TABLE_SIZE + 1 != ACPI_WGDS_WIFI_DATA_SIZE); for (i = 0; i < IWL_NUM_GEO_PROFILES; i++) { struct iwl_per_chain_offset *chain = From 9d844b0e66929e3c3099fe428fe06543bc18b99a Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 17 Sep 2018 19:31:57 -0700 Subject: [PATCH 0679/2608] kbuild: allow to use GCC toolchain not in Clang search path commit ef8c4ed9db80261f397f0c0bf723684601ae3b52 upstream. When using a GCC cross toolchain which is not in a compiled in Clang search path, Clang reverts to the system assembler and linker. This leads to assembler or linker errors, depending on which tool is first used for a given architecture. It seems that Clang is not searching $PATH for a matching assembler or linker. Make sure that Clang picks up the correct assembler or linker by passing the cross compilers bin directory as search path. This allows to use Clang provided by distributions with GCC toolchains not in /usr/bin. Link: https://github.com/ClangBuiltLinux/linux/issues/78 Signed-off-by: Stefan Agner Reviewed-and-tested-by: Nick Desaulniers Signed-off-by: Masahiro Yamada [nc: Adjust context] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 874d72a3e6a7..cb131e135c42 100644 --- a/Makefile +++ b/Makefile @@ -480,13 +480,15 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) +CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) +GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif -KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) endif From 7aaa554d09606185650cd1aa422887736b9e1056 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Dec 2017 14:01:45 +0100 Subject: [PATCH 0680/2608] PCI: endpoint: Populate func_no before calling pci_epc_add_epf() commit 0c47cd7a9b6c9c36c08113e594e9ad017fb17865 upstream. func_no is a member of struct pci_epf. Since struct pci_epf is used as an argument to pci_epc_add_epf() (to bind an endpoint function to a controller), struct pci_epf.func_no should be populated before calling pci_epc_add_epf(). Initialize the struct pci_epf.func_no member before calling pci_epc_add_epf(), to fix the endpoint function binding to an endpoint controller. Fixes: d74679911610 ("PCI: endpoint: Introduce configfs entry for configuring EP functions") Signed-off-by: Niklas Cassel [lorenzo.pieralisi@arm.com: rewrote the commit log] Signed-off-by: Lorenzo Pieralisi Suggested-by: Kishon Vijay Abraham I Acked-by: Kishon Vijay Abraham I Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/pci/endpoint/pci-ep-cfs.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pci/endpoint/pci-ep-cfs.c b/drivers/pci/endpoint/pci-ep-cfs.c index 16cec66b1d0b..8fdb9d07c50a 100644 --- a/drivers/pci/endpoint/pci-ep-cfs.c +++ b/drivers/pci/endpoint/pci-ep-cfs.c @@ -97,16 +97,10 @@ static int pci_epc_epf_link(struct config_item *epc_item, { int ret; u32 func_no = 0; - struct pci_epc *epc; - struct pci_epf *epf; struct pci_epf_group *epf_group = to_pci_epf_group(epf_item); struct pci_epc_group *epc_group = to_pci_epc_group(epc_item); - - epc = epc_group->epc; - epf = epf_group->epf; - ret = pci_epc_add_epf(epc, epf); - if (ret) - goto err_add_epf; + struct pci_epc *epc = epc_group->epc; + struct pci_epf *epf = epf_group->epf; func_no = find_first_zero_bit(&epc_group->function_num_map, BITS_PER_LONG); @@ -116,6 +110,10 @@ static int pci_epc_epf_link(struct config_item *epc_item, set_bit(func_no, &epc_group->function_num_map); epf->func_no = func_no; + ret = pci_epc_add_epf(epc, epf); + if (ret) + goto err_add_epf; + ret = pci_epf_bind(epf); if (ret) goto err_epf_bind; From 9d9cd2bcfc5298109bd90c52f239011337147812 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 13 Dec 2017 18:12:10 +0200 Subject: [PATCH 0681/2608] net/mlx4_core: Fix wrong calculation of free counters commit 0bb9fc4f5429ac970181c073aa32e521e20f7b73 upstream. The field res_free indicates the total number of counters which are available for allocation (reserved and unreserved). Fixed a bug where the reserved counters were subtracted from res_free before any allocation was performed. Before this fix, free counters which were not reserved could not be allocated. Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker") Signed-off-by: Eran Ben Elisha Reviewed-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b26da0952a4d..a5381b091710 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -611,7 +611,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) MLX4_MAX_PORTS; else res_alloc->guaranteed[t] = 0; - res_alloc->res_free -= res_alloc->guaranteed[t]; break; default: break; From 31a673709a853cf28ca4a1bb88e15bf4c0bf099f Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 7 May 2018 12:52:17 -0500 Subject: [PATCH 0682/2608] i40iw: Fix memory leak in error path of create QP commit 5a7189d529cd146cd5838af97b32fcac4122b471 upstream. If i40iw_allocate_dma_mem fails when creating a QP, the memory allocated for the QP structure using kzalloc is not freed because iwqp->allocated_buffer is used to free the memory and it is not setup until later. Fix this by setting iwqp->allocated_buffer before allocating the dma memory. Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 39398dd074d6..c1021b4afb41 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -631,6 +631,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); iwqp = (struct i40iw_qp *)mem; + iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; @@ -659,7 +660,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->allocated_buffer = mem; iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; From 7eebc934f302922de7c7399db4752956c19f7147 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 13 Oct 2017 00:06:44 +0200 Subject: [PATCH 0683/2608] rtc: omap: fix error path when pinctrl_register fails commit 26e480f7bb7840fc0daa9c3af7c4501b2cf5902f upstream. If pinctrl_register() fails probe will return with an error without locking the RTC and disabling pm_runtime. Set ret and jump to err instead. Fixes: 97ea1906b3c2 ("rtc: omap: Support ext_wakeup configuration") Signed-off-by: Alexandre Belloni Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-omap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index ac6e6a6a194c..ae6506a8b4f5 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -823,7 +823,8 @@ static int omap_rtc_probe(struct platform_device *pdev) rtc->pctldev = pinctrl_register(&rtc_pinctrl_desc, &pdev->dev, rtc); if (IS_ERR(rtc->pctldev)) { dev_err(&pdev->dev, "Couldn't register pinctrl driver\n"); - return PTR_ERR(rtc->pctldev); + ret = PTR_ERR(rtc->pctldev); + goto err; } if (rtc->is_pmic_controller) { From dc32281ec0281ffe2ea57d3157eaae6eb74aef8a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 13 Mar 2018 11:46:12 +0100 Subject: [PATCH 0684/2608] clk: samsung: exynos5250: Add missing clocks for FIMC LITE SYSMMU devices commit 5b23fceec1ff94305c5d1accde018cae27448005 upstream. FIMC LITE SYSMMU devices are defined in exynos5250.dtsi, but clocks for them are not instantiated by Exynos5250 clock provider driver. Add needed definitions for those clocks to fix IOMMU probe failure: ERROR: could not get clock /soc/sysmmu@13c40000:sysmmu(0) exynos-sysmmu 13c40000.sysmmu: Failed to get device clock(s)! exynos-sysmmu: probe of 13c40000.sysmmu failed with error -38 ERROR: could not get clock /soc/sysmmu@13c50000:sysmmu(0) exynos-sysmmu 13c50000.sysmmu: Failed to get device clock(s)! exynos-sysmmu: probe of 13c50000.sysmmu failed with error -38 Signed-off-by: Marek Szyprowski Fixes: bfed1074f213 ("clk: exynos5250: Add missing sysmmu clocks for DISP and ISP blocks") Acked-by: Chanwoo Choi Signed-off-by: Sylwester Nawrocki Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-exynos5250.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c index 6a0cb8a515e8..b609219c802b 100644 --- a/drivers/clk/samsung/clk-exynos5250.c +++ b/drivers/clk/samsung/clk-exynos5250.c @@ -560,6 +560,8 @@ static const struct samsung_gate_clock exynos5250_gate_clks[] __initconst = { 0), GATE(CLK_GSCL3, "gscl3", "mout_aclk266_gscl_sub", GATE_IP_GSCL, 3, 0, 0), + GATE(CLK_CAMIF_TOP, "camif_top", "mout_aclk266_gscl_sub", + GATE_IP_GSCL, 4, 0, 0), GATE(CLK_GSCL_WA, "gscl_wa", "div_gscl_wa", GATE_IP_GSCL, 5, 0, 0), GATE(CLK_GSCL_WB, "gscl_wb", "div_gscl_wb", GATE_IP_GSCL, 6, 0, 0), GATE(CLK_SMMU_GSCL0, "smmu_gscl0", "mout_aclk266_gscl_sub", @@ -570,6 +572,10 @@ static const struct samsung_gate_clock exynos5250_gate_clks[] __initconst = { GATE_IP_GSCL, 9, 0, 0), GATE(CLK_SMMU_GSCL3, "smmu_gscl3", "mout_aclk266_gscl_sub", GATE_IP_GSCL, 10, 0, 0), + GATE(CLK_SMMU_FIMC_LITE0, "smmu_fimc_lite0", "mout_aclk266_gscl_sub", + GATE_IP_GSCL, 11, 0, 0), + GATE(CLK_SMMU_FIMC_LITE1, "smmu_fimc_lite1", "mout_aclk266_gscl_sub", + GATE_IP_GSCL, 12, 0, 0), GATE(CLK_FIMD1, "fimd1", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 0, 0, 0), From a46ca4c8f1c64723ce0a9f66638a1343a71bbe3f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 21 Apr 2018 20:26:41 +0200 Subject: [PATCH 0685/2608] ARM: dts: exynos: Fix invalid node referenced by i2c20 alias in Peach Pit and Pi commit 70c3250ac1374688d7963e562fe58b23f70bcba9 upstream. After moving all nodes under "soc" node in commit 5d99cc59a3c6 ("ARM: dts: exynos: Move Exynos5250 and Exynos5420 nodes under soc"), the i2c20 alias in Peach Pit and Peach Pi stopped pointing to proper node: arch/arm/boot/dts/exynos5420-peach-pit.dtb: Warning (alias_paths): /aliases:i2c20: aliases property is not a valid node (/spi@12d40000/cros-ec@0/i2c-tunnel) arch/arm/boot/dts/exynos5800-peach-pi.dtb: Warning (alias_paths): /aliases:i2c20: aliases property is not a valid node (/spi@12d40000/cros-ec@0/i2c-tunnel) Fixes: 5d99cc59a3c6 ("ARM: dts: exynos: Move Exynos5250 and Exynos5420 nodes under soc") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5420-peach-pit.dts | 4 ++-- arch/arm/boot/dts/exynos5800-peach-pi.dts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 683a4cfb4a23..c91eff8475a8 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -31,7 +31,7 @@ aliases { /* Assign 20 so we don't get confused w/ builtin ones */ - i2c20 = "/spi@12d40000/cros-ec@0/i2c-tunnel"; + i2c20 = &i2c_tunnel; }; backlight: backlight { @@ -952,7 +952,7 @@ samsung,spi-feedback-delay = <1>; }; - i2c-tunnel { + i2c_tunnel: i2c-tunnel { compatible = "google,cros-ec-i2c-tunnel"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index b2b95ff205e8..daad5d425cf5 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -29,7 +29,7 @@ aliases { /* Assign 20 so we don't get confused w/ builtin ones */ - i2c20 = "/spi@12d40000/cros-ec@0/i2c-tunnel"; + i2c20 = &i2c_tunnel; }; backlight: backlight { @@ -921,7 +921,7 @@ samsung,spi-feedback-delay = <1>; }; - i2c-tunnel { + i2c_tunnel: i2c-tunnel { compatible = "google,cros-ec-i2c-tunnel"; #address-cells = <1>; #size-cells = <0>; From c8692d9f5ceff240792df5b42ca1639ace77204b Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Fri, 20 Oct 2017 20:01:01 +0800 Subject: [PATCH 0686/2608] driver core: Move device_links_purge() after bus_remove_device() commit 2ec16150179888b81717d1d3ce84e634f4736af2 upstream. The current ordering of code in device_del() triggers a WARN_ON() in device_links_purge(), because of an unexpected link status. The device_links_unbind_consumers() call in device_release_driver() has to take place before device_links_purge() for the status of all links to be correct, so move the device_links_purge() call in device_del() after the invocation of bus_remove_device() which calls device_release_driver(). Fixes: 9ed9895370ae (driver core: Functional dependencies tracking support) Signed-off-by: Jeffy Chen Reviewed-by: Rafael J. Wysocki Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index eb066cc827ef..3f463a61f8cf 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1973,7 +1973,6 @@ void device_del(struct device *dev) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DEL_DEVICE, dev); - device_links_purge(dev); dpm_sysfs_remove(dev); if (parent) klist_del(&dev->p->knode_parent); @@ -2001,6 +2000,7 @@ void device_del(struct device *dev) device_pm_remove(dev); driver_deferred_probe_del(dev); device_remove_properties(dev); + device_links_purge(dev); /* Notify the platform of the removal, in case they * need to do anything... From a956132ac00bd5a4b2909707f00f8d5366931648 Mon Sep 17 00:00:00 2001 From: Sebastien Boisvert Date: Fri, 26 Oct 2018 15:02:23 -0700 Subject: [PATCH 0687/2608] include/linux/pfn_t.h: force '~' to be parsed as an unary operator commit 4d54954a197175c0dcb3c82af0c0740d0c5f827a upstream. Tracing the event "fs_dax:dax_pmd_insert_mapping" with perf produces this warning: [fs_dax:dax_pmd_insert_mapping] unknown op '~' It is printed in process_op (tools/lib/traceevent/event-parse.c) because '~' is parsed as a binary operator. perf reads the format of fs_dax:dax_pmd_insert_mapping ("print fmt") from /sys/kernel/debug/tracing/events/fs_dax/dax_pmd_insert_mapping/format . The format contains: ~(((u64) ~(~(((1UL) << 12)-1))) ^ \ interpreted as a binary operator by process_op(). This part is generated in the declaration of the event class dax_pmd_insert_mapping_class in include/trace/events/fs_dax.h : __print_flags_u64(__entry->pfn_val & PFN_FLAGS_MASK, "|", PFN_FLAGS_TRACE), This patch adds a pair of parentheses in the declaration of PFN_FLAGS_MASK to make sure that '~' is parsed as a unary operator by perf. The part of the format that was problematic is now: ~(((u64) (~(~(((1UL) << 12)-1)))) Now, all the '~' are parsed as unary operators. Link: http://lkml.kernel.org/r/20181021145939.8760-1-sebhtml@videotron.qc.ca Signed-off-by: Sebastien Boisvert Acked-by: Dan Williams Cc: "Steven Rostedt (VMware)" Cc: Arnaldo Carvalho de Melo Cc: "Tzvetomir Stoyanov (VMware)" Cc: Namhyung Kim Cc: Ross Zwisler Cc: Elenie Godzaridis Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/pfn_t.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 43b1d7648e82..a8aef18c6244 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -10,7 +10,7 @@ * PFN_DEV - pfn is not covered by system memmap by default * PFN_MAP - pfn has a dynamic page mapping established by a device driver */ -#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) +#define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) #define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1)) #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) From 5e063b64163be65e2f04588808760e790a889e3c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 4 Oct 2018 11:06:13 -0700 Subject: [PATCH 0688/2608] tty: wipe buffer. commit c9a8e5fce009e3c601a43c49ea9dbcb25d1ffac5 upstream. After we are done with the tty buffer, zero it out. Reported-by: aszlig Tested-by: Milan Broz Tested-by: Daniel Zatovic Tested-by: aszlig Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 677fa99b7747..217114227f8d 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -467,11 +467,15 @@ receive_buf(struct tty_port *port, struct tty_buffer *head, int count) { unsigned char *p = char_buf_ptr(head, head->read); char *f = NULL; + int n; if (~head->flags & TTYB_NORMAL) f = flag_buf_ptr(head, head->read); - return port->client_ops->receive_buf(port, p, f, count); + n = port->client_ops->receive_buf(port, p, f, count); + if (n > 0) + memset(p, 0, n); + return n; } /** From b008f3b25588868a89e9e4562b4dc641d1032553 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Oct 2018 11:06:14 -0700 Subject: [PATCH 0689/2608] tty: wipe buffer if not echoing data commit b97b3d9fb57860a60592859e332de7759fd54c2e upstream. If we are not echoing the data to userspace or the console is in icanon mode, then perhaps it is a "secret" so we should wipe it once we are done with it. This mirrors the logic that the audit code has. Reported-by: aszlig Tested-by: Milan Broz Tested-by: Daniel Zatovic Tested-by: aszlig Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 0475f9685a41..904fc9c37fde 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -154,17 +154,28 @@ static inline unsigned char *echo_buf_addr(struct n_tty_data *ldata, size_t i) return &ldata->echo_buf[i & (N_TTY_BUF_SIZE - 1)]; } +/* If we are not echoing the data, perhaps this is a secret so erase it */ +static void zero_buffer(struct tty_struct *tty, u8 *buffer, int size) +{ + bool icanon = !!L_ICANON(tty); + bool no_echo = !L_ECHO(tty); + + if (icanon && no_echo) + memset(buffer, 0x00, size); +} + static int tty_copy_to_user(struct tty_struct *tty, void __user *to, size_t tail, size_t n) { struct n_tty_data *ldata = tty->disc_data; size_t size = N_TTY_BUF_SIZE - tail; - const void *from = read_buf_addr(ldata, tail); + void *from = read_buf_addr(ldata, tail); int uncopied; if (n > size) { tty_audit_add_data(tty, from, size); uncopied = copy_to_user(to, from, size); + zero_buffer(tty, from, size - uncopied); if (uncopied) return uncopied; to += size; @@ -173,7 +184,9 @@ static int tty_copy_to_user(struct tty_struct *tty, void __user *to, } tty_audit_add_data(tty, from, n); - return copy_to_user(to, from, n); + uncopied = copy_to_user(to, from, n); + zero_buffer(tty, from, n - uncopied); + return uncopied; } /** @@ -1962,11 +1975,12 @@ static int copy_from_read_buf(struct tty_struct *tty, n = min(head - ldata->read_tail, N_TTY_BUF_SIZE - tail); n = min(*nr, n); if (n) { - const unsigned char *from = read_buf_addr(ldata, tail); + unsigned char *from = read_buf_addr(ldata, tail); retval = copy_to_user(*b, from, n); n -= retval; is_eof = n == 1 && *from == EOF_CHAR(tty); tty_audit_add_data(tty, from, n); + zero_buffer(tty, from, n); smp_store_release(&ldata->read_tail, ldata->read_tail + n); /* Turn single EOF into zero-length read */ if (L_EXTPROC(tty) && ldata->icanon && is_eof && From 2ecab0857ab03cd74bfeeb8946323cf737b12280 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 9 Nov 2018 17:21:20 +0200 Subject: [PATCH 0690/2608] usb: xhci: fix uninitialized completion when USB3 port got wrong status commit 958c0bd86075d4ef1c936998deefe1947e539240 upstream. Realtek USB3.0 Card Reader [0bda:0328] reports wrong port status on Cannon lake PCH USB3.1 xHCI [8086:a36d] after resume from S3, after clear port reset it works fine. Since this device is registered on USB3 roothub at boot, when port status reports not superspeed, xhci_get_port_status will call an uninitialized completion in bus_state[0]. Kernel will hang because of NULL pointer. Restrict the USB2 resume status check in USB2 roothub to fix hang issue. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci-ring.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index a2e350d28c05..6b2f6c41e2a9 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -895,7 +895,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_SUSPEND; } if ((raw_port_status & PORT_PLS_MASK) == XDEV_RESUME && - !DEV_SUPERSPEED_ANY(raw_port_status)) { + !DEV_SUPERSPEED_ANY(raw_port_status) && hcd->speed < HCD_USB3) { if ((raw_port_status & PORT_RESET) || !(raw_port_status & PORT_PE)) return 0xffffffff; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 998f61d82689..aa230706b875 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1746,7 +1746,7 @@ static void handle_port_status(struct xhci_hcd *xhci, * RExit to a disconnect state). If so, let the the driver know it's * out of the RExit state. */ - if (!DEV_SUPERSPEED_ANY(portsc) && + if (!DEV_SUPERSPEED_ANY(portsc) && hcd->speed < HCD_USB3 && test_and_clear_bit(faked_port_index, &bus_state->rexit_ports)) { complete(&bus_state->rexit_done[faked_port_index]); From 7bcfd8f985f2c7bf7be6a08333dfaf31ed58ccd4 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 23 Aug 2018 17:00:35 -0700 Subject: [PATCH 0691/2608] namei: allow restricted O_CREAT of FIFOs and regular files commit 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 upstream. Disallows open of FIFOs or regular files not owned by the user in world writable sticky directories, unless the owner is the same as that of the directory or the file is opened without the O_CREAT flag. The purpose is to make data spoofing attacks harder. This protection can be turned on and off separately for FIFOs and regular files via sysctl, just like the symlinks/hardlinks protection. This patch is based on Openwall's "HARDEN_FIFO" feature by Solar Designer. This is a brief list of old vulnerabilities that could have been prevented by this feature, some of them even allow for privilege escalation: CVE-2000-1134 CVE-2007-3852 CVE-2008-0525 CVE-2009-0416 CVE-2011-4834 CVE-2015-1838 CVE-2015-7442 CVE-2016-7489 This list is not meant to be complete. It's difficult to track down all vulnerabilities of this kind because they were often reported without any mention of this particular attack vector. In fact, before hardlinks/symlinks restrictions, fifos/regular files weren't the favorite vehicle to exploit them. [s.mesoraca16@gmail.com: fix bug reported by Dan Carpenter] Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gmail.com [keescook@chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future] [keescook@chromium.org: adjust commit subjet] Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast Signed-off-by: Salvatore Mesoraca Signed-off-by: Kees Cook Suggested-by: Solar Designer Suggested-by: Kees Cook Cc: Al Viro Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Loic Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/fs.txt | 36 +++++++++++++++++++++++++ fs/namei.c | 53 ++++++++++++++++++++++++++++++++++--- include/linux/fs.h | 2 ++ kernel/sysctl.c | 18 +++++++++++++ 4 files changed, 106 insertions(+), 3 deletions(-) diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 35e17f748ca7..af5859b2d0f9 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -34,7 +34,9 @@ Currently, these files are in /proc/sys/fs: - overflowgid - pipe-user-pages-hard - pipe-user-pages-soft +- protected_fifos - protected_hardlinks +- protected_regular - protected_symlinks - suid_dumpable - super-max @@ -182,6 +184,24 @@ applied. ============================================================== +protected_fifos: + +The intent of this protection is to avoid unintentional writes to +an attacker-controlled FIFO, where a program expected to create a regular +file. + +When set to "0", writing to FIFOs is unrestricted. + +When set to "1" don't allow O_CREAT open on FIFOs that we don't own +in world writable sticky directories, unless they are owned by the +owner of the directory. + +When set to "2" it also applies to group writable sticky directories. + +This protection is based on the restrictions in Openwall. + +============================================================== + protected_hardlinks: A long-standing class of security issues is the hardlink-based @@ -202,6 +222,22 @@ This protection is based on the restrictions in Openwall and grsecurity. ============================================================== +protected_regular: + +This protection is similar to protected_fifos, but it +avoids writes to an attacker-controlled regular file, where a program +expected to create one. + +When set to "0", writing to regular files is unrestricted. + +When set to "1" don't allow O_CREAT open on regular files that we +don't own in world writable sticky directories, unless they are +owned by the owner of the directory. + +When set to "2" it also applies to group writable sticky directories. + +============================================================== + protected_symlinks: A long-standing class of security issues is the symlink-based diff --git a/fs/namei.c b/fs/namei.c index 0b46b858cd42..d1e467b7b9de 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -904,6 +904,8 @@ static inline void put_link(struct nameidata *nd) int sysctl_protected_symlinks __read_mostly = 0; int sysctl_protected_hardlinks __read_mostly = 0; +int sysctl_protected_fifos __read_mostly; +int sysctl_protected_regular __read_mostly; /** * may_follow_link - Check symlink following for unsafe situations @@ -1017,6 +1019,45 @@ static int may_linkat(struct path *link) return -EPERM; } +/** + * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory + * should be allowed, or not, on files that already + * exist. + * @dir: the sticky parent directory + * @inode: the inode of the file to open + * + * Block an O_CREAT open of a FIFO (or a regular file) when: + * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled + * - the file already exists + * - we are in a sticky directory + * - we don't own the file + * - the owner of the directory doesn't own the file + * - the directory is world writable + * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2 + * the directory doesn't have to be world writable: being group writable will + * be enough. + * + * Returns 0 if the open is allowed, -ve on error. + */ +static int may_create_in_sticky(struct dentry * const dir, + struct inode * const inode) +{ + if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || + (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || + likely(!(dir->d_inode->i_mode & S_ISVTX)) || + uid_eq(inode->i_uid, dir->d_inode->i_uid) || + uid_eq(current_fsuid(), inode->i_uid)) + return 0; + + if (likely(dir->d_inode->i_mode & 0002) || + (dir->d_inode->i_mode & 0020 && + ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || + (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { + return -EACCES; + } + return 0; +} + static __always_inline const char *get_link(struct nameidata *nd) { @@ -3355,9 +3396,15 @@ finish_open: if (error) return error; audit_inode(nd->name, nd->path.dentry, 0); - error = -EISDIR; - if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) - goto out; + if (open_flag & O_CREAT) { + error = -EISDIR; + if (d_is_dir(nd->path.dentry)) + goto out; + error = may_create_in_sticky(dir, + d_backing_inode(nd->path.dentry)); + if (unlikely(error)) + goto out; + } error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7374639f0aa0..f6a577edec67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -72,6 +72,8 @@ extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; extern int sysctl_protected_hardlinks; +extern int sysctl_protected_fifos; +extern int sysctl_protected_regular; typedef __kernel_rwf_t rwf_t; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 069550540a39..d330b1ce3b94 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1793,6 +1793,24 @@ static struct ctl_table fs_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "protected_fifos", + .data = &sysctl_protected_fifos, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, + { + .procname = "protected_regular", + .data = &sysctl_protected_regular, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, { .procname = "suid_dumpable", .data = &suid_dumpable, From a4977f3e8a3a668878543828ceaf40d69b5bfc32 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Thu, 19 Apr 2018 17:59:38 +0100 Subject: [PATCH 0692/2608] lan78xx: Read MAC address from DT if present commit 760db29bdc97b73ff60b091315ad787b1deb5cf5 upstream. There is a standard mechanism for locating and using a MAC address from the Device Tree. Use this facility in the lan78xx driver to support applications without programmed EEPROM or OTP. At the same time, regularise the handling of the different address sources. Signed-off-by: Phil Elwell Signed-off-by: David S. Miller Tested-by: Paolo Pisati Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 50e2e10a9050..e069b310d6a6 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "lan78xx.h" #define DRIVER_AUTHOR "WOOJUNG HUH " @@ -1645,34 +1646,31 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) addr[5] = (addr_hi >> 8) & 0xFF; if (!is_valid_ether_addr(addr)) { - /* reading mac address from EEPROM or OTP */ - if ((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - addr) == 0) || - (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - addr) == 0)) { - if (is_valid_ether_addr(addr)) { - /* eeprom values are valid so use them */ - netif_dbg(dev, ifup, dev->net, - "MAC address read from EEPROM"); - } else { - /* generate random MAC */ - random_ether_addr(addr); - netif_dbg(dev, ifup, dev->net, - "MAC address set to random addr"); - } - - addr_lo = addr[0] | (addr[1] << 8) | - (addr[2] << 16) | (addr[3] << 24); - addr_hi = addr[4] | (addr[5] << 8); - - ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); - ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + if (!eth_platform_get_mac_address(&dev->udev->dev, addr)) { + /* valid address present in Device Tree */ + netif_dbg(dev, ifup, dev->net, + "MAC address read from Device Tree"); + } else if (((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, + ETH_ALEN, addr) == 0) || + (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, + ETH_ALEN, addr) == 0)) && + is_valid_ether_addr(addr)) { + /* eeprom values are valid so use them */ + netif_dbg(dev, ifup, dev->net, + "MAC address read from EEPROM"); } else { /* generate random MAC */ random_ether_addr(addr); netif_dbg(dev, ifup, dev->net, "MAC address set to random addr"); } + + addr_lo = addr[0] | (addr[1] << 8) | + (addr[2] << 16) | (addr[3] << 24); + addr_hi = addr[4] | (addr[5] << 8); + + ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); + ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); } ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); From aef9f7db7e8cf0cd5357e5b449669af1e7cdccad Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 16 Aug 2018 09:02:31 +0100 Subject: [PATCH 0693/2608] s390/mm: Check for valid vma before zapping in gmap_discard commit 1843abd03250115af6cec0892683e70cf2297c25 upstream. Userspace could have munmapped the area before doing unmapping from the gmap. This would leave us with a valid vmaddr, but an invalid vma from which we would try to zap memory. Let's check before using the vma. Fixes: 1e133ab296f3 ("s390/mm: split arch/s390/mm/pgtable.c") Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Reported-by: Dan Carpenter Message-Id: <20180816082432.78828-1-frankja@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/gmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 2f66290c9b92..ec9292917d3f 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -689,6 +689,8 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) vmaddr |= gaddr & ~PMD_MASK; /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); + if (!vma) + continue; size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); } From 077506972ba23772b752e08b1ab7052cf5f04511 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Jul 2018 13:47:30 -0700 Subject: [PATCH 0694/2608] rcu: Make need_resched() respond to urgent RCU-QS needs commit 92aa39e9dc77481b90cbef25e547d66cab901496 upstream. The per-CPU rcu_dynticks.rcu_urgent_qs variable communicates an urgent need for an RCU quiescent state from the force-quiescent-state processing within the grace-period kthread to context switches and to cond_resched(). Unfortunately, such urgent needs are not communicated to need_resched(), which is sometimes used to decide when to invoke cond_resched(), for but one example, within the KVM vcpu_run() function. As of v4.15, this can result in synchronize_sched() being delayed by up to ten seconds, which can be problematic, to say nothing of annoying. This commit therefore checks rcu_dynticks.rcu_urgent_qs from within rcu_check_callbacks(), which is invoked from the scheduling-clock interrupt handler. If the current task is not an idle task and is not executing in usermode, a context switch is forced, and either way, the rcu_dynticks.rcu_urgent_qs variable is set to false. If the current task is an idle task, then RCU's dyntick-idle code will detect the quiescent state, so no further action is required. Similarly, if the task is executing in usermode, other code in rcu_check_callbacks() and its called functions will report the corresponding quiescent state. Reported-by: Marius Hillenbrand Reported-by: David Woodhouse Suggested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney [ paulmck: Backported to make patch apply cleanly on older versions. ] Tested-by: Marius Hillenbrand Cc: # 4.12.x - 4.19.x Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tree.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3e3650e94ae6..710ce1d6b982 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2772,6 +2772,15 @@ void rcu_check_callbacks(int user) rcu_bh_qs(); } rcu_preempt_check_callbacks(); + /* The load-acquire pairs with the store-release setting to true. */ + if (smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) { + /* Idle and userspace execution already are quiescent states. */ + if (!rcu_is_cpu_rrupt_from_idle() && !user) { + set_tsk_need_resched(current); + set_preempt_need_resched(); + } + __this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); + } if (rcu_pending()) invoke_rcu_core(); if (user) From 57ccd1fc8ebbd656594354ab0213754fcda03bde Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 20 Apr 2018 14:54:13 -0400 Subject: [PATCH 0695/2608] net: ieee802154: 6lowpan: fix frag reassembly commit f18fa5de5ba7f1d6650951502bb96a6e4715a948 upstream. This patch initialize stack variables which are used in frag_lowpan_compare_key to zero. In my case there are padding bytes in the structures ieee802154_addr as well in frag_lowpan_compare_key. Otherwise the key variable contains random bytes. The result is that a compare of two keys by memcmp works incorrect. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Alexander Aring Reported-by: Stefan Schmidt Signed-off-by: Stefan Schmidt Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/6lowpan_i.h | 4 ++-- net/ieee802154/6lowpan/reassembly.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index b8d95cb71c25..44a7e16bf3b5 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -20,8 +20,8 @@ typedef unsigned __bitwise lowpan_rx_result; struct frag_lowpan_compare_key { u16 tag; u16 d_size; - const struct ieee802154_addr src; - const struct ieee802154_addr dst; + struct ieee802154_addr src; + struct ieee802154_addr dst; }; /* Equivalent of ipv4 struct ipq diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 1790b65944b3..2cc224106b69 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -75,14 +75,14 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - struct frag_lowpan_compare_key key = { - .tag = cb->d_tag, - .d_size = cb->d_size, - .src = *src, - .dst = *dst, - }; + struct frag_lowpan_compare_key key = {}; struct inet_frag_queue *q; + key.tag = cb->d_tag; + key.d_size = cb->d_size; + key.src = *src; + key.dst = *dst; + q = inet_frag_find(&ieee802154_lowpan->frags, &key); if (!q) return NULL; @@ -372,7 +372,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) struct lowpan_frag_queue *fq; struct net *net = dev_net(skb->dev); struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); - struct ieee802154_hdr hdr; + struct ieee802154_hdr hdr = {}; int err; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) From de72a0f9ad28895b57b9403ae29aa5a6a579f31d Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Sat, 17 Jun 2017 23:56:23 -0400 Subject: [PATCH 0696/2608] ima: always measure and audit files in policy commit f3cc6b25dcc5616f0d5c720009b2ac66f97df2ff upstream. All files matching a "measure" rule must be included in the IMA measurement list, even when the file hash cannot be calculated. Similarly, all files matching an "audit" rule must be audited, even when the file hash can not be calculated. The file data hash field contained in the IMA measurement list template data will contain 0's instead of the actual file hash digest. Note: In general, adding, deleting or in anyway changing which files are included in the IMA measurement list is not a good idea, as it might result in not being able to unseal trusted keys sealed to a specific TPM PCR value. This patch not only adds file measurements that were not previously measured, but specifies that the file hash value for these files will be 0's. As the IMA measurement list ordering is not consistent from one boot to the next, it is unlikely that anyone is sealing keys based on the IMA measurement list. Remote attestation servers should be able to process these new measurement records, but might complain about these unknown records. Signed-off-by: Mimi Zohar Reviewed-by: Dmitry Kasatkin Cc: Aditya Kali Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_api.c | 65 ++++++++++++++++++----------- security/integrity/ima/ima_crypto.c | 10 +++++ security/integrity/ima/ima_main.c | 9 ++-- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index c2edba8de35e..c7e8db0ea4c0 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -199,42 +199,59 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, struct inode *inode = file_inode(file); const char *filename = file->f_path.dentry->d_name.name; int result = 0; + int length; + void *tmpbuf; + u64 i_version; struct { struct ima_digest_data hdr; char digest[IMA_MAX_DIGEST_SIZE]; } hash; - if (!(iint->flags & IMA_COLLECTED)) { - u64 i_version = file_inode(file)->i_version; + if (iint->flags & IMA_COLLECTED) + goto out; - if (file->f_flags & O_DIRECT) { - audit_cause = "failed(directio)"; - result = -EACCES; - goto out; - } + /* + * Dectecting file change is based on i_version. On filesystems + * which do not support i_version, support is limited to an initial + * measurement/appraisal/audit. + */ + i_version = file_inode(file)->i_version; + hash.hdr.algo = algo; - hash.hdr.algo = algo; + /* Initialize hash digest to 0's in case of failure */ + memset(&hash.digest, 0, sizeof(hash.digest)); - result = (!buf) ? ima_calc_file_hash(file, &hash.hdr) : - ima_calc_buffer_hash(buf, size, &hash.hdr); - if (!result) { - int length = sizeof(hash.hdr) + hash.hdr.length; - void *tmpbuf = krealloc(iint->ima_hash, length, - GFP_NOFS); - if (tmpbuf) { - iint->ima_hash = tmpbuf; - memcpy(iint->ima_hash, &hash, length); - iint->version = i_version; - iint->flags |= IMA_COLLECTED; - } else - result = -ENOMEM; - } + if (buf) + result = ima_calc_buffer_hash(buf, size, &hash.hdr); + else + result = ima_calc_file_hash(file, &hash.hdr); + + if (result && result != -EBADF && result != -EINVAL) + goto out; + + length = sizeof(hash.hdr) + hash.hdr.length; + tmpbuf = krealloc(iint->ima_hash, length, GFP_NOFS); + if (!tmpbuf) { + result = -ENOMEM; + goto out; } + + iint->ima_hash = tmpbuf; + memcpy(iint->ima_hash, &hash, length); + iint->version = i_version; + + /* Possibly temporary failure due to type of read (eg. O_DIRECT) */ + if (!result) + iint->flags |= IMA_COLLECTED; out: - if (result) + if (result) { + if (file->f_flags & O_DIRECT) + audit_cause = "failed(directio)"; + integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename, "collect_data", audit_cause, result, 0); + } return result; } @@ -278,7 +295,7 @@ void ima_store_measurement(struct integrity_iint_cache *iint, } result = ima_store_template(entry, violation, inode, filename, pcr); - if (!result || result == -EEXIST) { + if ((!result || result == -EEXIST) && !(file->f_flags & O_DIRECT)) { iint->flags |= IMA_MEASURED; iint->measured_pcrs |= (0x1 << pcr); } diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 90453aa1c813..cb041af9eddb 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -443,6 +443,16 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) loff_t i_size; int rc; + /* + * For consistency, fail file's opened with the O_DIRECT flag on + * filesystems mounted with/without DAX option. + */ + if (file->f_flags & O_DIRECT) { + hash->length = hash_digest_size[ima_hash_algo]; + hash->algo = ima_hash_algo; + return -EINVAL; + } + i_size = i_size_read(file_inode(file)); if (ima_ahash_minsize && i_size >= ima_ahash_minsize) { diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f8553179bdd7..06c4cf125f2a 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -242,11 +242,8 @@ static int process_measurement(struct file *file, char *buf, loff_t size, hash_algo = ima_get_hash_algo(xattr_value, xattr_len); rc = ima_collect_measurement(iint, file, buf, size, hash_algo); - if (rc != 0) { - if (file->f_flags & O_DIRECT) - rc = (iint->flags & IMA_PERMIT_DIRECTIO) ? 0 : -EACCES; + if (rc != 0 && rc != -EBADF && rc != -EINVAL) goto out_digsig; - } if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ pathname = ima_d_path(&file->f_path, &pathbuf, filename); @@ -254,12 +251,14 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, xattr_value, xattr_len, pcr); - if (action & IMA_APPRAISE_SUBMASK) + if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) rc = ima_appraise_measurement(func, iint, file, pathname, xattr_value, xattr_len, opened); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); + if ((file->f_flags & O_DIRECT) && (iint->flags & IMA_PERMIT_DIRECTIO)) + rc = 0; out_digsig: if ((mask & MAY_WRITE) && (iint->flags & IMA_DIGSIG) && !(iint->flags & IMA_NEW_FILE)) From e099863340d86741c0957217971c29becf24b881 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 7 Nov 2017 07:17:42 -0800 Subject: [PATCH 0697/2608] EVM: Add support for portable signature format commit 50b977481fce90aa5fbda55e330b9d722733e358 upstream. The EVM signature includes the inode number and (optionally) the filesystem UUID, making it impractical to ship EVM signatures in packages. This patch adds a new portable format intended to allow distributions to include EVM signatures. It is identical to the existing format but hardcodes the inode and generation numbers to 0 and does not include the filesystem UUID even if the kernel is configured to do so. Removing the inode means that the metadata and signature from one file could be copied to another file without invalidating it. This is avoided by ensuring that an IMA xattr is present during EVM validation. Portable signatures are intended to be immutable - ie, they will never be transformed into HMACs. Based on earlier work by Dmitry Kasatkin and Mikhail Kurinnoi. Signed-off-by: Matthew Garrett Cc: Dmitry Kasatkin Cc: Mikhail Kurinnoi Signed-off-by: Mimi Zohar Cc: Aditya Kali Signed-off-by: Greg Kroah-Hartman --- include/linux/integrity.h | 1 + security/integrity/evm/evm.h | 2 +- security/integrity/evm/evm_crypto.c | 75 +++++++++++++++++++++++---- security/integrity/evm/evm_main.c | 29 +++++++---- security/integrity/ima/ima_appraise.c | 4 +- security/integrity/integrity.h | 2 + 6 files changed, 92 insertions(+), 21 deletions(-) diff --git a/include/linux/integrity.h b/include/linux/integrity.h index c2d6082a1a4c..858d3f4a2241 100644 --- a/include/linux/integrity.h +++ b/include/linux/integrity.h @@ -14,6 +14,7 @@ enum integrity_status { INTEGRITY_PASS = 0, + INTEGRITY_PASS_IMMUTABLE, INTEGRITY_FAIL, INTEGRITY_NOLABEL, INTEGRITY_NOXATTRS, diff --git a/security/integrity/evm/evm.h b/security/integrity/evm/evm.h index f5f12727771a..2ff02459fcfd 100644 --- a/security/integrity/evm/evm.h +++ b/security/integrity/evm/evm.h @@ -48,7 +48,7 @@ int evm_calc_hmac(struct dentry *dentry, const char *req_xattr_name, size_t req_xattr_value_len, char *digest); int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name, const char *req_xattr_value, - size_t req_xattr_value_len, char *digest); + size_t req_xattr_value_len, char type, char *digest); int evm_init_hmac(struct inode *inode, const struct xattr *xattr, char *hmac_val); int evm_init_secfs(void); diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index ee9c3de5065a..f1f030ae363b 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -139,7 +139,7 @@ out: * protection.) */ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, - char *digest) + char type, char *digest) { struct h_misc { unsigned long ino; @@ -150,8 +150,13 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, } hmac_misc; memset(&hmac_misc, 0, sizeof(hmac_misc)); - hmac_misc.ino = inode->i_ino; - hmac_misc.generation = inode->i_generation; + /* Don't include the inode or generation number in portable + * signatures + */ + if (type != EVM_XATTR_PORTABLE_DIGSIG) { + hmac_misc.ino = inode->i_ino; + hmac_misc.generation = inode->i_generation; + } /* The hmac uid and gid must be encoded in the initial user * namespace (not the filesystems user namespace) as encoding * them in the filesystems user namespace allows an attack @@ -164,7 +169,8 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid); hmac_misc.mode = inode->i_mode; crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc)); - if (evm_hmac_attrs & EVM_ATTR_FSUUID) + if ((evm_hmac_attrs & EVM_ATTR_FSUUID) && + type != EVM_XATTR_PORTABLE_DIGSIG) crypto_shash_update(desc, &inode->i_sb->s_uuid.b[0], sizeof(inode->i_sb->s_uuid)); crypto_shash_final(desc, digest); @@ -190,6 +196,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, char *xattr_value = NULL; int error; int size; + bool ima_present = false; if (!(inode->i_opflags & IOP_XATTR)) return -EOPNOTSUPP; @@ -200,11 +207,18 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, error = -ENODATA; for (xattrname = evm_config_xattrnames; *xattrname != NULL; xattrname++) { + bool is_ima = false; + + if (strcmp(*xattrname, XATTR_NAME_IMA) == 0) + is_ima = true; + if ((req_xattr_name && req_xattr_value) && !strcmp(*xattrname, req_xattr_name)) { error = 0; crypto_shash_update(desc, (const u8 *)req_xattr_value, req_xattr_value_len); + if (is_ima) + ima_present = true; continue; } size = vfs_getxattr_alloc(dentry, *xattrname, @@ -219,9 +233,14 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, error = 0; xattr_size = size; crypto_shash_update(desc, (const u8 *)xattr_value, xattr_size); + if (is_ima) + ima_present = true; } - hmac_add_misc(desc, inode, digest); + hmac_add_misc(desc, inode, type, digest); + /* Portable EVM signatures must include an IMA hash */ + if (type == EVM_XATTR_PORTABLE_DIGSIG && !ima_present) + return -EPERM; out: kfree(xattr_value); kfree(desc); @@ -233,17 +252,45 @@ int evm_calc_hmac(struct dentry *dentry, const char *req_xattr_name, char *digest) { return evm_calc_hmac_or_hash(dentry, req_xattr_name, req_xattr_value, - req_xattr_value_len, EVM_XATTR_HMAC, digest); + req_xattr_value_len, EVM_XATTR_HMAC, digest); } int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name, const char *req_xattr_value, size_t req_xattr_value_len, - char *digest) + char type, char *digest) { return evm_calc_hmac_or_hash(dentry, req_xattr_name, req_xattr_value, - req_xattr_value_len, IMA_XATTR_DIGEST, digest); + req_xattr_value_len, type, digest); } +static int evm_is_immutable(struct dentry *dentry, struct inode *inode) +{ + const struct evm_ima_xattr_data *xattr_data = NULL; + struct integrity_iint_cache *iint; + int rc = 0; + + iint = integrity_iint_find(inode); + if (iint && (iint->flags & EVM_IMMUTABLE_DIGSIG)) + return 1; + + /* Do this the hard way */ + rc = vfs_getxattr_alloc(dentry, XATTR_NAME_EVM, (char **)&xattr_data, 0, + GFP_NOFS); + if (rc <= 0) { + if (rc == -ENODATA) + return 0; + return rc; + } + if (xattr_data->type == EVM_XATTR_PORTABLE_DIGSIG) + rc = 1; + else + rc = 0; + + kfree(xattr_data); + return rc; +} + + /* * Calculate the hmac and update security.evm xattr * @@ -256,6 +303,16 @@ int evm_update_evmxattr(struct dentry *dentry, const char *xattr_name, struct evm_ima_xattr_data xattr_data; int rc = 0; + /* + * Don't permit any transformation of the EVM xattr if the signature + * is of an immutable type + */ + rc = evm_is_immutable(dentry, inode); + if (rc < 0) + return rc; + if (rc) + return -EPERM; + rc = evm_calc_hmac(dentry, xattr_name, xattr_value, xattr_value_len, xattr_data.digest); if (rc == 0) { @@ -281,7 +338,7 @@ int evm_init_hmac(struct inode *inode, const struct xattr *lsm_xattr, } crypto_shash_update(desc, lsm_xattr->value, lsm_xattr->value_len); - hmac_add_misc(desc, inode, hmac_val); + hmac_add_misc(desc, inode, EVM_XATTR_HMAC, hmac_val); kfree(desc); return 0; } diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 063d38aef64e..1d1a7053144b 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -31,7 +31,7 @@ int evm_initialized; static char *integrity_status_msg[] = { - "pass", "fail", "no_label", "no_xattrs", "unknown" + "pass", "pass_immutable", "fail", "no_label", "no_xattrs", "unknown" }; char *evm_hmac = "hmac(sha1)"; char *evm_hash = "sha1"; @@ -120,7 +120,8 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, enum integrity_status evm_status = INTEGRITY_PASS; int rc, xattr_len; - if (iint && iint->evm_status == INTEGRITY_PASS) + if (iint && (iint->evm_status == INTEGRITY_PASS || + iint->evm_status == INTEGRITY_PASS_IMMUTABLE)) return iint->evm_status; /* if status is not PASS, try to check again - against -ENOMEM */ @@ -161,22 +162,26 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, rc = -EINVAL; break; case EVM_IMA_XATTR_DIGSIG: + case EVM_XATTR_PORTABLE_DIGSIG: rc = evm_calc_hash(dentry, xattr_name, xattr_value, - xattr_value_len, calc.digest); + xattr_value_len, xattr_data->type, + calc.digest); if (rc) break; rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM, (const char *)xattr_data, xattr_len, calc.digest, sizeof(calc.digest)); if (!rc) { - /* Replace RSA with HMAC if not mounted readonly and - * not immutable - */ - if (!IS_RDONLY(d_backing_inode(dentry)) && - !IS_IMMUTABLE(d_backing_inode(dentry))) + if (xattr_data->type == EVM_XATTR_PORTABLE_DIGSIG) { + if (iint) + iint->flags |= EVM_IMMUTABLE_DIGSIG; + evm_status = INTEGRITY_PASS_IMMUTABLE; + } else if (!IS_RDONLY(d_backing_inode(dentry)) && + !IS_IMMUTABLE(d_backing_inode(dentry))) { evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len); + } } break; default: @@ -277,7 +282,7 @@ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry) * affect security.evm. An interesting side affect of writing posix xattr * acls is their modifying of the i_mode, which is included in security.evm. * For posix xattr acls only, permit security.evm, even if it currently - * doesn't exist, to be updated. + * doesn't exist, to be updated unless the EVM signature is immutable. */ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) @@ -345,7 +350,8 @@ int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name, if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) { if (!xattr_value_len) return -EINVAL; - if (xattr_data->type != EVM_IMA_XATTR_DIGSIG) + if (xattr_data->type != EVM_IMA_XATTR_DIGSIG && + xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG) return -EPERM; } return evm_protect_xattr(dentry, xattr_name, xattr_value, @@ -422,6 +428,9 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name) /** * evm_inode_setattr - prevent updating an invalid EVM extended attribute * @dentry: pointer to the affected dentry + * + * Permit update of file attributes when files have a valid EVM signature, + * except in the case of them having an immutable portable signature. */ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr) { diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 348db9b78681..e91d28cc05cc 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -230,7 +230,9 @@ int ima_appraise_measurement(enum ima_hooks func, } status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint); - if ((status != INTEGRITY_PASS) && (status != INTEGRITY_UNKNOWN)) { + if ((status != INTEGRITY_PASS) && + (status != INTEGRITY_PASS_IMMUTABLE) && + (status != INTEGRITY_UNKNOWN)) { if ((status == INTEGRITY_NOLABEL) || (status == INTEGRITY_NOXATTRS)) cause = "missing-HMAC"; diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index a53e7e4ab06c..cbc7de33fac7 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -33,6 +33,7 @@ #define IMA_DIGSIG_REQUIRED 0x02000000 #define IMA_PERMIT_DIRECTIO 0x04000000 #define IMA_NEW_FILE 0x08000000 +#define EVM_IMMUTABLE_DIGSIG 0x10000000 #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_APPRAISE_SUBMASK) @@ -58,6 +59,7 @@ enum evm_ima_xattr_type { EVM_XATTR_HMAC, EVM_IMA_XATTR_DIGSIG, IMA_XATTR_DIGEST_NG, + EVM_XATTR_PORTABLE_DIGSIG, IMA_XATTR_LAST }; From 281c07f30f71b4982cb535b633e5286bf0c7d056 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 5 Dec 2017 21:06:34 +0200 Subject: [PATCH 0698/2608] ima: re-introduce own integrity cache lock commit 0d73a55208e94fc9fb6deaeea61438cd3280d4c0 upstream. Before IMA appraisal was introduced, IMA was using own integrity cache lock along with i_mutex. process_measurement and ima_file_free took the iint->mutex first and then the i_mutex, while setxattr, chmod and chown took the locks in reverse order. To resolve the potential deadlock, i_mutex was moved to protect entire IMA functionality and the redundant iint->mutex was eliminated. Solution was based on the assumption that filesystem code does not take i_mutex further. But when file is opened with O_DIRECT flag, direct-io implementation takes i_mutex and produces deadlock. Furthermore, certain other filesystem operations, such as llseek, also take i_mutex. More recently some filesystems have replaced their filesystem specific lock with the global i_rwsem to read a file. As a result, when IMA attempts to calculate the file hash, reading the file attempts to take the i_rwsem again. To resolve O_DIRECT related deadlock problem, this patch re-introduces iint->mutex. But to eliminate the original chmod() related deadlock problem, this patch eliminates the requirement for chmod hooks to take the iint->mutex by introducing additional atomic iint->attr_flags to indicate calling of the hooks. The allowed locking order is to take the iint->mutex first and then the i_rwsem. Original flags were cleared in chmod(), setxattr() or removwxattr() hooks and tested when file was closed or opened again. New atomic flags are set or cleared in those hooks and tested to clear iint->flags on close or on open. Atomic flags are following: * IMA_CHANGE_ATTR - indicates that chATTR() was called (chmod, chown, chgrp) and file attributes have changed. On file open, it causes IMA to clear iint->flags to re-evaluate policy and perform IMA functions again. * IMA_CHANGE_XATTR - indicates that setxattr or removexattr was called and extended attributes have changed. On file open, it causes IMA to clear iint->flags IMA_DONE_MASK to re-appraise. * IMA_UPDATE_XATTR - indicates that security.ima needs to be updated. It is cleared if file policy changes and no update is needed. * IMA_DIGSIG - indicates that file security.ima has signature and file security.ima must not update to file has on file close. * IMA_MUST_MEASURE - indicates the file is in the measurement policy. Fixes: Commit 6552321831dc ("xfs: remove i_iolock and use i_rwsem in the VFS inode instead") Signed-off-by: Dmitry Kasatkin Signed-off-by: Mimi Zohar Cc: Aditya Kali Signed-off-by: Greg Kroah-Hartman --- security/integrity/iint.c | 2 + security/integrity/ima/ima_appraise.c | 29 +++++------ security/integrity/ima/ima_main.c | 70 ++++++++++++++++++--------- security/integrity/integrity.h | 18 +++++-- 4 files changed, 78 insertions(+), 41 deletions(-) diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 6fc888ca468e..2db461c38795 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -155,12 +155,14 @@ static void init_once(void *foo) memset(iint, 0, sizeof(*iint)); iint->version = 0; iint->flags = 0UL; + iint->atomic_flags = 0; iint->ima_file_status = INTEGRITY_UNKNOWN; iint->ima_mmap_status = INTEGRITY_UNKNOWN; iint->ima_bprm_status = INTEGRITY_UNKNOWN; iint->ima_read_status = INTEGRITY_UNKNOWN; iint->evm_status = INTEGRITY_UNKNOWN; iint->measured_pcrs = 0; + mutex_init(&iint->mutex); } static int __init integrity_iintcache_init(void) diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index e91d28cc05cc..84eb6cc956bf 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -251,6 +251,7 @@ int ima_appraise_measurement(enum ima_hooks func, status = INTEGRITY_FAIL; break; } + clear_bit(IMA_DIGSIG, &iint->atomic_flags); if (xattr_len - sizeof(xattr_value->type) - hash_start >= iint->ima_hash->length) /* xattr length may be longer. md5 hash in previous @@ -269,7 +270,7 @@ int ima_appraise_measurement(enum ima_hooks func, status = INTEGRITY_PASS; break; case EVM_IMA_XATTR_DIGSIG: - iint->flags |= IMA_DIGSIG; + set_bit(IMA_DIGSIG, &iint->atomic_flags); rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA, (const char *)xattr_value, rc, iint->ima_hash->digest, @@ -320,7 +321,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file) int rc = 0; /* do not collect and update hash for digital signatures */ - if (iint->flags & IMA_DIGSIG) + if (test_bit(IMA_DIGSIG, &iint->atomic_flags)) return; if (iint->ima_file_status != INTEGRITY_PASS) @@ -330,7 +331,9 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file) if (rc < 0) return; + inode_lock(file_inode(file)); ima_fix_xattr(dentry, iint); + inode_unlock(file_inode(file)); } /** @@ -353,16 +356,14 @@ void ima_inode_post_setattr(struct dentry *dentry) return; must_appraise = ima_must_appraise(inode, MAY_ACCESS, POST_SETATTR); - iint = integrity_iint_find(inode); - if (iint) { - iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | - IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | - IMA_ACTION_RULE_FLAGS); - if (must_appraise) - iint->flags |= IMA_APPRAISE; - } if (!must_appraise) __vfs_removexattr(dentry, XATTR_NAME_IMA); + iint = integrity_iint_find(inode); + if (iint) { + set_bit(IMA_CHANGE_ATTR, &iint->atomic_flags); + if (!must_appraise) + clear_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); + } } /* @@ -391,12 +392,12 @@ static void ima_reset_appraise_flags(struct inode *inode, int digsig) iint = integrity_iint_find(inode); if (!iint) return; - - iint->flags &= ~IMA_DONE_MASK; iint->measured_pcrs = 0; + set_bit(IMA_CHANGE_XATTR, &iint->atomic_flags); if (digsig) - iint->flags |= IMA_DIGSIG; - return; + set_bit(IMA_DIGSIG, &iint->atomic_flags); + else + clear_bit(IMA_DIGSIG, &iint->atomic_flags); } int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 06c4cf125f2a..92d0ca7309f9 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -99,10 +99,13 @@ static void ima_rdwr_violation_check(struct file *file, if (!iint) iint = integrity_iint_find(inode); /* IMA_MEASURE is set from reader side */ - if (iint && (iint->flags & IMA_MEASURE)) + if (iint && test_bit(IMA_MUST_MEASURE, + &iint->atomic_flags)) send_tomtou = true; } } else { + if (must_measure) + set_bit(IMA_MUST_MEASURE, &iint->atomic_flags); if ((atomic_read(&inode->i_writecount) > 0) && must_measure) send_writers = true; } @@ -124,21 +127,24 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint, struct inode *inode, struct file *file) { fmode_t mode = file->f_mode; + bool update; if (!(mode & FMODE_WRITE)) return; - inode_lock(inode); + mutex_lock(&iint->mutex); if (atomic_read(&inode->i_writecount) == 1) { + update = test_and_clear_bit(IMA_UPDATE_XATTR, + &iint->atomic_flags); if ((iint->version != inode->i_version) || (iint->flags & IMA_NEW_FILE)) { iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); iint->measured_pcrs = 0; - if (iint->flags & IMA_APPRAISE) + if (update) ima_update_xattr(iint, file); } } - inode_unlock(inode); + mutex_unlock(&iint->mutex); } /** @@ -171,7 +177,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size, char *pathbuf = NULL; char filename[NAME_MAX]; const char *pathname = NULL; - int rc = -ENOMEM, action, must_appraise; + int rc = 0, action, must_appraise = 0; int pcr = CONFIG_IMA_MEASURE_PCR_IDX; struct evm_ima_xattr_data *xattr_value = NULL; int xattr_len = 0; @@ -202,17 +208,31 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (action) { iint = integrity_inode_get(inode); if (!iint) - goto out; + rc = -ENOMEM; } - if (violation_check) { + if (!rc && violation_check) ima_rdwr_violation_check(file, iint, action & IMA_MEASURE, &pathbuf, &pathname); - if (!action) { - rc = 0; - goto out_free; - } - } + + inode_unlock(inode); + + if (rc) + goto out; + if (!action) + goto out; + + mutex_lock(&iint->mutex); + + if (test_and_clear_bit(IMA_CHANGE_ATTR, &iint->atomic_flags)) + /* reset appraisal flags if ima_inode_post_setattr was called */ + iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | + IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | + IMA_ACTION_FLAGS); + + if (test_and_clear_bit(IMA_CHANGE_XATTR, &iint->atomic_flags)) + /* reset all flags if ima_inode_setxattr was called */ + iint->flags &= ~IMA_DONE_MASK; /* Determine if already appraised/measured based on bitmask * (IMA_MEASURE, IMA_MEASURED, IMA_XXXX_APPRAISE, IMA_XXXX_APPRAISED, @@ -230,7 +250,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (!action) { if (must_appraise) rc = ima_get_cache_status(iint, func); - goto out_digsig; + goto out_locked; } template_desc = ima_template_desc_current(); @@ -243,7 +263,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size, rc = ima_collect_measurement(iint, file, buf, size, hash_algo); if (rc != 0 && rc != -EBADF && rc != -EINVAL) - goto out_digsig; + goto out_locked; if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ pathname = ima_d_path(&file->f_path, &pathbuf, filename); @@ -251,26 +271,32 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, xattr_value, xattr_len, pcr); - if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) + if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) { + inode_lock(inode); rc = ima_appraise_measurement(func, iint, file, pathname, xattr_value, xattr_len, opened); + inode_unlock(inode); + } if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); if ((file->f_flags & O_DIRECT) && (iint->flags & IMA_PERMIT_DIRECTIO)) rc = 0; -out_digsig: - if ((mask & MAY_WRITE) && (iint->flags & IMA_DIGSIG) && +out_locked: + if ((mask & MAY_WRITE) && test_bit(IMA_DIGSIG, &iint->atomic_flags) && !(iint->flags & IMA_NEW_FILE)) rc = -EACCES; + mutex_unlock(&iint->mutex); kfree(xattr_value); -out_free: +out: if (pathbuf) __putname(pathbuf); -out: - inode_unlock(inode); - if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE)) - return -EACCES; + if (must_appraise) { + if (rc && (ima_appraise & IMA_APPRAISE_ENFORCE)) + return -EACCES; + if (file->f_mode & FMODE_WRITE) + set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); + } return 0; } diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index cbc7de33fac7..f43ac351c172 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -29,11 +29,10 @@ /* iint cache flags */ #define IMA_ACTION_FLAGS 0xff000000 #define IMA_ACTION_RULE_FLAGS 0x06000000 -#define IMA_DIGSIG 0x01000000 -#define IMA_DIGSIG_REQUIRED 0x02000000 -#define IMA_PERMIT_DIRECTIO 0x04000000 -#define IMA_NEW_FILE 0x08000000 -#define EVM_IMMUTABLE_DIGSIG 0x10000000 +#define IMA_DIGSIG_REQUIRED 0x01000000 +#define IMA_PERMIT_DIRECTIO 0x02000000 +#define IMA_NEW_FILE 0x04000000 +#define EVM_IMMUTABLE_DIGSIG 0x08000000 #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_APPRAISE_SUBMASK) @@ -54,6 +53,13 @@ #define IMA_APPRAISED_SUBMASK (IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \ IMA_BPRM_APPRAISED | IMA_READ_APPRAISED) +/* iint cache atomic_flags */ +#define IMA_CHANGE_XATTR 0 +#define IMA_UPDATE_XATTR 1 +#define IMA_CHANGE_ATTR 2 +#define IMA_DIGSIG 3 +#define IMA_MUST_MEASURE 4 + enum evm_ima_xattr_type { IMA_XATTR_DIGEST = 0x01, EVM_XATTR_HMAC, @@ -102,10 +108,12 @@ struct signature_v2_hdr { /* integrity data associated with an inode */ struct integrity_iint_cache { struct rb_node rb_node; /* rooted in integrity_iint_tree */ + struct mutex mutex; /* protects: version, flags, digest */ struct inode *inode; /* back pointer to inode in question */ u64 version; /* track inode changes */ unsigned long flags; unsigned long measured_pcrs; + unsigned long atomic_flags; enum integrity_status ima_file_status:4; enum integrity_status ima_mmap_status:4; enum integrity_status ima_bprm_status:4; From d467320fdaf328fd6f98fc686b3a3fdd226dbaef Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 23 Jan 2018 10:00:41 -0500 Subject: [PATCH 0699/2608] ima: re-initialize iint->atomic_flags commit e2598077dc6a26c9644393e5c21f22a90dbdccdb upstream. Intermittently security.ima is not being written for new files. This patch re-initializes the new slab iint->atomic_flags field before freeing it. Fixes: commit 0d73a55208e9 ("ima: re-introduce own integrity cache lock") Signed-off-by: Mimi Zohar Signed-off-by: James Morris Cc: Aditya Kali Signed-off-by: Greg Kroah-Hartman --- security/integrity/iint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 2db461c38795..f4a40fb84b1e 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -74,6 +74,7 @@ static void iint_free(struct integrity_iint_cache *iint) iint->ima_hash = NULL; iint->version = 0; iint->flags = 0UL; + iint->atomic_flags = 0UL; iint->ima_file_status = INTEGRITY_UNKNOWN; iint->ima_mmap_status = INTEGRITY_UNKNOWN; iint->ima_bprm_status = INTEGRITY_UNKNOWN; From 5ff1ad556aad473952c1caca6092aac4517ac1ae Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 1 Dec 2018 09:43:00 +0100 Subject: [PATCH 0700/2608] Linux 4.14.85 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cb131e135c42..58a248264090 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 84 +SUBLEVEL = 85 EXTRAVERSION = NAME = Petit Gorille From e12b67d81b08a79994ee9b1484a7e0c4601bfdad Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:13 -0800 Subject: [PATCH 0701/2608] mm/huge_memory: rename freeze_page() to unmap_page() commit 906f9cdfc2a0800f13683f9e4ebdfd08c12ee81b upstream. The term "freeze" is used in several ways in the kernel, and in mm it has the particular meaning of forcing page refcount temporarily to 0. freeze_page() is just too confusing a name for a function that unmaps a page: rename it unmap_page(), and rename unfreeze_page() remap_page(). Went to change the mention of freeze_page() added later in mm/rmap.c, but found it to be incorrect: ordinary page reclaim reaches there too; but the substance of the comment still seems correct, so edit it down. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261514080.2275@eggly.anvils Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index adacfe66cf3d..09d7e9c4de89 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2280,7 +2280,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, } } -static void freeze_page(struct page *page) +static void unmap_page(struct page *page) { enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; @@ -2295,7 +2295,7 @@ static void freeze_page(struct page *page) VM_BUG_ON_PAGE(!unmap_success, page); } -static void unfreeze_page(struct page *page) +static void remap_page(struct page *page) { int i; if (PageTransHuge(page)) { @@ -2412,7 +2412,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); - unfreeze_page(head); + remap_page(head); for (i = 0; i < HPAGE_PMD_NR; i++) { struct page *subpage = head + i; @@ -2593,7 +2593,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } /* - * Racy check if we can split the page, before freeze_page() will + * Racy check if we can split the page, before unmap_page() will * split PMDs */ if (!can_split_huge_page(head, &extra_pins)) { @@ -2602,7 +2602,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } mlocked = PageMlocked(page); - freeze_page(head); + unmap_page(head); VM_BUG_ON_PAGE(compound_mapcount(head), head); /* Make sure the page is not on per-CPU pagevec as it takes pin */ @@ -2659,7 +2659,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) fail: if (mapping) spin_unlock(&mapping->tree_lock); spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); - unfreeze_page(head); + remap_page(head); ret = -EBUSY; } From 30241d721f90ea1cd783b79049024379d4a5b4f0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 5 Apr 2018 16:23:28 -0700 Subject: [PATCH 0702/2608] mm/huge_memory.c: reorder operations in __split_huge_page_tail() commit 605ca5ede7643a01f4c4a15913f9714ac297f8a6 upstream. THP split makes non-atomic change of tail page flags. This is almost ok because tail pages are locked and isolated but this breaks recent changes in page locking: non-atomic operation could clear bit PG_waiters. As a result concurrent sequence get_page_unless_zero() -> lock_page() might block forever. Especially if this page was truncated later. Fix is trivial: clone flags before unfreezing page reference counter. This race exists since commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") while unsave unfreeze itself was added in commit 8df651c7059e ("thp: cleanup split_huge_page()"). clear_compound_head() also must be called before unfreezing page reference because after successful get_page_unless_zero() might follow put_page() which needs correct compound_head(). And replace page_ref_inc()/page_ref_add() with page_ref_unfreeze() which is made especially for that and has semantic of smp_store_release(). Link: http://lkml.kernel.org/r/151844393341.210639.13162088407980624477.stgit@buzz Signed-off-by: Konstantin Khlebnikov Acked-by: Kirill A. Shutemov Cc: Michal Hocko Cc: Nicholas Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 09d7e9c4de89..8969f5cf3174 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2312,26 +2312,13 @@ static void __split_huge_page_tail(struct page *head, int tail, struct page *page_tail = head + tail; VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); - VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail); /* - * tail_page->_refcount is zero and not changing from under us. But - * get_page_unless_zero() may be running from under us on the - * tail_page. If we used atomic_set() below instead of atomic_inc() or - * atomic_add(), we would then run atomic_set() concurrently with - * get_page_unless_zero(), and atomic_set() is implemented in C not - * using locked ops. spin_unlock on x86 sometime uses locked ops - * because of PPro errata 66, 92, so unless somebody can guarantee - * atomic_set() here would be safe on all archs (and not only on x86), - * it's safer to use atomic_inc()/atomic_add(). + * Clone page flags before unfreezing refcount. + * + * After successful get_page_unless_zero() might follow flags change, + * for exmaple lock_page() which set PG_waiters. */ - if (PageAnon(head) && !PageSwapCache(head)) { - page_ref_inc(page_tail); - } else { - /* Additional pin to radix tree */ - page_ref_add(page_tail, 2); - } - page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page_tail->flags |= (head->flags & ((1L << PG_referenced) | @@ -2344,14 +2331,21 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_unevictable) | (1L << PG_dirty))); - /* - * After clearing PageTail the gup refcount can be released. - * Page flags also must be visible before we make the page non-compound. - */ + /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); + /* + * Clear PageTail before unfreezing page refcount. + * + * After successful get_page_unless_zero() might follow put_page() + * which needs correct compound_head(). + */ clear_compound_head(page_tail); + /* Finally unfreeze refcount. Additional reference from page cache. */ + page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) || + PageSwapCache(head))); + if (page_is_young(head)) set_page_young(page_tail); if (page_is_idle(head)) From 16d07443b27746fc819600fbe79742335a471d47 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:16 -0800 Subject: [PATCH 0703/2608] mm/huge_memory: splitting set mapping+index before unfreeze commit 173d9d9fd3ddae84c110fea8aedf1f26af6be9ec upstream. Huge tmpfs stress testing has occasionally hit shmem_undo_range()'s VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page). Move the setting of mapping and index up before the page_ref_unfreeze() in __split_huge_page_tail() to fix this: so that a page cache lookup cannot get a reference while the tail's mapping and index are unstable. In fact, might as well move them up before the smp_wmb(): I don't see an actual need for that, but if I'm missing something, this way round is safer than the other, and no less efficient. You might argue that VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page) is misplaced, and should be left until after the trylock_page(); but left as is has not crashed since, and gives more stringent assurance. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261516380.2275@eggly.anvils Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()") Requires: 605ca5ede764 ("mm/huge_memory.c: reorder operations in __split_huge_page_tail()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Jerome Glisse Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8969f5cf3174..69ffeb439b0f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2331,6 +2331,12 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_unevictable) | (1L << PG_dirty))); + /* ->mapping in first tail page is compound_mapcount */ + VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, + page_tail); + page_tail->mapping = head->mapping; + page_tail->index = head->index + tail; + /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); @@ -2351,12 +2357,6 @@ static void __split_huge_page_tail(struct page *head, int tail, if (page_is_idle(head)) set_page_idle(page_tail); - /* ->mapping in first tail page is compound_mapcount */ - VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, - page_tail); - page_tail->mapping = head->mapping; - - page_tail->index = head->index + tail; page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); lru_add_page_tail(head, page_tail, lruvec, list); } From 6f75a09833839caef7450275cc227dcef1679493 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:21 -0800 Subject: [PATCH 0704/2608] mm/huge_memory: fix lockdep complaint on 32-bit i_size_read() commit 006d3ff27e884f80bd7d306b041afc415f63598f upstream. Huge tmpfs testing, on 32-bit kernel with lockdep enabled, showed that __split_huge_page() was using i_size_read() while holding the irq-safe lru_lock and page tree lock, but the 32-bit i_size_read() uses an irq-unsafe seqlock which should not be nested inside them. Instead, read the i_size earlier in split_huge_page_to_list(), and pass the end offset down to __split_huge_page(): all while holding head page lock, which is enough to prevent truncation of that extent before the page tree lock has been taken. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261520070.2275@eggly.anvils Fixes: baa355fd33142 ("thp: file pages support for split_huge_page()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 69ffeb439b0f..930f2aa3bb4d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2362,12 +2362,11 @@ static void __split_huge_page_tail(struct page *head, int tail, } static void __split_huge_page(struct page *page, struct list_head *list, - unsigned long flags) + pgoff_t end, unsigned long flags) { struct page *head = compound_head(page); struct zone *zone = page_zone(head); struct lruvec *lruvec; - pgoff_t end = -1; int i; lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat); @@ -2375,9 +2374,6 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* complete memcg works before add pages to LRU */ mem_cgroup_split_huge_fixup(head); - if (!PageAnon(page)) - end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE); - for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond i_size: drop them from page cache */ @@ -2549,6 +2545,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) int count, mapcount, extra_pins, ret; bool mlocked; unsigned long flags; + pgoff_t end; VM_BUG_ON_PAGE(is_huge_zero_page(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); @@ -2571,6 +2568,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) ret = -EBUSY; goto out; } + end = -1; mapping = NULL; anon_vma_lock_write(anon_vma); } else { @@ -2584,6 +2582,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) anon_vma = NULL; i_mmap_lock_read(mapping); + + /* + *__split_huge_page() may need to trim off pages beyond EOF: + * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, + * which cannot be nested inside the page tree lock. So note + * end now: i_size itself may be changed at any moment, but + * head page lock is good enough to serialize the trimming. + */ + end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); } /* @@ -2633,7 +2640,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (mapping) __dec_node_page_state(page, NR_SHMEM_THPS); spin_unlock(&pgdata->split_queue_lock); - __split_huge_page(page, list, flags); + __split_huge_page(page, list, end, flags); if (PageSwapCache(head)) { swp_entry_t entry = { .val = page_private(head) }; From 81d2848c99cc4951eb3f7c3ce4313e830b07cc2b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:25 -0800 Subject: [PATCH 0705/2608] mm/khugepaged: collapse_shmem() stop if punched or truncated commit 701270fa193aadf00bdcf607738f64997275d4c7 upstream. Huge tmpfs testing showed that although collapse_shmem() recognizes a concurrently truncated or hole-punched page correctly, its handling of holes was liable to refill an emptied extent. Add check to stop that. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261522040.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Reviewed-by: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0a5bb3e8a8a3..d4a06afbeda4 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1352,6 +1352,16 @@ static void collapse_shmem(struct mm_struct *mm, radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { int n = min(iter.index, end) - index; + /* + * Stop if extent has been hole-punched, and is now completely + * empty (the more obvious i_size_read() check would take an + * irq-unsafe seqlock on 32-bit). + */ + if (n >= HPAGE_PMD_NR) { + result = SCAN_TRUNCATED; + goto tree_locked; + } + /* * Handle holes in the radix tree: charge it from shmem and * insert relevant subpage of new_page into the radix-tree. @@ -1463,6 +1473,11 @@ out_unlock: if (result == SCAN_SUCCEED && index < end) { int n = end - index; + /* Stop if extent has been truncated, and is now empty */ + if (n >= HPAGE_PMD_NR) { + result = SCAN_TRUNCATED; + goto tree_locked; + } if (!shmem_charge(mapping->host, n)) { result = SCAN_FAIL; goto tree_locked; From 98f1ae169c2e7264b198c3df08612f1bcd0fcfac Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:29 -0800 Subject: [PATCH 0706/2608] mm/khugepaged: fix crashes due to misaccounted holes commit aaa52e340073b7f4593b3c4ddafcafa70cf838b5 upstream. Huge tmpfs testing on a shortish file mapped into a pmd-rounded extent hit shmem_evict_inode()'s WARN_ON(inode->i_blocks) followed by clear_inode()'s BUG_ON(inode->i_data.nrpages) when the file was later closed and unlinked. khugepaged's collapse_shmem() was forgetting to update mapping->nrpages on the rollback path, after it had added but then needs to undo some holes. There is indeed an irritating asymmetry between shmem_charge(), whose callers want it to increment nrpages after successfully accounting blocks, and shmem_uncharge(), when __delete_from_page_cache() already decremented nrpages itself: oh well, just add a comment on that to them both. And shmem_recalc_inode() is supposed to be called when the accounting is expected to be in balance (so it can deduce from imbalance that reclaim discarded some pages): so change shmem_charge() to update nrpages earlier (though it's rare for the difference to matter at all). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261523450.2275@eggly.anvils Fixes: 800d8c63b2e98 ("shmem: add huge pages support") Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 4 +++- mm/shmem.c | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index d4a06afbeda4..be7b0863e33c 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1539,8 +1539,10 @@ tree_unlocked: *hpage = NULL; } else { /* Something went wrong: rollback changes to the radix-tree */ - shmem_uncharge(mapping->host, nr_none); spin_lock_irq(&mapping->tree_lock); + mapping->nrpages -= nr_none; + shmem_uncharge(mapping->host, nr_none); + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { if (iter.index >= end) diff --git a/mm/shmem.c b/mm/shmem.c index fa08f56fd5e5..177fef62cbbd 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -296,12 +296,14 @@ bool shmem_charge(struct inode *inode, long pages) if (!shmem_inode_acct_block(inode, pages)) return false; + /* nrpages adjustment first, then shmem_recalc_inode() when balanced */ + inode->i_mapping->nrpages += pages; + spin_lock_irqsave(&info->lock, flags); info->alloced += pages; inode->i_blocks += pages * BLOCKS_PER_PAGE; shmem_recalc_inode(inode); spin_unlock_irqrestore(&info->lock, flags); - inode->i_mapping->nrpages += pages; return true; } @@ -311,6 +313,8 @@ void shmem_uncharge(struct inode *inode, long pages) struct shmem_inode_info *info = SHMEM_I(inode); unsigned long flags; + /* nrpages adjustment done by __delete_from_page_cache() or caller */ + spin_lock_irqsave(&info->lock, flags); info->alloced -= pages; inode->i_blocks -= pages * BLOCKS_PER_PAGE; From 5021918a515cd86fb8715ec0f24ae66f15f5aba5 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:35 -0800 Subject: [PATCH 0707/2608] mm/khugepaged: collapse_shmem() remember to clear holes commit 2af8ff291848cc4b1cce24b6c943394eb2c761e8 upstream. Huge tmpfs testing reminds us that there is no __GFP_ZERO in the gfp flags khugepaged uses to allocate a huge page - in all common cases it would just be a waste of effort - so collapse_shmem() must remember to clear out any holes that it instantiates. The obvious place to do so, where they are put into the page cache tree, is not a good choice: because interrupts are disabled there. Leave it until further down, once success is assured, where the other pages are copied (before setting PageUptodate). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261525080.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index be7b0863e33c..f535dbe3f9f5 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1502,7 +1502,12 @@ tree_unlocked: * Replacing old pages with new one has succeed, now we need to * copy the content and free old pages. */ + index = start; list_for_each_entry_safe(page, tmp, &pagelist, lru) { + while (index < page->index) { + clear_highpage(new_page + (index % HPAGE_PMD_NR)); + index++; + } copy_highpage(new_page + (page->index % HPAGE_PMD_NR), page); list_del(&page->lru); @@ -1512,6 +1517,11 @@ tree_unlocked: ClearPageActive(page); ClearPageUnevictable(page); put_page(page); + index++; + } + while (index < end) { + clear_highpage(new_page + (index % HPAGE_PMD_NR)); + index++; } local_irq_save(flags); From b447a6adf423cd80862a5944e4e9d9c91afff05a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:39 -0800 Subject: [PATCH 0708/2608] mm/khugepaged: minor reorderings in collapse_shmem() commit 042a30824871fa3149b0127009074b75cc25863c upstream. Several cleanups in collapse_shmem(): most of which probably do not really matter, beyond doing things in a more familiar and reassuring order. Simplify the failure gotos in the main loop, and on success update stats while interrupts still disabled from the last iteration. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261526400.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 73 ++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f535dbe3f9f5..0eac4344477a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1333,13 +1333,12 @@ static void collapse_shmem(struct mm_struct *mm, goto out; } + __SetPageLocked(new_page); + __SetPageSwapBacked(new_page); new_page->index = start; new_page->mapping = mapping; - __SetPageSwapBacked(new_page); - __SetPageLocked(new_page); BUG_ON(!page_ref_freeze(new_page, 1)); - /* * At this point the new_page is 'frozen' (page_count() is zero), locked * and not up-to-date. It's safe to insert it into radix tree, because @@ -1368,13 +1367,13 @@ static void collapse_shmem(struct mm_struct *mm, */ if (n && !shmem_charge(mapping->host, n)) { result = SCAN_FAIL; - break; + goto tree_locked; } - nr_none += n; for (; index < min(iter.index, end); index++) { radix_tree_insert(&mapping->page_tree, index, new_page + (index % HPAGE_PMD_NR)); } + nr_none += n; /* We are done. */ if (index >= end) @@ -1390,12 +1389,12 @@ static void collapse_shmem(struct mm_struct *mm, result = SCAN_FAIL; goto tree_unlocked; } - spin_lock_irq(&mapping->tree_lock); } else if (trylock_page(page)) { get_page(page); + spin_unlock_irq(&mapping->tree_lock); } else { result = SCAN_PAGE_LOCK; - break; + goto tree_locked; } /* @@ -1410,11 +1409,10 @@ static void collapse_shmem(struct mm_struct *mm, result = SCAN_TRUNCATED; goto out_unlock; } - spin_unlock_irq(&mapping->tree_lock); if (isolate_lru_page(page)) { result = SCAN_DEL_PAGE_LRU; - goto out_isolate_failed; + goto out_unlock; } if (page_mapped(page)) @@ -1436,7 +1434,9 @@ static void collapse_shmem(struct mm_struct *mm, */ if (!page_ref_freeze(page, 3)) { result = SCAN_PAGE_COUNT; - goto out_lru; + spin_unlock_irq(&mapping->tree_lock); + putback_lru_page(page); + goto out_unlock; } /* @@ -1452,17 +1452,10 @@ static void collapse_shmem(struct mm_struct *mm, slot = radix_tree_iter_resume(slot, &iter); index++; continue; -out_lru: - spin_unlock_irq(&mapping->tree_lock); - putback_lru_page(page); -out_isolate_failed: - unlock_page(page); - put_page(page); - goto tree_unlocked; out_unlock: unlock_page(page); put_page(page); - break; + goto tree_unlocked; } /* @@ -1470,7 +1463,7 @@ out_unlock: * This code only triggers if there's nothing in radix tree * beyond 'end'. */ - if (result == SCAN_SUCCEED && index < end) { + if (index < end) { int n = end - index; /* Stop if extent has been truncated, and is now empty */ @@ -1482,7 +1475,6 @@ out_unlock: result = SCAN_FAIL; goto tree_locked; } - for (; index < end; index++) { radix_tree_insert(&mapping->page_tree, index, new_page + (index % HPAGE_PMD_NR)); @@ -1490,14 +1482,19 @@ out_unlock: nr_none += n; } + __inc_node_page_state(new_page, NR_SHMEM_THPS); + if (nr_none) { + struct zone *zone = page_zone(new_page); + + __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); + __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); + } + tree_locked: spin_unlock_irq(&mapping->tree_lock); tree_unlocked: if (result == SCAN_SUCCEED) { - unsigned long flags; - struct zone *zone = page_zone(new_page); - /* * Replacing old pages with new one has succeed, now we need to * copy the content and free old pages. @@ -1511,11 +1508,11 @@ tree_unlocked: copy_highpage(new_page + (page->index % HPAGE_PMD_NR), page); list_del(&page->lru); - unlock_page(page); - page_ref_unfreeze(page, 1); page->mapping = NULL; + page_ref_unfreeze(page, 1); ClearPageActive(page); ClearPageUnevictable(page); + unlock_page(page); put_page(page); index++; } @@ -1524,28 +1521,17 @@ tree_unlocked: index++; } - local_irq_save(flags); - __inc_node_page_state(new_page, NR_SHMEM_THPS); - if (nr_none) { - __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); - __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); - } - local_irq_restore(flags); - - /* - * Remove pte page tables, so we can re-faulti - * the page as huge. - */ - retract_page_tables(mapping, start); - /* Everything is ready, let's unfreeze the new_page */ - set_page_dirty(new_page); SetPageUptodate(new_page); page_ref_unfreeze(new_page, HPAGE_PMD_NR); + set_page_dirty(new_page); mem_cgroup_commit_charge(new_page, memcg, false, true); lru_cache_add_anon(new_page); - unlock_page(new_page); + /* + * Remove pte page tables, so we can re-fault the page as huge. + */ + retract_page_tables(mapping, start); *hpage = NULL; } else { /* Something went wrong: rollback changes to the radix-tree */ @@ -1578,8 +1564,8 @@ tree_unlocked: slot, page); slot = radix_tree_iter_resume(slot, &iter); spin_unlock_irq(&mapping->tree_lock); - putback_lru_page(page); unlock_page(page); + putback_lru_page(page); spin_lock_irq(&mapping->tree_lock); } VM_BUG_ON(nr_none); @@ -1588,9 +1574,10 @@ tree_unlocked: /* Unfreeze new_page, caller would take care about freeing it */ page_ref_unfreeze(new_page, 1); mem_cgroup_cancel_charge(new_page, memcg, true); - unlock_page(new_page); new_page->mapping = NULL; } + + unlock_page(new_page); out: VM_BUG_ON(!list_empty(&pagelist)); /* TODO: tracepoints */ From 84c55f8d40102855f3b86ffdfad6b5e4da517858 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:43 -0800 Subject: [PATCH 0709/2608] mm/khugepaged: collapse_shmem() without freezing new_page commit 87c460a0bded56195b5eb497d44709777ef7b415 upstream. khugepaged's collapse_shmem() does almost all of its work, to assemble the huge new_page from 512 scattered old pages, with the new_page's refcount frozen to 0 (and refcounts of all old pages so far also frozen to 0). Including shmem_getpage() to read in any which were out on swap, memory reclaim if necessary to allocate their intermediate pages, and copying over all the data from old to new. Imagine the frozen refcount as a spinlock held, but without any lock debugging to highlight the abuse: it's not good, and under serious load heads into lockups - speculative getters of the page are not expecting to spin while khugepaged is rescheduled. One can get a little further under load by hacking around elsewhere; but fortunately, freezing the new_page turns out to have been entirely unnecessary, with no hacks needed elsewhere. The huge new_page lock is already held throughout, and guards all its subpages as they are brought one by one into the page cache tree; and anything reading the data in that page, without the lock, before it has been marked PageUptodate, would already be in the wrong. So simply eliminate the freezing of the new_page. Each of the old pages remains frozen with refcount 0 after it has been replaced by a new_page subpage in the page cache tree, until they are all unfrozen on success or failure: just as before. They could be unfrozen sooner, but cause no problem once no longer visible to find_get_entry(), filemap_map_pages() and other speculative lookups. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261527570.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0eac4344477a..45bf0e5775f7 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1288,7 +1288,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * collapse_shmem - collapse small tmpfs/shmem pages into huge one. * * Basic scheme is simple, details are more complex: - * - allocate and freeze a new huge page; + * - allocate and lock a new huge page; * - scan over radix tree replacing old pages the new one * + swap in pages if necessary; * + fill in gaps; @@ -1296,11 +1296,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * - if replacing succeed: * + copy data over; * + free old pages; - * + unfreeze huge page; + * + unlock huge page; * - if replacing failed; * + put all pages back and unfreeze them; * + restore gaps in the radix-tree; - * + free huge page; + * + unlock and free huge page; */ static void collapse_shmem(struct mm_struct *mm, struct address_space *mapping, pgoff_t start, @@ -1337,13 +1337,11 @@ static void collapse_shmem(struct mm_struct *mm, __SetPageSwapBacked(new_page); new_page->index = start; new_page->mapping = mapping; - BUG_ON(!page_ref_freeze(new_page, 1)); /* - * At this point the new_page is 'frozen' (page_count() is zero), locked - * and not up-to-date. It's safe to insert it into radix tree, because - * nobody would be able to map it or use it in other way until we - * unfreeze it. + * At this point the new_page is locked and not up-to-date. + * It's safe to insert it into the page cache, because nobody would + * be able to map it or use it in another way until we unlock it. */ index = start; @@ -1521,9 +1519,8 @@ tree_unlocked: index++; } - /* Everything is ready, let's unfreeze the new_page */ SetPageUptodate(new_page); - page_ref_unfreeze(new_page, HPAGE_PMD_NR); + page_ref_add(new_page, HPAGE_PMD_NR - 1); set_page_dirty(new_page); mem_cgroup_commit_charge(new_page, memcg, false, true); lru_cache_add_anon(new_page); @@ -1571,8 +1568,6 @@ tree_unlocked: VM_BUG_ON(nr_none); spin_unlock_irq(&mapping->tree_lock); - /* Unfreeze new_page, caller would take care about freeing it */ - page_ref_unfreeze(new_page, 1); mem_cgroup_cancel_charge(new_page, memcg, true); new_page->mapping = NULL; } From 8f85b74fb1c0a7fa267f828a90b582e27fdf56e7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:47 -0800 Subject: [PATCH 0710/2608] mm/khugepaged: collapse_shmem() do not crash on Compound commit 06a5e1268a5fb9c2b346a3da6b97e85f2eba0f07 upstream. collapse_shmem()'s VM_BUG_ON_PAGE(PageTransCompound) was unsafe: before it holds page lock of the first page, racing truncation then extension might conceivably have inserted a hugepage there already. Fail with the SCAN_PAGE_COMPOUND result, instead of crashing (CONFIG_DEBUG_VM=y) or otherwise mishandling the unexpected hugepage - though later we might code up a more constructive way of handling it, with SCAN_SUCCESS. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261529310.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Cc: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 45bf0e5775f7..d27a73737f1a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1401,7 +1401,15 @@ static void collapse_shmem(struct mm_struct *mm, */ VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageUptodate(page), page); - VM_BUG_ON_PAGE(PageTransCompound(page), page); + + /* + * If file was truncated then extended, or hole-punched, before + * we locked the first page, then a THP might be there already. + */ + if (PageTransCompound(page)) { + result = SCAN_PAGE_COMPOUND; + goto out_unlock; + } if (page_mapping(page) != mapping) { result = SCAN_TRUNCATED; From 30cdc0c3bac950bebd3ba59f5ff980cdd3710e0f Mon Sep 17 00:00:00 2001 From: Matthias Schwarzott Date: Mon, 30 Oct 2017 06:07:29 -0400 Subject: [PATCH 0711/2608] media: em28xx: Fix use-after-free when disconnecting [ Upstream commit 910b0797fa9e8af09c44a3fa36cb310ba7a7218d ] Fix bug by moving the i2c_unregister_device calls after deregistration of dvb frontend. The new style i2c drivers already destroys the frontend object at i2c_unregister_device time. When the dvb frontend is unregistered afterwards it leads to this oops: [ 6058.866459] BUG: unable to handle kernel NULL pointer dereference at 00000000000001f8 [ 6058.866578] IP: dvb_frontend_stop+0x30/0xd0 [dvb_core] [ 6058.866644] PGD 0 [ 6058.866646] P4D 0 [ 6058.866726] Oops: 0000 [#1] SMP [ 6058.866768] Modules linked in: rc_pinnacle_pctv_hd(O) em28xx_rc(O) si2157(O) si2168(O) em28xx_dvb(O) em28xx(O) si2165(O) a8293(O) tda10071(O) tea5767(O) tuner(O) cx23885(O) tda18271(O) videobuf2_dvb(O) videobuf2_dma_sg(O) m88ds3103(O) tveeprom(O) cx2341x(O) v4l2_common(O) dvb_core(O) rc_core(O) videobuf2_memops(O) videobuf2_v4l2(O) videobuf2_core(O) videodev(O) media(O) bluetooth ecdh_generic ums_realtek uas rtl8192cu rtl_usb rtl8192c_common rtlwifi usb_storage snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic i2c_mux snd_hda_intel snd_hda_codec snd_hwdep x86_pkg_temp_thermal snd_hda_core kvm_intel kvm irqbypass [last unloaded: videobuf2_memops] [ 6058.867497] CPU: 2 PID: 7349 Comm: kworker/2:0 Tainted: G W O 4.13.9-gentoo #1 [ 6058.867595] Hardware name: MEDION E2050 2391/H81H3-EM2, BIOS H81EM2W08.308 08/25/2014 [ 6058.867692] Workqueue: usb_hub_wq hub_event [ 6058.867746] task: ffff88011a15e040 task.stack: ffffc90003074000 [ 6058.867825] RIP: 0010:dvb_frontend_stop+0x30/0xd0 [dvb_core] [ 6058.867896] RSP: 0018:ffffc90003077b58 EFLAGS: 00010293 [ 6058.867964] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000010040001f [ 6058.868056] RDX: ffff88011a15e040 RSI: ffffea000464e400 RDI: ffff88001cbe3028 [ 6058.868150] RBP: ffffc90003077b68 R08: ffff880119390380 R09: 000000010040001f [ 6058.868241] R10: ffffc90003077b18 R11: 000000000001e200 R12: ffff88001cbe3028 [ 6058.868330] R13: ffff88001cbe68d0 R14: ffff8800cf734000 R15: ffff8800cf734098 [ 6058.868419] FS: 0000000000000000(0000) GS:ffff88011fb00000(0000) knlGS:0000000000000000 [ 6058.868511] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6058.868578] CR2: 00000000000001f8 CR3: 00000001113c5000 CR4: 00000000001406e0 [ 6058.868662] Call Trace: [ 6058.868705] dvb_unregister_frontend+0x2a/0x80 [dvb_core] [ 6058.868774] em28xx_dvb_fini+0x132/0x220 [em28xx_dvb] [ 6058.868840] em28xx_close_extension+0x34/0x90 [em28xx] [ 6058.868902] em28xx_usb_disconnect+0x4e/0x70 [em28xx] [ 6058.868968] usb_unbind_interface+0x6d/0x260 [ 6058.869025] device_release_driver_internal+0x150/0x210 [ 6058.869094] device_release_driver+0xd/0x10 [ 6058.869150] bus_remove_device+0xe4/0x160 [ 6058.869204] device_del+0x1ce/0x2f0 [ 6058.869253] usb_disable_device+0x99/0x270 [ 6058.869306] usb_disconnect+0x8d/0x260 [ 6058.869359] hub_event+0x93d/0x1520 [ 6058.869408] ? dequeue_task_fair+0xae5/0xd20 [ 6058.869467] process_one_work+0x1d9/0x3e0 [ 6058.869522] worker_thread+0x43/0x3e0 [ 6058.869576] kthread+0x104/0x140 [ 6058.869602] ? trace_event_raw_event_workqueue_work+0x80/0x80 [ 6058.869640] ? kthread_create_on_node+0x40/0x40 [ 6058.869673] ret_from_fork+0x22/0x30 [ 6058.869698] Code: 54 49 89 fc 53 48 8b 9f 18 03 00 00 0f 1f 44 00 00 41 83 bc 24 04 05 00 00 02 74 0c 41 c7 84 24 04 05 00 00 01 00 00 00 0f ae f0 <48> 8b bb f8 01 00 00 48 85 ff 74 5c e8 df 40 f0 e0 48 8b 93 f8 [ 6058.869850] RIP: dvb_frontend_stop+0x30/0xd0 [dvb_core] RSP: ffffc90003077b58 [ 6058.869894] CR2: 00000000000001f8 [ 6058.875880] ---[ end trace 717eecf7193b3fc6 ]--- Signed-off-by: Matthias Schwarzott Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/em28xx/em28xx-dvb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c index 4a7db623fe29..29cdaaf1ed90 100644 --- a/drivers/media/usb/em28xx/em28xx-dvb.c +++ b/drivers/media/usb/em28xx/em28xx-dvb.c @@ -2105,6 +2105,8 @@ static int em28xx_dvb_fini(struct em28xx *dev) } } + em28xx_unregister_dvb(dvb); + /* remove I2C SEC */ client = dvb->i2c_client_sec; if (client) { @@ -2126,7 +2128,6 @@ static int em28xx_dvb_fini(struct em28xx *dev) i2c_unregister_device(client); } - em28xx_unregister_dvb(dvb); kfree(dvb); dev->dvb = NULL; kref_put(&dev->ref, em28xx_free_device); From ba9606d0121b644019d0699b5973926cc1a91c10 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 12 Jun 2018 09:33:16 +0200 Subject: [PATCH 0712/2608] ubi: Initialize Fastmap checkmapping correctly commit 25677478474a91fa1b46f19a4a591a9848bca6fb upstream We cannot do it last, otherwithse it will be skipped for dynamic volumes. Reported-by: Lachmann, Juergen Fixes: 34653fd8c46e ("ubi: fastmap: Check each mapping only once") Signed-off-by: Richard Weinberger Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- drivers/mtd/ubi/vtbl.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 94d7a865b135..7504f430c011 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -578,6 +578,16 @@ static int init_volumes(struct ubi_device *ubi, vol->ubi = ubi; reserved_pebs += vol->reserved_pebs; + /* + * We use ubi->peb_count and not vol->reserved_pebs because + * we want to keep the code simple. Otherwise we'd have to + * resize/check the bitmap upon volume resize too. + * Allocating a few bytes more does not hurt. + */ + err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); + if (err) + return err; + /* * In case of dynamic volume UBI knows nothing about how many * data is stored there. So assume the whole volume is used. @@ -620,16 +630,6 @@ static int init_volumes(struct ubi_device *ubi, (long long)(vol->used_ebs - 1) * vol->usable_leb_size; vol->used_bytes += av->last_data_size; vol->last_eb_bytes = av->last_data_size; - - /* - * We use ubi->peb_count and not vol->reserved_pebs because - * we want to keep the code simple. Otherwise we'd have to - * resize/check the bitmap upon volume resize too. - * Allocating a few bytes more does not hurt. - */ - err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); - if (err) - return err; } /* And add the layout volume */ From 2fd0d0f9bb59ec5b628622e30f22158cb564c4ea Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 Jul 2018 15:17:46 +0200 Subject: [PATCH 0713/2608] libceph: store ceph_auth_handshake pointer in ceph_connection commit 262614c4294d33b1f19e0d18c0091d9c329b544a upstream. We already copy authorizer_reply_buf and authorizer_reply_buf_len into ceph_connection. Factoring out __prepare_write_connect() requires two more: authorizer_buf and authorizer_buf_len. Store the pointer to the handshake in con->auth rather than piling on. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/linux/ceph/messenger.h | 3 +- net/ceph/messenger.c | 54 ++++++++++++++++------------------ 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index ead9d85f1c11..9056077c023f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -203,9 +203,8 @@ struct ceph_connection { attempt for this connection, client */ u32 peer_global_seq; /* peer's global seq for this connection */ + struct ceph_auth_handshake *auth; int auth_retry; /* true if we need a newer authorizer */ - void *auth_reply_buf; /* where to put the authorizer reply */ - int auth_reply_buf_len; struct mutex mutex; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5281da82371a..3a82e6d2864b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1411,24 +1411,26 @@ static void prepare_write_keepalive(struct ceph_connection *con) * Connection negotiation. */ -static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con, - int *auth_proto) +static int get_connect_authorizer(struct ceph_connection *con) { struct ceph_auth_handshake *auth; + int auth_proto; if (!con->ops->get_authorizer) { + con->auth = NULL; con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; con->out_connect.authorizer_len = 0; - return NULL; + return 0; } - auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); + auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry); if (IS_ERR(auth)) - return auth; + return PTR_ERR(auth); - con->auth_reply_buf = auth->authorizer_reply_buf; - con->auth_reply_buf_len = auth->authorizer_reply_buf_len; - return auth; + con->auth = auth; + con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); + con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); + return 0; } /* @@ -1448,8 +1450,7 @@ static int prepare_write_connect(struct ceph_connection *con) { unsigned int global_seq = get_global_seq(con->msgr, 0); int proto; - int auth_proto; - struct ceph_auth_handshake *auth; + int ret; switch (con->peer_name.type) { case CEPH_ENTITY_TYPE_MON: @@ -1476,20 +1477,15 @@ static int prepare_write_connect(struct ceph_connection *con) con->out_connect.protocol_version = cpu_to_le32(proto); con->out_connect.flags = 0; - auth_proto = CEPH_AUTH_UNKNOWN; - auth = get_connect_authorizer(con, &auth_proto); - if (IS_ERR(auth)) - return PTR_ERR(auth); - - con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); - con->out_connect.authorizer_len = auth ? - cpu_to_le32(auth->authorizer_buf_len) : 0; + ret = get_connect_authorizer(con); + if (ret) + return ret; con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); - if (auth && auth->authorizer_buf_len) - con_out_kvec_add(con, auth->authorizer_buf_len, - auth->authorizer_buf); + if (con->auth) + con_out_kvec_add(con, con->auth->authorizer_buf_len, + con->auth->authorizer_buf); con->out_more = 0; con_flag_set(con, CON_FLAG_WRITE_PENDING); @@ -1753,11 +1749,14 @@ static int read_partial_connect(struct ceph_connection *con) if (ret <= 0) goto out; - size = le32_to_cpu(con->in_reply.authorizer_len); - end += size; - ret = read_partial(con, end, size, con->auth_reply_buf); - if (ret <= 0) - goto out; + if (con->auth) { + size = le32_to_cpu(con->in_reply.authorizer_len); + end += size; + ret = read_partial(con, end, size, + con->auth->authorizer_reply_buf); + if (ret <= 0) + goto out; + } dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", con, (int)con->in_reply.tag, @@ -1765,7 +1764,6 @@ static int read_partial_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.global_seq)); out: return ret; - } /* @@ -2048,7 +2046,7 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); - if (con->auth_reply_buf) { + if (con->auth) { /* * Any connection that defines ->get_authorizer() * should also define ->verify_authorizer_reply(). From 66abd96062b627f0ee20a684ebba48cec80233d7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 Jul 2018 17:43:47 +0200 Subject: [PATCH 0714/2608] libceph: factor out __prepare_write_connect() commit c0f56b483aa09c99bfe97409a43ad786f33b8a5a upstream. Will be used for sending ceph_msg_connect with an updated authorizer, after the server challenges the initial authorizer. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/ceph/messenger.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3a82e6d2864b..0b121327d32f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1446,6 +1446,17 @@ static void prepare_write_banner(struct ceph_connection *con) con_flag_set(con, CON_FLAG_WRITE_PENDING); } +static void __prepare_write_connect(struct ceph_connection *con) +{ + con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect); + if (con->auth) + con_out_kvec_add(con, con->auth->authorizer_buf_len, + con->auth->authorizer_buf); + + con->out_more = 0; + con_flag_set(con, CON_FLAG_WRITE_PENDING); +} + static int prepare_write_connect(struct ceph_connection *con) { unsigned int global_seq = get_global_seq(con->msgr, 0); @@ -1481,15 +1492,7 @@ static int prepare_write_connect(struct ceph_connection *con) if (ret) return ret; - con_out_kvec_add(con, sizeof (con->out_connect), - &con->out_connect); - if (con->auth) - con_out_kvec_add(con, con->auth->authorizer_buf_len, - con->auth->authorizer_buf); - - con->out_more = 0; - con_flag_set(con, CON_FLAG_WRITE_PENDING); - + __prepare_write_connect(con); return 0; } From 0858417b5c2e7bcc0b9e52c4b76e2af3d69e138b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 Jul 2018 18:05:43 +0200 Subject: [PATCH 0715/2608] libceph: factor out __ceph_x_decrypt() commit c571fe24d243bfe7017f0e67fe800b3cc2a1d1f7 upstream. Will be used for decrypting the server challenge which is only preceded by ceph_x_encrypt_header. Drop struct_v check to allow for extending ceph_x_encrypt_header in the future. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/ceph/auth_x.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 2f4a1baf5f52..9cac05239346 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -70,25 +70,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, return sizeof(u32) + ciphertext_len; } +static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p, + int ciphertext_len) +{ + struct ceph_x_encrypt_header *hdr = p; + int plaintext_len; + int ret; + + ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len, + &plaintext_len); + if (ret) + return ret; + + if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) { + pr_err("%s bad magic\n", __func__); + return -EINVAL; + } + + return plaintext_len - sizeof(*hdr); +} + static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) { - struct ceph_x_encrypt_header *hdr = *p + sizeof(u32); - int ciphertext_len, plaintext_len; + int ciphertext_len; int ret; ceph_decode_32_safe(p, end, ciphertext_len, e_inval); ceph_decode_need(p, end, ciphertext_len, e_inval); - ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len, - &plaintext_len); - if (ret) + ret = __ceph_x_decrypt(secret, *p, ciphertext_len); + if (ret < 0) return ret; - if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) - return -EPERM; - *p += ciphertext_len; - return plaintext_len - sizeof(struct ceph_x_encrypt_header); + return ret; e_inval: return -EINVAL; From a55056e1523990e1ef9d70531ffaf27fd1ddff55 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 16:37:54 +0200 Subject: [PATCH 0716/2608] libceph: factor out encrypt_authorizer() commit 149cac4a50b0b4081b38b2f38de6ef71c27eaa85 upstream. Will be used for encrypting both the initial and updated authorizers. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/ceph/auth_x.c | 49 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 9cac05239346..722791f45b2a 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -290,6 +290,38 @@ bad: return -EINVAL; } +/* + * Encode and encrypt the second part (ceph_x_authorize_b) of the + * authorizer. The first part (ceph_x_authorize_a) should already be + * encoded. + */ +static int encrypt_authorizer(struct ceph_x_authorizer *au) +{ + struct ceph_x_authorize_a *msg_a; + struct ceph_x_authorize_b *msg_b; + void *p, *end; + int ret; + + msg_a = au->buf->vec.iov_base; + WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id)); + p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len); + end = au->buf->vec.iov_base + au->buf->vec.iov_len; + + msg_b = p + ceph_x_encrypt_offset(); + msg_b->struct_v = 1; + msg_b->nonce = cpu_to_le64(au->nonce); + + ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); + if (ret < 0) + return ret; + + p += ret; + WARN_ON(p > end); + au->buf->vec.iov_len = p - au->buf->vec.iov_base; + + return 0; +} + static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au) { ceph_crypto_key_destroy(&au->session_key); @@ -306,7 +338,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, int maxlen; struct ceph_x_authorize_a *msg_a; struct ceph_x_authorize_b *msg_b; - void *p, *end; int ret; int ticket_blob_len = (th->ticket_blob ? th->ticket_blob->vec.iov_len : 0); @@ -350,21 +381,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, dout(" th %p secret_id %lld %lld\n", th, th->secret_id, le64_to_cpu(msg_a->ticket_blob.secret_id)); - p = msg_a + 1; - p += ticket_blob_len; - end = au->buf->vec.iov_base + au->buf->vec.iov_len; - - msg_b = p + ceph_x_encrypt_offset(); - msg_b->struct_v = 1; get_random_bytes(&au->nonce, sizeof(au->nonce)); - msg_b->nonce = cpu_to_le64(au->nonce); - ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); - if (ret < 0) + ret = encrypt_authorizer(au); + if (ret) { + pr_err("failed to encrypt authorizer: %d", ret); goto out_au; + } - p += ret; - WARN_ON(p > end); - au->buf->vec.iov_len = p - au->buf->vec.iov_base; dout(" built authorizer nonce %llx len %d\n", au->nonce, (int)au->buf->vec.iov_len); return 0; From 3fd73c8a71f299e30359a63add1f33e3fd834831 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:18:34 +0200 Subject: [PATCH 0717/2608] libceph: add authorizer challenge commit 6daca13d2e72bedaaacfc08f873114c9307d5aea upstream. When a client authenticates with a service, an authorizer is sent with a nonce to the service (ceph_x_authorize_[ab]) and the service responds with a mutation of that nonce (ceph_x_authorize_reply). This lets the client verify the service is who it says it is but it doesn't protect against a replay: someone can trivially capture the exchange and reuse the same authorizer to authenticate themselves. Allow the service to reject an initial authorizer with a random challenge (ceph_x_authorize_challenge). The client then has to respond with an updated authorizer proving they are able to decrypt the service's challenge and that the new authorizer was produced for this specific connection instance. The accepting side requires this challenge and response unconditionally if the client side advertises they have CEPHX_V2 feature bit. This addresses CVE-2018-1128. Link: http://tracker.ceph.com/issues/24836 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/ceph/mds_client.c | 11 ++++++ include/linux/ceph/auth.h | 8 ++++ include/linux/ceph/messenger.h | 3 ++ include/linux/ceph/msgr.h | 2 +- net/ceph/auth.c | 16 ++++++++ net/ceph/auth_x.c | 72 +++++++++++++++++++++++++++++++--- net/ceph/auth_x_protocol.h | 7 ++++ net/ceph/messenger.c | 17 +++++++- net/ceph/osd_client.c | 11 ++++++ 9 files changed, 140 insertions(+), 7 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index bf378ddca4db..a48984dd6426 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4079,6 +4079,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -4142,6 +4152,7 @@ static const struct ceph_connection_operations mds_con_ops = { .put = con_put, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index e931da8424a4..6728c2ee0205 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -64,6 +64,10 @@ struct ceph_auth_client_ops { /* ensure that an existing authorizer is up to date */ int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); + int (*add_authorizer_challenge)(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, struct ceph_authorizer *a); void (*invalidate_authorizer)(struct ceph_auth_client *ac, @@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *a); +int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len); extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a); extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 9056077c023f..18fbe910ed55 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -31,6 +31,9 @@ struct ceph_connection_operations { struct ceph_auth_handshake *(*get_authorizer) ( struct ceph_connection *con, int *proto, int force_new); + int (*add_authorizer_challenge)(struct ceph_connection *con, + void *challenge_buf, + int challenge_buf_len); int (*verify_authorizer_reply) (struct ceph_connection *con); int (*invalidate_authorizer)(struct ceph_connection *con); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 73ae2a926548..9e50aede46c8 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -91,7 +91,7 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ - +#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */ /* * connection negotiation diff --git a/net/ceph/auth.c b/net/ceph/auth.c index dbde2b3c3c15..fbeee068ea14 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac, } EXPORT_SYMBOL(ceph_auth_update_authorizer); +int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->add_authorizer_challenge) + ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf, + challenge_buf_len); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge); + int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a) { diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 722791f45b2a..ce28bb07d8fd 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -295,7 +295,8 @@ bad: * authorizer. The first part (ceph_x_authorize_a) should already be * encoded. */ -static int encrypt_authorizer(struct ceph_x_authorizer *au) +static int encrypt_authorizer(struct ceph_x_authorizer *au, + u64 *server_challenge) { struct ceph_x_authorize_a *msg_a; struct ceph_x_authorize_b *msg_b; @@ -308,16 +309,28 @@ static int encrypt_authorizer(struct ceph_x_authorizer *au) end = au->buf->vec.iov_base + au->buf->vec.iov_len; msg_b = p + ceph_x_encrypt_offset(); - msg_b->struct_v = 1; + msg_b->struct_v = 2; msg_b->nonce = cpu_to_le64(au->nonce); + if (server_challenge) { + msg_b->have_challenge = 1; + msg_b->server_challenge_plus_one = + cpu_to_le64(*server_challenge + 1); + } else { + msg_b->have_challenge = 0; + msg_b->server_challenge_plus_one = 0; + } ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); if (ret < 0) return ret; p += ret; - WARN_ON(p > end); - au->buf->vec.iov_len = p - au->buf->vec.iov_base; + if (server_challenge) { + WARN_ON(p != end); + } else { + WARN_ON(p > end); + au->buf->vec.iov_len = p - au->buf->vec.iov_base; + } return 0; } @@ -382,7 +395,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, le64_to_cpu(msg_a->ticket_blob.secret_id)); get_random_bytes(&au->nonce, sizeof(au->nonce)); - ret = encrypt_authorizer(au); + ret = encrypt_authorizer(au, NULL); if (ret) { pr_err("failed to encrypt authorizer: %d", ret); goto out_au; @@ -664,6 +677,54 @@ static int ceph_x_update_authorizer( return 0; } +static int decrypt_authorize_challenge(struct ceph_x_authorizer *au, + void *challenge_buf, + int challenge_buf_len, + u64 *server_challenge) +{ + struct ceph_x_authorize_challenge *ch = + challenge_buf + sizeof(struct ceph_x_encrypt_header); + int ret; + + /* no leading len */ + ret = __ceph_x_decrypt(&au->session_key, challenge_buf, + challenge_buf_len); + if (ret < 0) + return ret; + if (ret < sizeof(*ch)) { + pr_err("bad size %d for ceph_x_authorize_challenge\n", ret); + return -EINVAL; + } + + *server_challenge = le64_to_cpu(ch->server_challenge); + return 0; +} + +static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len) +{ + struct ceph_x_authorizer *au = (void *)a; + u64 server_challenge; + int ret; + + ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len, + &server_challenge); + if (ret) { + pr_err("failed to decrypt authorize challenge: %d", ret); + return ret; + } + + ret = encrypt_authorizer(au, &server_challenge); + if (ret) { + pr_err("failed to encrypt authorizer w/ challenge: %d", ret); + return ret; + } + + return 0; +} + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a) { @@ -816,6 +877,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, .update_authorizer = ceph_x_update_authorizer, + .add_authorizer_challenge = ceph_x_add_authorizer_challenge, .verify_authorizer_reply = ceph_x_verify_authorizer_reply, .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 32c13d763b9a..24b0b74564d0 100644 --- a/net/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h @@ -70,6 +70,13 @@ struct ceph_x_authorize_a { struct ceph_x_authorize_b { __u8 struct_v; __le64 nonce; + __u8 have_challenge; + __le64 server_challenge_plus_one; +} __attribute__ ((packed)); + +struct ceph_x_authorize_challenge { + __u8 struct_v; + __le64 server_challenge; } __attribute__ ((packed)); struct ceph_x_authorize_reply { diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0b121327d32f..ad33baa2008d 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2052,9 +2052,24 @@ static int process_connect(struct ceph_connection *con) if (con->auth) { /* * Any connection that defines ->get_authorizer() - * should also define ->verify_authorizer_reply(). + * should also define ->add_authorizer_challenge() and + * ->verify_authorizer_reply(). + * * See get_connect_authorizer(). */ + if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { + ret = con->ops->add_authorizer_challenge( + con, con->auth->authorizer_reply_buf, + le32_to_cpu(con->in_reply.authorizer_len)); + if (ret < 0) + return ret; + + con_out_kvec_reset(con); + __prepare_write_connect(con); + prepare_read_connect(con); + return 0; + } + ret = con->ops->verify_authorizer_reply(con); if (ret < 0) { con->error_msg = "bad authorize reply"; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2814dba5902d..53ea2d48896c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5292,6 +5292,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_osd *o = con->private; + struct ceph_osd_client *osdc = o->o_osdc; + struct ceph_auth_client *ac = osdc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -5341,6 +5351,7 @@ static const struct ceph_connection_operations osd_con_ops = { .put = put_osd_con, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .alloc_msg = alloc_msg, From b16d0c5d32468a0624505a7b6b211e20488295e9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:25:32 +0200 Subject: [PATCH 0718/2608] libceph: implement CEPHX_V2 calculation mode commit cc255c76c70f7a87d97939621eae04b600d9f4a1 upstream. Derive the signature from the entire buffer (both AES cipher blocks) instead of using just the first half of the first block, leaving out data_crc entirely. This addresses CVE-2018-1129. Link: http://tracker.ceph.com/issues/24837 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/linux/ceph/ceph_features.h | 7 +-- net/ceph/auth_x.c | 73 +++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 59042d5ac520..70f42eef813b 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap DEFINE_CEPH_FEATURE(59, 1, FS_BTIME) DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap -DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit* +DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit* +DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit* -DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down! DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing @@ -209,7 +209,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_SERVER_JEWEL | \ CEPH_FEATURE_MON_STATEFUL_SUB | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ - CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) + CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \ + CEPH_FEATURE_CEPHX_V2) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index ce28bb07d8fd..10eb759bbcb4 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -803,26 +804,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, __le64 *psig) { void *enc_buf = au->enc_buf; - struct { - __le32 len; - __le32 header_crc; - __le32 front_crc; - __le32 middle_crc; - __le32 data_crc; - } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); int ret; - sigblock->len = cpu_to_le32(4*sizeof(u32)); - sigblock->header_crc = msg->hdr.crc; - sigblock->front_crc = msg->footer.front_crc; - sigblock->middle_crc = msg->footer.middle_crc; - sigblock->data_crc = msg->footer.data_crc; - ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN, - sizeof(*sigblock)); - if (ret < 0) - return ret; + if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) { + struct { + __le32 len; + __le32 header_crc; + __le32 front_crc; + __le32 middle_crc; + __le32 data_crc; + } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); + + sigblock->len = cpu_to_le32(4*sizeof(u32)); + sigblock->header_crc = msg->hdr.crc; + sigblock->front_crc = msg->footer.front_crc; + sigblock->middle_crc = msg->footer.middle_crc; + sigblock->data_crc = msg->footer.data_crc; + + ret = ceph_x_encrypt(&au->session_key, enc_buf, + CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock)); + if (ret < 0) + return ret; + + *psig = *(__le64 *)(enc_buf + sizeof(u32)); + } else { + struct { + __le32 header_crc; + __le32 front_crc; + __le32 front_len; + __le32 middle_crc; + __le32 middle_len; + __le32 data_crc; + __le32 data_len; + __le32 seq_lower_word; + } __packed *sigblock = enc_buf; + struct { + __le64 a, b, c, d; + } __packed *penc = enc_buf; + int ciphertext_len; + + sigblock->header_crc = msg->hdr.crc; + sigblock->front_crc = msg->footer.front_crc; + sigblock->front_len = msg->hdr.front_len; + sigblock->middle_crc = msg->footer.middle_crc; + sigblock->middle_len = msg->hdr.middle_len; + sigblock->data_crc = msg->footer.data_crc; + sigblock->data_len = msg->hdr.data_len; + sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq; + + /* no leading len, no ceph_x_encrypt_header */ + ret = ceph_crypt(&au->session_key, true, enc_buf, + CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock), + &ciphertext_len); + if (ret) + return ret; + + *psig = penc->a ^ penc->b ^ penc->c ^ penc->d; + } - *psig = *(__le64 *)(enc_buf + sizeof(u32)); return 0; } From 83b570c004da47b51d7417ac18d8491d9fc91420 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 May 2018 09:27:05 -0700 Subject: [PATCH 0719/2608] bpf: Prevent memory disambiguation attack commit af86ca4e3088fe5eacf2f7e58c01fa68ca067672 upstream. Detect code patterns where malicious 'speculative store bypass' can be used and sanitize such patterns. 39: (bf) r3 = r10 40: (07) r3 += -216 41: (79) r8 = *(u64 *)(r7 +0) // slow read 42: (7a) *(u64 *)(r10 -72) = 0 // verifier inserts this instruction 43: (7b) *(u64 *)(r8 +0) = r3 // this store becomes slow due to r8 44: (79) r1 = *(u64 *)(r6 +0) // cpu speculatively executes this load 45: (71) r2 = *(u8 *)(r1 +0) // speculatively arbitrary 'load byte' // is now sanitized Above code after x86 JIT becomes: e5: mov %rbp,%rdx e8: add $0xffffffffffffff28,%rdx ef: mov 0x0(%r13),%r14 f3: movq $0x0,-0x48(%rbp) fb: mov %rdx,0x0(%r14) ff: mov 0x0(%rbx),%rdi 103: movzbq 0x0(%rdi),%rsi Signed-off-by: Alexei Starovoitov Signed-off-by: Thomas Gleixner [bwh: Backported to 4.14: - Add bpf_verifier_env parameter to check_stack_write() - Look up stack slot_types with state->stack_slot_type[] rather than state->stack[].slot_type[] - Drop bpf_verifier_env argument to verbose() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 62 +++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a3333004fd2b..8458cc5fbce5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -113,6 +113,7 @@ struct bpf_insn_aux_data { struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 013b0cd1958e..f6755fd5bae2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -717,8 +717,9 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct bpf_verifier_state *state, int off, - int size, int value_regno) +static int check_stack_write(struct bpf_verifier_env *env, + struct bpf_verifier_state *state, int off, + int size, int value_regno, int insn_idx) { int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -738,8 +739,32 @@ static int check_stack_write(struct bpf_verifier_state *state, int off, state->spilled_regs[spi] = state->regs[value_regno]; state->spilled_regs[spi].live |= REG_LIVE_WRITTEN; - for (i = 0; i < BPF_REG_SIZE; i++) + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && + !env->allow_ptr_leaks) { + int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; + int soff = (-spi - 1) * BPF_REG_SIZE; + + /* detected reuse of integer stack slot with a pointer + * which means either llvm is reusing stack slot or + * an attacker is trying to exploit CVE-2018-3639 + * (speculative store bypass) + * Have to sanitize that slot with preemptive + * store of zero. + */ + if (*poff && *poff != soff) { + /* disallow programs where single insn stores + * into two different stack slots, since verifier + * cannot sanitize them + */ + verbose("insn %d cannot access two stack slots fp%d and fp%d", + insn_idx, *poff, soff); + return -EINVAL; + } + *poff = soff; + } state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + } } else { /* regular write of data into stack */ state->spilled_regs[spi] = (struct bpf_reg_state) {}; @@ -1216,7 +1241,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose("attempt to corrupt spilled pointer on stack\n"); return -EACCES; } - err = check_stack_write(state, off, size, value_regno); + err = check_stack_write(env, state, off, size, + value_regno, insn_idx); } else { err = check_stack_read(state, off, size, value_regno); } @@ -4270,6 +4296,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) else continue; + if (type == BPF_WRITE && + env->insn_aux_data[i + delta].sanitize_stack_off) { + struct bpf_insn patch[] = { + /* Sanitize suspicious stack slot with zero. + * There are no memory dependencies for this store, + * since it's only using frame pointer and immediate + * constant of zero + */ + BPF_ST_MEM(BPF_DW, BPF_REG_FP, + env->insn_aux_data[i + delta].sanitize_stack_off, + 0), + /* the original STX instruction will immediately + * overwrite the same stack slot with appropriate value + */ + *insn, + }; + + cnt = ARRAY_SIZE(patch); + new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; From 25f03991a5210f31491e1e2fccec7cf0d080772e Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:45 +0200 Subject: [PATCH 0720/2608] tls: Add function to update the TLS socket configuration commit 6d88207fcfddc002afe3e2e4a455e5201089d5d9 upstream. The tx configuration is now stored in ctx->tx_conf. And sk->sk_prot is updated trough a function This will simplify things when we add rx and support for different possible tx and rx cross configurations. Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/net/tls.h | 2 ++ net/tls/tls_main.c | 46 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 86ed3dd80fe7..0c3ab2af74d3 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -89,6 +89,8 @@ struct tls_context { void *priv_ctx; + u8 tx_conf:2; + u16 prepend_size; u16 tag_size; u16 overhead_size; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 4f2971f528db..191a8adee3ea 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -46,8 +46,18 @@ MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_TCP_ULP("tls"); -static struct proto tls_base_prot; -static struct proto tls_sw_prot; +enum { + TLS_BASE_TX, + TLS_SW_TX, + TLS_NUM_CONFIG, +}; + +static struct proto tls_prots[TLS_NUM_CONFIG]; + +static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) +{ + sk->sk_prot = &tls_prots[ctx->tx_conf]; +} int wait_on_pending_writer(struct sock *sk, long *timeo) { @@ -364,8 +374,8 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, { struct tls_crypto_info *crypto_info, tmp_crypto_info; struct tls_context *ctx = tls_get_ctx(sk); - struct proto *prot = NULL; int rc = 0; + int tx_conf; if (!optval || (optlen < sizeof(*crypto_info))) { rc = -EINVAL; @@ -422,11 +432,12 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); - prot = &tls_sw_prot; + tx_conf = TLS_SW_TX; if (rc) goto err_crypto_info; - sk->sk_prot = prot; + ctx->tx_conf = tx_conf; + update_sk_prot(sk, ctx); goto out; err_crypto_info: @@ -488,7 +499,9 @@ static int tls_init(struct sock *sk) icsk->icsk_ulp_data = ctx; ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; - sk->sk_prot = &tls_base_prot; + + ctx->tx_conf = TLS_BASE_TX; + update_sk_prot(sk, ctx); out: return rc; } @@ -499,16 +512,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .init = tls_init, }; +static void build_protos(struct proto *prot, struct proto *base) +{ + prot[TLS_BASE_TX] = *base; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + + prot[TLS_SW_TX] = prot[TLS_BASE_TX]; + prot[TLS_SW_TX].close = tls_sk_proto_close; + prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; + prot[TLS_SW_TX].sendpage = tls_sw_sendpage; +} + static int __init tls_register(void) { - tls_base_prot = tcp_prot; - tls_base_prot.setsockopt = tls_setsockopt; - tls_base_prot.getsockopt = tls_getsockopt; - - tls_sw_prot = tls_base_prot; - tls_sw_prot.sendmsg = tls_sw_sendmsg; - tls_sw_prot.sendpage = tls_sw_sendpage; - tls_sw_prot.close = tls_sk_proto_close; + build_protos(tls_prots, &tcp_prot); tcp_register_ulp(&tcp_tls_ulp_ops); From 797b8bb47fb27ee49a3b59ad110e5264585415aa Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:46 +0200 Subject: [PATCH 0721/2608] tls: Fix TLS ulp context leak, when TLS_TX setsockopt is not used. commit ff45d820a2df163957ad8ab459b6eb6976144c18 upstream. Previously the TLS ulp context would leak if we attached a TLS ulp to a socket but did not use the TLS_TX setsockopt, or did use it but it failed. This patch solves the issue by overriding prot[TLS_BASE_TX].close and fixing tls_sk_proto_close to work properly when its called with ctx->tx_conf == TLS_BASE_TX. This patch also removes ctx->free_resources as we can use ctx->tx_conf to obtain the relevant information. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller [bwh: Backported to 4.14: Keep using tls_ctx_free() as introduced by the earlier backport of "tls: zero the crypto information from tls_context before freeing"] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/net/tls.h | 2 +- net/tls/tls_main.c | 24 ++++++++++++++++-------- net/tls/tls_sw.c | 3 +-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 0c3ab2af74d3..604fd982da19 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -106,7 +106,6 @@ struct tls_context { u16 pending_open_record_frags; int (*push_pending_record)(struct sock *sk, int flags); - void (*free_resources)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -131,6 +130,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); +void tls_sw_free_tx_resources(struct sock *sk); void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); void tls_icsk_clean_acked(struct sock *sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 191a8adee3ea..b5f9c578bcf0 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -249,6 +249,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) void (*sk_proto_close)(struct sock *sk, long timeout); lock_sock(sk); + sk_proto_close = ctx->sk_proto_close; + + if (ctx->tx_conf == TLS_BASE_TX) { + tls_ctx_free(ctx); + goto skip_tx_cleanup; + } if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) tls_handle_open_record(sk, 0); @@ -265,13 +271,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) sg++; } } - ctx->free_resources(sk); + kfree(ctx->rec_seq); kfree(ctx->iv); - sk_proto_close = ctx->sk_proto_close; - tls_ctx_free(ctx); + if (ctx->tx_conf == TLS_SW_TX) { + tls_sw_free_tx_resources(sk); + tls_ctx_free(ctx); + } +skip_tx_cleanup: release_sock(sk); sk_proto_close(sk, timeout); } @@ -428,8 +437,6 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, ctx->sk_write_space = sk->sk_write_space; sk->sk_write_space = tls_write_space; - ctx->sk_proto_close = sk->sk_prot->close; - /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); tx_conf = TLS_SW_TX; @@ -499,6 +506,7 @@ static int tls_init(struct sock *sk) icsk->icsk_ulp_data = ctx; ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; + ctx->sk_proto_close = sk->sk_prot->close; ctx->tx_conf = TLS_BASE_TX; update_sk_prot(sk, ctx); @@ -515,11 +523,11 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { static void build_protos(struct proto *prot, struct proto *base) { prot[TLS_BASE_TX] = *base; - prot[TLS_BASE_TX].setsockopt = tls_setsockopt; - prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].close = tls_sk_proto_close; prot[TLS_SW_TX] = prot[TLS_BASE_TX]; - prot[TLS_SW_TX].close = tls_sk_proto_close; prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; prot[TLS_SW_TX].sendpage = tls_sw_sendpage; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 6ae9ca567d6c..5996fb5756e1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -646,7 +646,7 @@ sendpage_end: return ret; } -static void tls_sw_free_resources(struct sock *sk) +void tls_sw_free_tx_resources(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); @@ -685,7 +685,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) } ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; - ctx->free_resources = tls_sw_free_resources; crypto_info = &ctx->crypto_send.info; switch (crypto_info->cipher_type) { From 93f16446c8ddacbf26ae8408ecd9c9c397b8d5b4 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:48 +0200 Subject: [PATCH 0722/2608] tls: Avoid copying crypto_info again after cipher_type check. commit 196c31b4b54474b31dee3c30352c45c2a93e9226 upstream. Avoid copying crypto_info again after cipher_type check to avoid a TOCTOU exploits. The temporary array on the stack is removed as we don't really need it Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller [bwh: Backported to 4.14: Preserve changes made by earlier backports of "tls: return -EBUSY if crypto_info is already set" and "tls: zero the crypto information from tls_context before freeing"] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/tls/tls_main.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b5f9c578bcf0..f88df514ad5f 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -381,7 +381,7 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, unsigned int optlen) { - struct tls_crypto_info *crypto_info, tmp_crypto_info; + struct tls_crypto_info *crypto_info; struct tls_context *ctx = tls_get_ctx(sk); int rc = 0; int tx_conf; @@ -391,38 +391,33 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, goto out; } - rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info)); - if (rc) { - rc = -EFAULT; - goto out; - } - - /* check version */ - if (tmp_crypto_info.version != TLS_1_2_VERSION) { - rc = -ENOTSUPP; - goto out; - } - - /* get user crypto info */ crypto_info = &ctx->crypto_send.info; - /* Currently we don't support set crypto info more than one time */ if (TLS_CRYPTO_INFO_READY(crypto_info)) { rc = -EBUSY; goto out; } - switch (tmp_crypto_info.cipher_type) { + rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto out; + } + + /* check version */ + if (crypto_info->version != TLS_1_2_VERSION) { + rc = -ENOTSUPP; + goto err_crypto_info; + } + + switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: { if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { rc = -EINVAL; goto err_crypto_info; } - rc = copy_from_user( - crypto_info, - optval, - sizeof(struct tls12_crypto_info_aes_gcm_128)); - + rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), + optlen - sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto err_crypto_info; From 2b8b2e76222f1241cec381eb0ec599ee33e9cd00 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:49 +0200 Subject: [PATCH 0723/2608] tls: don't override sk_write_space if tls_set_sw_offload fails. commit ee181e5201e640a4b92b217e9eab2531dab57d2c upstream. If we fail to enable tls in the kernel we shouldn't override the sk_write_space callback Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/tls/tls_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index f88df514ad5f..33187e34599b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -429,9 +429,6 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, goto err_crypto_info; } - ctx->sk_write_space = sk->sk_write_space; - sk->sk_write_space = tls_write_space; - /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); tx_conf = TLS_SW_TX; @@ -440,6 +437,8 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, ctx->tx_conf = tx_conf; update_sk_prot(sk, ctx); + ctx->sk_write_space = sk->sk_write_space; + sk->sk_write_space = tls_write_space; goto out; err_crypto_info: From 2a0f5919e1e6a1c0423d895ab75eb15f94a67c69 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Tue, 27 Feb 2018 14:18:39 +0200 Subject: [PATCH 0724/2608] tls: Use correct sk->sk_prot for IPV6 commit c113187d38ff85dc302a1bb55864b203ebb2ba10 upstream. The tls ulp overrides sk->prot with a new tls specific proto structs. The tls specific structs were previously based on the ipv4 specific tcp_prot sturct. As a result, attaching the tls ulp to an ipv6 tcp socket replaced some ipv6 callback with the ipv4 equivalents. This patch adds ipv6 tls proto structs and uses them when attached to ipv6 sockets. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Boris Pismenny Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/tls/tls_main.c | 52 +++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 33187e34599b..e903bdd39b9f 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -46,17 +46,27 @@ MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_TCP_ULP("tls"); +enum { + TLSV4, + TLSV6, + TLS_NUM_PROTS, +}; + enum { TLS_BASE_TX, TLS_SW_TX, TLS_NUM_CONFIG, }; -static struct proto tls_prots[TLS_NUM_CONFIG]; +static struct proto *saved_tcpv6_prot; +static DEFINE_MUTEX(tcpv6_prot_mutex); +static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG]; static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) { - sk->sk_prot = &tls_prots[ctx->tx_conf]; + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + + sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf]; } int wait_on_pending_writer(struct sock *sk, long *timeo) @@ -476,8 +486,21 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } +static void build_protos(struct proto *prot, struct proto *base) +{ + prot[TLS_BASE_TX] = *base; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].close = tls_sk_proto_close; + + prot[TLS_SW_TX] = prot[TLS_BASE_TX]; + prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; + prot[TLS_SW_TX].sendpage = tls_sw_sendpage; +} + static int tls_init(struct sock *sk) { + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx; int rc = 0; @@ -502,6 +525,17 @@ static int tls_init(struct sock *sk) ctx->getsockopt = sk->sk_prot->getsockopt; ctx->sk_proto_close = sk->sk_prot->close; + /* Build IPv6 TLS whenever the address of tcpv6_prot changes */ + if (ip_ver == TLSV6 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + mutex_lock(&tcpv6_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv6_prot)) { + build_protos(tls_prots[TLSV6], sk->sk_prot); + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + } + mutex_unlock(&tcpv6_prot_mutex); + } + ctx->tx_conf = TLS_BASE_TX; update_sk_prot(sk, ctx); out: @@ -514,21 +548,9 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .init = tls_init, }; -static void build_protos(struct proto *prot, struct proto *base) -{ - prot[TLS_BASE_TX] = *base; - prot[TLS_BASE_TX].setsockopt = tls_setsockopt; - prot[TLS_BASE_TX].getsockopt = tls_getsockopt; - prot[TLS_BASE_TX].close = tls_sk_proto_close; - - prot[TLS_SW_TX] = prot[TLS_BASE_TX]; - prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; - prot[TLS_SW_TX].sendpage = tls_sw_sendpage; -} - static int __init tls_register(void) { - build_protos(tls_prots, &tcp_prot); + build_protos(tls_prots[TLSV4], &tcp_prot); tcp_register_ulp(&tcp_tls_ulp_ops); From 39d9e1c62e3f8b5569a0d9bf4d47a3d5d8ae02e2 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Mon, 10 Sep 2018 22:53:46 +0530 Subject: [PATCH 0725/2608] net/tls: Fixed return value when tls_complete_pending_work() fails commit 150085791afb8054e11d2e080d4b9cd755dd7f69 upstream. In tls_sw_sendmsg() and tls_sw_sendpage(), the variable 'ret' has been set to return value of tls_complete_pending_work(). This allows return of proper error code if tls_complete_pending_work() fails. Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Vakul Garg Signed-off-by: David S. Miller [bwh: Backported to 4.14: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5996fb5756e1..d18d4a478e4f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -388,7 +388,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); - int ret = 0; + int ret; int required_size; long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); bool eor = !(msg->msg_flags & MSG_MORE); @@ -403,7 +403,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) lock_sock(sk); - if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo)) + ret = tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo); + if (ret) goto send_end; if (unlikely(msg->msg_controllen)) { @@ -539,7 +540,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); - int ret = 0; + int ret; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); bool eor; size_t orig_size = size; @@ -559,7 +560,8 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo)) + ret = tls_complete_pending_work(sk, tls_ctx, flags, &timeo); + if (ret) goto sendpage_end; /* Call the sk_stream functions to manage the sndbuf mem. */ From 107b02c81a8761f1f7efc1e8b54d435324ccd13e Mon Sep 17 00:00:00 2001 From: Lior David Date: Tue, 14 Nov 2017 15:25:39 +0200 Subject: [PATCH 0726/2608] wil6210: missing length check in wmi_set_ie commit b5a8ffcae4103a9d823ea3aa3a761f65779fbe2a upstream. Add a length check in wmi_set_ie to detect unsigned integer overflow. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/wil6210/wmi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index ffdd2fa401b1..d63d7c326801 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -1380,8 +1380,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie) }; int rc; u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len; - struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL); + struct wmi_set_appie_cmd *cmd; + if (len < ie_len) { + rc = -EINVAL; + goto out; + } + + cmd = kzalloc(len, GFP_KERNEL); if (!cmd) { rc = -ENOMEM; goto out; From f7eef132ccc95c9af50b647c5da0511d2b8492f8 Mon Sep 17 00:00:00 2001 From: Gu Jinxiang Date: Wed, 4 Jul 2018 18:16:39 +0800 Subject: [PATCH 0727/2608] btrfs: validate type when reading a chunk commit 315409b0098fb2651d86553f0436b70502b29bb2 upstream. Reported in https://bugzilla.kernel.org/show_bug.cgi?id=199839, with an image that has an invalid chunk type but does not return an error. Add chunk type check in btrfs_check_chunk_valid, to detect the wrong type combinations. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199839 Reported-by: Xu Wen Reviewed-by: Qu Wenruo Signed-off-by: Gu Jinxiang Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a0947f4a3e87..cfd5728e7519 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6353,6 +6353,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, u16 num_stripes; u16 sub_stripes; u64 type; + u64 features; + bool mixed = false; length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); @@ -6391,6 +6393,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, btrfs_chunk_type(leaf, chunk)); return -EIO; } + + if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { + btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && + (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(fs_info, + "system chunk with data or metadata type: 0x%llx", type); + return -EIO; + } + + features = btrfs_super_incompat_flags(fs_info->super_copy); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = true; + + if (!mixed) { + if ((type & BTRFS_BLOCK_GROUP_METADATA) && + (type & BTRFS_BLOCK_GROUP_DATA)) { + btrfs_err(fs_info, + "mixed chunk type in non-mixed mode: 0x%llx", type); + return -EIO; + } + } + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || From 895586ecb7a4528336d41f81d0ce3985e8abbed6 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 1 Aug 2018 10:37:17 +0800 Subject: [PATCH 0728/2608] btrfs: Verify that every chunk has corresponding block group at mount time commit 7ef49515fa6727cb4b6f2f5b0ffbc5fc20a9f8c6 upstream. If a crafted image has missing block group items, it could cause unexpected behavior and breaks the assumption of 1:1 chunk<->block group mapping. Although we have the block group -> chunk mapping check, we still need chunk -> block group mapping check. This patch will do extra check to ensure each chunk has its corresponding block group. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Reviewed-by: Gu Jinxiang Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 58 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2cb3569ac548..fdc42eddccc2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10092,6 +10092,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, return cache; } + +/* + * Iterate all chunks and verify that each of them has the corresponding block + * group + */ +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) +{ + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map *em; + struct btrfs_block_group_cache *bg; + u64 start = 0; + int ret = 0; + + while (1) { + read_lock(&map_tree->map_tree.lock); + /* + * lookup_extent_mapping will return the first extent map + * intersecting the range, so setting @len to 1 is enough to + * get the first chunk. + */ + em = lookup_extent_mapping(&map_tree->map_tree, start, 1); + read_unlock(&map_tree->map_tree.lock); + if (!em) + break; + + bg = btrfs_lookup_block_group(fs_info, em->start); + if (!bg) { + btrfs_err(fs_info, + "chunk start=%llu len=%llu doesn't have corresponding block group", + em->start, em->len); + ret = -EUCLEAN; + free_extent_map(em); + break; + } + if (bg->key.objectid != em->start || + bg->key.offset != em->len || + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != + (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", + em->start, em->len, + em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, + bg->key.objectid, bg->key.offset, + bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); + ret = -EUCLEAN; + free_extent_map(em); + btrfs_put_block_group(bg); + break; + } + start = em->start + em->len; + free_extent_map(em); + btrfs_put_block_group(bg); + } + return ret; +} + int btrfs_read_block_groups(struct btrfs_fs_info *info) { struct btrfs_path *path; @@ -10264,7 +10320,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) } init_global_block_rsv(info); - ret = 0; + ret = check_chunk_block_group_mappings(info); error: btrfs_free_path(path); return ret; From a5cc85fe139c181e53cc557230c5ea4001a90c80 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Aug 2017 16:57:56 +0900 Subject: [PATCH 0729/2608] btrfs: Refactor check_leaf function for later expansion commit c3267bbaa9cae09b62960eafe33ad19196803285 upstream. Current check_leaf() function does a good job checking key order and item offset/size. However it only checks from slot 0 to the last but one slot, this is good but makes later expansion hard. So this refactoring iterates from slot 0 to the last slot. For key comparison, it uses a key with all 0 as initial key, so all valid keys should be larger than that. And for item size/offset checks, it compares current item end with previous item offset. For slot 0, use leaf end as a special case. This makes later item/key offset checks and item size checks easier to be implemented. Also, makes check_leaf() to return -EUCLEAN other than -EIO to indicate error. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 50 +++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0e67cee73c53..4a1e63df1183 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -554,8 +554,9 @@ static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) { struct btrfs_fs_info *fs_info = root->fs_info; + /* No valid key type is 0, so all key should be larger than this key */ + struct btrfs_key prev_key = {0, 0, 0}; struct btrfs_key key; - struct btrfs_key leaf_key; u32 nritems = btrfs_header_nritems(leaf); int slot; @@ -588,7 +589,7 @@ static noinline int check_leaf(struct btrfs_root *root, CORRUPT("non-root leaf's nritems is 0", leaf, check_root, 0); free_extent_buffer(eb); - return -EIO; + return -EUCLEAN; } free_extent_buffer(eb); } @@ -598,28 +599,23 @@ static noinline int check_leaf(struct btrfs_root *root, if (nritems == 0) return 0; - /* Check the 0 item */ - if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != - BTRFS_LEAF_DATA_SIZE(fs_info)) { - CORRUPT("invalid item offset size pair", leaf, root, 0); - return -EIO; - } - /* - * Check to make sure each items keys are in the correct order and their - * offsets make sense. We only have to loop through nritems-1 because - * we check the current slot against the next slot, which verifies the - * next slot's offset+size makes sense and that the current's slot - * offset is correct. + * Check the following things to make sure this is a good leaf, and + * leaf users won't need to bother with similar sanity checks: + * + * 1) key order + * 2) item offset and size + * No overlap, no hole, all inside the leaf. */ - for (slot = 0; slot < nritems - 1; slot++) { - btrfs_item_key_to_cpu(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &key, slot + 1); + for (slot = 0; slot < nritems; slot++) { + u32 item_end_expected; + + btrfs_item_key_to_cpu(leaf, &key, slot); /* Make sure the keys are in the right order */ - if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { + if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { CORRUPT("bad key order", leaf, root, slot); - return -EIO; + return -EUCLEAN; } /* @@ -627,10 +623,14 @@ static noinline int check_leaf(struct btrfs_root *root, * item data starts at the end of the leaf and grows towards the * front. */ - if (btrfs_item_offset_nr(leaf, slot) != - btrfs_item_end_nr(leaf, slot + 1)) { + if (slot == 0) + item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); + else + item_end_expected = btrfs_item_offset_nr(leaf, + slot - 1); + if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { CORRUPT("slot offset bad", leaf, root, slot); - return -EIO; + return -EUCLEAN; } /* @@ -641,8 +641,12 @@ static noinline int check_leaf(struct btrfs_root *root, if (btrfs_item_end_nr(leaf, slot) > BTRFS_LEAF_DATA_SIZE(fs_info)) { CORRUPT("slot end outside of leaf", leaf, root, slot); - return -EIO; + return -EUCLEAN; } + + prev_key.objectid = key.objectid; + prev_key.type = key.type; + prev_key.offset = key.offset; } return 0; From ac6ea50bb6306ed056222a5967b2bad3c3a9e2dd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Aug 2017 16:57:57 +0900 Subject: [PATCH 0730/2608] btrfs: Check if item pointer overlaps with the item itself commit 7f43d4affb2a254d421ab20b0cf65ac2569909fb upstream. Function check_leaf() checks if any item pointer points outside of the leaf, but it doesn't check if the pointer overlaps with the item itself. Normally only the last item may be the victim, but adding such check is never a bad idea anyway. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a1e63df1183..da7b2039e4cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -644,6 +644,13 @@ static noinline int check_leaf(struct btrfs_root *root, return -EUCLEAN; } + /* Also check if the item pointer overlaps with btrfs item. */ + if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > + btrfs_item_ptr_offset(leaf, slot)) { + CORRUPT("slot overlap with its data", leaf, root, slot); + return -EUCLEAN; + } + prev_key.objectid = key.objectid; prev_key.type = key.type; prev_key.offset = key.offset; From fa5d29e6d7cbfac45e3b7bdb7bb3d48f124bbe55 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Aug 2017 16:57:58 +0900 Subject: [PATCH 0731/2608] btrfs: Add sanity check for EXTENT_DATA when reading out leaf commit 40c3c40947324d9f40bf47830c92c59a9bbadf4a upstream. Add extra checks for item with EXTENT_DATA type. This checks the following thing: 0) Key offset All key offsets must be aligned to sectorsize. Inline extent must have 0 for key offset. 1) Item size Uncompressed inline file extent size must match item size. (Compressed inline file extent has no information about its on-disk size.) Regular/preallocated file extent size must be a fixed value. 2) Every member of regular file extent item Including alignment for bytenr and offset, possible value for compression/encryption/type. 3) Type/compression/encode must be one of the valid values. This should be the most comprehensive and strict check in the context of btrfs_item for EXTENT_DATA. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba [ switch to BTRFS_FILE_EXTENT_TYPES, similar to what BTRFS_COMPRESS_TYPES does ] Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 103 ++++++++++++++++++++++++++++++++ include/uapi/linux/btrfs_tree.h | 1 + 2 files changed, 104 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index da7b2039e4cb..ab8925b2efd1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -550,6 +550,100 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, btrfs_header_level(eb) == 0 ? "leaf" : "node", \ reason, btrfs_header_bytenr(eb), root->objectid, slot) +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_file_extent_item *fi; + u32 sectorsize = root->fs_info->sectorsize; + u32 item_size = btrfs_item_size_nr(leaf, slot); + + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for file extent", + leaf, root, slot); + return -EUCLEAN; + } + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { + CORRUPT("invalid file extent type", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Support for new compression/encrption must introduce incompat flag, + * and must be caught in open_ctree(). + */ + if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { + CORRUPT("invalid file extent compression", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_encryption(leaf, fi)) { + CORRUPT("invalid file extent encryption", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { + /* Inline extent must have 0 as key offset */ + if (key->offset) { + CORRUPT("inline extent has non-zero key offset", + leaf, root, slot); + return -EUCLEAN; + } + + /* Compressed inline extent has no on-disk size, skip it */ + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE) + return 0; + + /* Uncompressed inline extent size must match item size */ + if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + + btrfs_file_extent_ram_bytes(leaf, fi)) { + CORRUPT("plaintext inline extent has invalid size", + leaf, root, slot); + return -EUCLEAN; + } + return 0; + } + + /* Regular or preallocated extent has fixed item size */ + if (item_size != sizeof(*fi)) { + CORRUPT( + "regluar or preallocated extent data item size is invalid", + leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { + CORRUPT( + "regular or preallocated extent data item has unaligned value", + leaf, root, slot); + return -EUCLEAN; + } + + return 0; +} + +/* + * Common point to switch the item-specific validation. + */ +static int check_leaf_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + int ret = 0; + + switch (key->type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, leaf, key, slot); + break; + } + return ret; +} + static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) { @@ -606,9 +700,13 @@ static noinline int check_leaf(struct btrfs_root *root, * 1) key order * 2) item offset and size * No overlap, no hole, all inside the leaf. + * 3) item content + * If possible, do comprehensive sanity check. + * NOTE: All checks must only rely on the item data itself. */ for (slot = 0; slot < nritems; slot++) { u32 item_end_expected; + int ret; btrfs_item_key_to_cpu(leaf, &key, slot); @@ -651,6 +749,11 @@ static noinline int check_leaf(struct btrfs_root *root, return -EUCLEAN; } + /* Check if the item size and content meet other criteria */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + prev_key.objectid = key.objectid; prev_key.type = key.type; prev_key.offset = key.offset; diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 7115838fbf2a..38ab0e06259a 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -734,6 +734,7 @@ struct btrfs_balance_item { #define BTRFS_FILE_EXTENT_INLINE 0 #define BTRFS_FILE_EXTENT_REG 1 #define BTRFS_FILE_EXTENT_PREALLOC 2 +#define BTRFS_FILE_EXTENT_TYPES 2 struct btrfs_file_extent_item { /* From 64948fd63f662180c00f0fe70b623eab51108a89 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Aug 2017 16:57:59 +0900 Subject: [PATCH 0732/2608] btrfs: Add checker for EXTENT_CSUM commit 4b865cab96fe2a30ed512cf667b354bd291b3b0a upstream. EXTENT_CSUM checker is a relatively easy one, only needs to check: 1) Objectid Fixed to BTRFS_EXTENT_CSUM_OBJECTID 2) Key offset alignment Must be aligned to sectorsize 3) Item size alignedment Must be aligned to csum size Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ab8925b2efd1..53841d773a40 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -627,6 +627,27 @@ static int check_extent_data_item(struct btrfs_root *root, return 0; } +static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + u32 sectorsize = root->fs_info->sectorsize; + u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); + + if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { + CORRUPT("invalid objectid for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { + CORRUPT("unaligned csum item size", leaf, root, slot); + return -EUCLEAN; + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -640,6 +661,9 @@ static int check_leaf_item(struct btrfs_root *root, case BTRFS_EXTENT_DATA_KEY: ret = check_extent_data_item(root, leaf, key, slot); break; + case BTRFS_EXTENT_CSUM_KEY: + ret = check_csum_item(root, leaf, key, slot); + break; } return ret; } From eb3493e2476c5ce60c6a376a6b50d465f2864968 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 9 Oct 2017 01:51:02 +0000 Subject: [PATCH 0733/2608] btrfs: Move leaf and node validation checker to tree-checker.c commit 557ea5dd003d371536f6b4e8f7c8209a2b6fd4e3 upstream. It's no doubt the comprehensive tree block checker will become larger, so moving them into their own files is quite reasonable. Signed-off-by: Qu Wenruo [ wording adjustments ] Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/Makefile | 2 +- fs/btrfs/disk-io.c | 285 +----------------------------------- fs/btrfs/tree-checker.c | 309 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-checker.h | 26 ++++ 4 files changed, 340 insertions(+), 282 deletions(-) create mode 100644 fs/btrfs/tree-checker.c create mode 100644 fs/btrfs/tree-checker.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index f2cd9dedb037..195229df5ba0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ - uuid-tree.o props.o hash.o free-space-tree.o + uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 53841d773a40..2df5e906db08 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,6 +50,7 @@ #include "sysfs.h" #include "qgroup.h" #include "compression.h" +#include "tree-checker.h" #ifdef CONFIG_X86 #include @@ -544,284 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, return ret; } -#define CORRUPT(reason, eb, root, slot) \ - btrfs_crit(root->fs_info, \ - "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ - btrfs_header_level(eb) == 0 ? "leaf" : "node", \ - reason, btrfs_header_bytenr(eb), root->objectid, slot) - -static int check_extent_data_item(struct btrfs_root *root, - struct extent_buffer *leaf, - struct btrfs_key *key, int slot) -{ - struct btrfs_file_extent_item *fi; - u32 sectorsize = root->fs_info->sectorsize; - u32 item_size = btrfs_item_size_nr(leaf, slot); - - if (!IS_ALIGNED(key->offset, sectorsize)) { - CORRUPT("unaligned key offset for file extent", - leaf, root, slot); - return -EUCLEAN; - } - - fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - - if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { - CORRUPT("invalid file extent type", leaf, root, slot); - return -EUCLEAN; - } - - /* - * Support for new compression/encrption must introduce incompat flag, - * and must be caught in open_ctree(). - */ - if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { - CORRUPT("invalid file extent compression", leaf, root, slot); - return -EUCLEAN; - } - if (btrfs_file_extent_encryption(leaf, fi)) { - CORRUPT("invalid file extent encryption", leaf, root, slot); - return -EUCLEAN; - } - if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { - /* Inline extent must have 0 as key offset */ - if (key->offset) { - CORRUPT("inline extent has non-zero key offset", - leaf, root, slot); - return -EUCLEAN; - } - - /* Compressed inline extent has no on-disk size, skip it */ - if (btrfs_file_extent_compression(leaf, fi) != - BTRFS_COMPRESS_NONE) - return 0; - - /* Uncompressed inline extent size must match item size */ - if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + - btrfs_file_extent_ram_bytes(leaf, fi)) { - CORRUPT("plaintext inline extent has invalid size", - leaf, root, slot); - return -EUCLEAN; - } - return 0; - } - - /* Regular or preallocated extent has fixed item size */ - if (item_size != sizeof(*fi)) { - CORRUPT( - "regluar or preallocated extent data item size is invalid", - leaf, root, slot); - return -EUCLEAN; - } - if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || - !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || - !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || - !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || - !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { - CORRUPT( - "regular or preallocated extent data item has unaligned value", - leaf, root, slot); - return -EUCLEAN; - } - - return 0; -} - -static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, - struct btrfs_key *key, int slot) -{ - u32 sectorsize = root->fs_info->sectorsize; - u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); - - if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { - CORRUPT("invalid objectid for csum item", leaf, root, slot); - return -EUCLEAN; - } - if (!IS_ALIGNED(key->offset, sectorsize)) { - CORRUPT("unaligned key offset for csum item", leaf, root, slot); - return -EUCLEAN; - } - if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { - CORRUPT("unaligned csum item size", leaf, root, slot); - return -EUCLEAN; - } - return 0; -} - -/* - * Common point to switch the item-specific validation. - */ -static int check_leaf_item(struct btrfs_root *root, - struct extent_buffer *leaf, - struct btrfs_key *key, int slot) -{ - int ret = 0; - - switch (key->type) { - case BTRFS_EXTENT_DATA_KEY: - ret = check_extent_data_item(root, leaf, key, slot); - break; - case BTRFS_EXTENT_CSUM_KEY: - ret = check_csum_item(root, leaf, key, slot); - break; - } - return ret; -} - -static noinline int check_leaf(struct btrfs_root *root, - struct extent_buffer *leaf) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - /* No valid key type is 0, so all key should be larger than this key */ - struct btrfs_key prev_key = {0, 0, 0}; - struct btrfs_key key; - u32 nritems = btrfs_header_nritems(leaf); - int slot; - - /* - * Extent buffers from a relocation tree have a owner field that - * corresponds to the subvolume tree they are based on. So just from an - * extent buffer alone we can not find out what is the id of the - * corresponding subvolume tree, so we can not figure out if the extent - * buffer corresponds to the root of the relocation tree or not. So skip - * this check for relocation trees. - */ - if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { - struct btrfs_root *check_root; - - key.objectid = btrfs_header_owner(leaf); - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - check_root = btrfs_get_fs_root(fs_info, &key, false); - /* - * The only reason we also check NULL here is that during - * open_ctree() some roots has not yet been set up. - */ - if (!IS_ERR_OR_NULL(check_root)) { - struct extent_buffer *eb; - - eb = btrfs_root_node(check_root); - /* if leaf is the root, then it's fine */ - if (leaf != eb) { - CORRUPT("non-root leaf's nritems is 0", - leaf, check_root, 0); - free_extent_buffer(eb); - return -EUCLEAN; - } - free_extent_buffer(eb); - } - return 0; - } - - if (nritems == 0) - return 0; - - /* - * Check the following things to make sure this is a good leaf, and - * leaf users won't need to bother with similar sanity checks: - * - * 1) key order - * 2) item offset and size - * No overlap, no hole, all inside the leaf. - * 3) item content - * If possible, do comprehensive sanity check. - * NOTE: All checks must only rely on the item data itself. - */ - for (slot = 0; slot < nritems; slot++) { - u32 item_end_expected; - int ret; - - btrfs_item_key_to_cpu(leaf, &key, slot); - - /* Make sure the keys are in the right order */ - if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { - CORRUPT("bad key order", leaf, root, slot); - return -EUCLEAN; - } - - /* - * Make sure the offset and ends are right, remember that the - * item data starts at the end of the leaf and grows towards the - * front. - */ - if (slot == 0) - item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); - else - item_end_expected = btrfs_item_offset_nr(leaf, - slot - 1); - if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { - CORRUPT("slot offset bad", leaf, root, slot); - return -EUCLEAN; - } - - /* - * Check to make sure that we don't point outside of the leaf, - * just in case all the items are consistent to each other, but - * all point outside of the leaf. - */ - if (btrfs_item_end_nr(leaf, slot) > - BTRFS_LEAF_DATA_SIZE(fs_info)) { - CORRUPT("slot end outside of leaf", leaf, root, slot); - return -EUCLEAN; - } - - /* Also check if the item pointer overlaps with btrfs item. */ - if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > - btrfs_item_ptr_offset(leaf, slot)) { - CORRUPT("slot overlap with its data", leaf, root, slot); - return -EUCLEAN; - } - - /* Check if the item size and content meet other criteria */ - ret = check_leaf_item(root, leaf, &key, slot); - if (ret < 0) - return ret; - - prev_key.objectid = key.objectid; - prev_key.type = key.type; - prev_key.offset = key.offset; - } - - return 0; -} - -static int check_node(struct btrfs_root *root, struct extent_buffer *node) -{ - unsigned long nr = btrfs_header_nritems(node); - struct btrfs_key key, next_key; - int slot; - u64 bytenr; - int ret = 0; - - if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { - btrfs_crit(root->fs_info, - "corrupt node: block %llu root %llu nritems %lu", - node->start, root->objectid, nr); - return -EIO; - } - - for (slot = 0; slot < nr - 1; slot++) { - bytenr = btrfs_node_blockptr(node, slot); - btrfs_node_key_to_cpu(node, &key, slot); - btrfs_node_key_to_cpu(node, &next_key, slot + 1); - - if (!bytenr) { - CORRUPT("invalid item slot", node, root, slot); - ret = -EIO; - goto out; - } - - if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { - CORRUPT("bad key order", node, root, slot); - ret = -EIO; - goto out; - } - } -out: - return ret; -} - static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -887,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } - if (found_level > 0 && check_node(root, eb)) + if (found_level > 0 && btrfs_check_node(root, eb)) ret = -EIO; if (!ret) @@ -4147,7 +3870,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { + if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c new file mode 100644 index 000000000000..56e25a630103 --- /dev/null +++ b/fs/btrfs/tree-checker.c @@ -0,0 +1,309 @@ +/* + * Copyright (C) Qu Wenruo 2017. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + +/* + * The module is used to catch unexpected/corrupted tree block data. + * Such behavior can be caused either by a fuzzed image or bugs. + * + * The objective is to do leaf/node validation checks when tree block is read + * from disk, and check *every* possible member, so other code won't + * need to checking them again. + * + * Due to the potential and unwanted damage, every checker needs to be + * carefully reviewed otherwise so it does not prevent mount of valid images. + */ + +#include "ctree.h" +#include "tree-checker.h" +#include "disk-io.h" +#include "compression.h" + +#define CORRUPT(reason, eb, root, slot) \ + btrfs_crit(root->fs_info, \ + "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ + btrfs_header_level(eb) == 0 ? "leaf" : "node", \ + reason, btrfs_header_bytenr(eb), root->objectid, slot) + +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_file_extent_item *fi; + u32 sectorsize = root->fs_info->sectorsize; + u32 item_size = btrfs_item_size_nr(leaf, slot); + + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for file extent", + leaf, root, slot); + return -EUCLEAN; + } + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { + CORRUPT("invalid file extent type", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Support for new compression/encrption must introduce incompat flag, + * and must be caught in open_ctree(). + */ + if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { + CORRUPT("invalid file extent compression", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_encryption(leaf, fi)) { + CORRUPT("invalid file extent encryption", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { + /* Inline extent must have 0 as key offset */ + if (key->offset) { + CORRUPT("inline extent has non-zero key offset", + leaf, root, slot); + return -EUCLEAN; + } + + /* Compressed inline extent has no on-disk size, skip it */ + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE) + return 0; + + /* Uncompressed inline extent size must match item size */ + if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + + btrfs_file_extent_ram_bytes(leaf, fi)) { + CORRUPT("plaintext inline extent has invalid size", + leaf, root, slot); + return -EUCLEAN; + } + return 0; + } + + /* Regular or preallocated extent has fixed item size */ + if (item_size != sizeof(*fi)) { + CORRUPT( + "regluar or preallocated extent data item size is invalid", + leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { + CORRUPT( + "regular or preallocated extent data item has unaligned value", + leaf, root, slot); + return -EUCLEAN; + } + + return 0; +} + +static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + u32 sectorsize = root->fs_info->sectorsize; + u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); + + if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { + CORRUPT("invalid objectid for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { + CORRUPT("unaligned csum item size", leaf, root, slot); + return -EUCLEAN; + } + return 0; +} + +/* + * Common point to switch the item-specific validation. + */ +static int check_leaf_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + int ret = 0; + + switch (key->type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, leaf, key, slot); + break; + case BTRFS_EXTENT_CSUM_KEY: + ret = check_csum_item(root, leaf, key, slot); + break; + } + return ret; +} + +int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + /* No valid key type is 0, so all key should be larger than this key */ + struct btrfs_key prev_key = {0, 0, 0}; + struct btrfs_key key; + u32 nritems = btrfs_header_nritems(leaf); + int slot; + + /* + * Extent buffers from a relocation tree have a owner field that + * corresponds to the subvolume tree they are based on. So just from an + * extent buffer alone we can not find out what is the id of the + * corresponding subvolume tree, so we can not figure out if the extent + * buffer corresponds to the root of the relocation tree or not. So + * skip this check for relocation trees. + */ + if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { + struct btrfs_root *check_root; + + key.objectid = btrfs_header_owner(leaf); + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + check_root = btrfs_get_fs_root(fs_info, &key, false); + /* + * The only reason we also check NULL here is that during + * open_ctree() some roots has not yet been set up. + */ + if (!IS_ERR_OR_NULL(check_root)) { + struct extent_buffer *eb; + + eb = btrfs_root_node(check_root); + /* if leaf is the root, then it's fine */ + if (leaf != eb) { + CORRUPT("non-root leaf's nritems is 0", + leaf, check_root, 0); + free_extent_buffer(eb); + return -EUCLEAN; + } + free_extent_buffer(eb); + } + return 0; + } + + if (nritems == 0) + return 0; + + /* + * Check the following things to make sure this is a good leaf, and + * leaf users won't need to bother with similar sanity checks: + * + * 1) key ordering + * 2) item offset and size + * No overlap, no hole, all inside the leaf. + * 3) item content + * If possible, do comprehensive sanity check. + * NOTE: All checks must only rely on the item data itself. + */ + for (slot = 0; slot < nritems; slot++) { + u32 item_end_expected; + int ret; + + btrfs_item_key_to_cpu(leaf, &key, slot); + + /* Make sure the keys are in the right order */ + if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { + CORRUPT("bad key order", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Make sure the offset and ends are right, remember that the + * item data starts at the end of the leaf and grows towards the + * front. + */ + if (slot == 0) + item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); + else + item_end_expected = btrfs_item_offset_nr(leaf, + slot - 1); + if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { + CORRUPT("slot offset bad", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Check to make sure that we don't point outside of the leaf, + * just in case all the items are consistent to each other, but + * all point outside of the leaf. + */ + if (btrfs_item_end_nr(leaf, slot) > + BTRFS_LEAF_DATA_SIZE(fs_info)) { + CORRUPT("slot end outside of leaf", leaf, root, slot); + return -EUCLEAN; + } + + /* Also check if the item pointer overlaps with btrfs item. */ + if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > + btrfs_item_ptr_offset(leaf, slot)) { + CORRUPT("slot overlap with its data", leaf, root, slot); + return -EUCLEAN; + } + + /* Check if the item size and content meet other criteria */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + + prev_key.objectid = key.objectid; + prev_key.type = key.type; + prev_key.offset = key.offset; + } + + return 0; +} + +int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) +{ + unsigned long nr = btrfs_header_nritems(node); + struct btrfs_key key, next_key; + int slot; + u64 bytenr; + int ret = 0; + + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { + btrfs_crit(root->fs_info, + "corrupt node: block %llu root %llu nritems %lu", + node->start, root->objectid, nr); + return -EIO; + } + + for (slot = 0; slot < nr - 1; slot++) { + bytenr = btrfs_node_blockptr(node, slot); + btrfs_node_key_to_cpu(node, &key, slot); + btrfs_node_key_to_cpu(node, &next_key, slot + 1); + + if (!bytenr) { + CORRUPT("invalid item slot", node, root, slot); + ret = -EIO; + goto out; + } + + if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { + CORRUPT("bad key order", node, root, slot); + ret = -EIO; + goto out; + } + } +out: + return ret; +} diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h new file mode 100644 index 000000000000..96c486e95d70 --- /dev/null +++ b/fs/btrfs/tree-checker.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) Qu Wenruo 2017. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + +#ifndef __BTRFS_TREE_CHECKER__ +#define __BTRFS_TREE_CHECKER__ + +#include "ctree.h" +#include "extent_io.h" + +int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); +int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); + +#endif From b3032dc25fb4d7ffdbb61df875cc09b3ec662c85 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 9 Oct 2017 01:51:03 +0000 Subject: [PATCH 0734/2608] btrfs: tree-checker: Enhance btrfs_check_node output commit bba4f29896c986c4cec17bc0f19f2ce644fceae1 upstream. Use inline function to replace macro since we don't need stringification. (Macro still exists until all callers get updated) And add more info about the error, and replace EIO with EUCLEAN. For nr_items error, report if it's too large or too small, and output the valid value range. For node block pointer, added a new alignment checker. For key order, also output the next key to make the problem more obvious. Signed-off-by: Qu Wenruo [ wording adjustments, unindented long strings ] Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 68 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 56e25a630103..5acdf3355a3f 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -37,6 +37,46 @@ btrfs_header_level(eb) == 0 ? "leaf" : "node", \ reason, btrfs_header_bytenr(eb), root->objectid, slot) +/* + * Error message should follow the following format: + * corrupt : , [, ] + * + * @type: leaf or node + * @identifier: the necessary info to locate the leaf/node. + * It's recommened to decode key.objecitd/offset if it's + * meaningful. + * @reason: describe the error + * @bad_value: optional, it's recommened to output bad value and its + * expected value (range). + * + * Since comma is used to separate the components, only space is allowed + * inside each component. + */ + +/* + * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. + * Allows callers to customize the output. + */ +__printf(4, 5) +static void generic_err(const struct btrfs_root *root, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(root->fs_info, + "corrupt %s: root=%llu block=%llu slot=%d, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + root->objectid, btrfs_header_bytenr(eb), slot, &vaf); + va_end(args); +} + static int check_extent_data_item(struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_key *key, int slot) @@ -282,9 +322,11 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { btrfs_crit(root->fs_info, - "corrupt node: block %llu root %llu nritems %lu", - node->start, root->objectid, nr); - return -EIO; +"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", + root->objectid, node->start, + nr == 0 ? "small" : "large", nr, + BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)); + return -EUCLEAN; } for (slot = 0; slot < nr - 1; slot++) { @@ -293,14 +335,26 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) btrfs_node_key_to_cpu(node, &next_key, slot + 1); if (!bytenr) { - CORRUPT("invalid item slot", node, root, slot); - ret = -EIO; + generic_err(root, node, slot, + "invalid NULL node pointer"); + ret = -EUCLEAN; + goto out; + } + if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) { + generic_err(root, node, slot, + "unaligned pointer, have %llu should be aligned to %u", + bytenr, root->fs_info->sectorsize); + ret = -EUCLEAN; goto out; } if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { - CORRUPT("bad key order", node, root, slot); - ret = -EIO; + generic_err(root, node, slot, + "bad key order, current (%llu %u %llu) next (%llu %u %llu)", + key.objectid, key.type, key.offset, + next_key.objectid, next_key.type, + next_key.offset); + ret = -EUCLEAN; goto out; } } From b6a07f903543e45d3660d5fbc8385f89da763c81 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 8 Nov 2017 08:54:24 +0800 Subject: [PATCH 0735/2608] btrfs: tree-checker: Fix false panic for sanity test commit 69fc6cbbac542c349b3d350d10f6e394c253c81d upstream. [BUG] If we run btrfs with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y, it will instantly cause kernel panic like: ------ ... assertion failed: 0, file: fs/btrfs/disk-io.c, line: 3853 ... Call Trace: btrfs_mark_buffer_dirty+0x187/0x1f0 [btrfs] setup_items_for_insert+0x385/0x650 [btrfs] __btrfs_drop_extents+0x129a/0x1870 [btrfs] ... ----- [Cause] Btrfs will call btrfs_check_leaf() in btrfs_mark_buffer_dirty() to check if the leaf is valid with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y. However quite some btrfs_mark_buffer_dirty() callers(*) don't really initialize its item data but only initialize its item pointers, leaving item data uninitialized. This makes tree-checker catch uninitialized data as error, causing such panic. *: These callers include but not limited to setup_items_for_insert() btrfs_split_item() btrfs_expand_item() [Fix] Add a new parameter @check_item_data to btrfs_check_leaf(). With @check_item_data set to false, item data check will be skipped and fallback to old btrfs_check_leaf() behavior. So we can still get early warning if we screw up item pointers, and avoid false panic. Cc: Filipe Manana Reported-by: Lakshmipathi.G Signed-off-by: Qu Wenruo Reviewed-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 10 ++++++++-- fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++----- fs/btrfs/tree-checker.h | 14 +++++++++++++- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2df5e906db08..e42673477c25 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -610,7 +610,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } @@ -3870,7 +3870,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { + /* + * Since btrfs_mark_buffer_dirty() can be called with item pointer set + * but item data not updated. + * So here we should only check item pointers, not item data. + */ + if (btrfs_header_level(buf) == 0 && + btrfs_check_leaf_relaxed(root, buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 5acdf3355a3f..ff4fa8f905d3 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -195,7 +195,8 @@ static int check_leaf_item(struct btrfs_root *root, return ret; } -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) +static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, + bool check_item_data) { struct btrfs_fs_info *fs_info = root->fs_info; /* No valid key type is 0, so all key should be larger than this key */ @@ -299,10 +300,15 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) return -EUCLEAN; } - /* Check if the item size and content meet other criteria */ - ret = check_leaf_item(root, leaf, &key, slot); - if (ret < 0) - return ret; + if (check_item_data) { + /* + * Check if the item size and content meet other + * criteria + */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + } prev_key.objectid = key.objectid; prev_key.type = key.type; @@ -312,6 +318,17 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) return 0; } +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, true); +} + +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, false); +} + int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) { unsigned long nr = btrfs_header_nritems(node); diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 96c486e95d70..3d53e8d6fda0 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -20,7 +20,19 @@ #include "ctree.h" #include "extent_io.h" -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); +/* + * Comprehensive leaf checker. + * Will check not only the item pointers, but also every possible member + * in item data. + */ +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); + +/* + * Less strict leaf checker. + * Will only check item pointers, not reading item data. + */ +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf); int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); #endif From fe09fe216e502b17716689721a5c59d6a21277ac Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 8 Nov 2017 08:54:25 +0800 Subject: [PATCH 0736/2608] btrfs: tree-checker: Add checker for dir item commit ad7b0368f33cffe67fecd302028915926e50ef7e upstream. Add checker for dir item, for key types DIR_ITEM, DIR_INDEX and XATTR_ITEM. This checker does comprehensive checks for: 1) dir_item header and its data size Against item boundary and maximum name/xattr length. This part is mostly the same as old verify_dir_item(). 2) dir_type Against maximum file types, and against key type. Since XATTR key should only have FT_XATTR dir item, and normal dir item type should not have XATTR key. The check between key->type and dir_type is newly introduced by this patch. 3) name hash For XATTR and DIR_ITEM key, key->offset is name hash (crc32c). Check the hash of the name against the key to ensure it's correct. The name hash check is only found in btrfs-progs before this patch. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 141 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ff4fa8f905d3..b32df86de9bf 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -30,6 +30,7 @@ #include "tree-checker.h" #include "disk-io.h" #include "compression.h" +#include "hash.h" #define CORRUPT(reason, eb, root, slot) \ btrfs_crit(root->fs_info, \ @@ -175,6 +176,141 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, return 0; } +/* + * Customized reported for dir_item, only important new info is key->objectid, + * which represents inode number + */ +__printf(4, 5) +static void dir_item_err(const struct btrfs_root *root, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(root->fs_info, + "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid, + btrfs_header_bytenr(eb), slot, key.objectid, &vaf); + va_end(args); +} + +static int check_dir_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_dir_item *di; + u32 item_size = btrfs_item_size_nr(leaf, slot); + u32 cur = 0; + + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + while (cur < item_size) { + char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + u32 name_len; + u32 data_len; + u32 max_name_len; + u32 total_size; + u32 name_hash; + u8 dir_type; + + /* header itself should not cross item boundary */ + if (cur + sizeof(*di) > item_size) { + dir_item_err(root, leaf, slot, + "dir item header crosses item boundary, have %lu boundary %u", + cur + sizeof(*di), item_size); + return -EUCLEAN; + } + + /* dir type check */ + dir_type = btrfs_dir_type(leaf, di); + if (dir_type >= BTRFS_FT_MAX) { + dir_item_err(root, leaf, slot, + "invalid dir item type, have %u expect [0, %u)", + dir_type, BTRFS_FT_MAX); + return -EUCLEAN; + } + + if (key->type == BTRFS_XATTR_ITEM_KEY && + dir_type != BTRFS_FT_XATTR) { + dir_item_err(root, leaf, slot, + "invalid dir item type for XATTR key, have %u expect %u", + dir_type, BTRFS_FT_XATTR); + return -EUCLEAN; + } + if (dir_type == BTRFS_FT_XATTR && + key->type != BTRFS_XATTR_ITEM_KEY) { + dir_item_err(root, leaf, slot, + "xattr dir type found for non-XATTR key"); + return -EUCLEAN; + } + if (dir_type == BTRFS_FT_XATTR) + max_name_len = XATTR_NAME_MAX; + else + max_name_len = BTRFS_NAME_LEN; + + /* Name/data length check */ + name_len = btrfs_dir_name_len(leaf, di); + data_len = btrfs_dir_data_len(leaf, di); + if (name_len > max_name_len) { + dir_item_err(root, leaf, slot, + "dir item name len too long, have %u max %u", + name_len, max_name_len); + return -EUCLEAN; + } + if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) { + dir_item_err(root, leaf, slot, + "dir item name and data len too long, have %u max %u", + name_len + data_len, + BTRFS_MAX_XATTR_SIZE(root->fs_info)); + return -EUCLEAN; + } + + if (data_len && dir_type != BTRFS_FT_XATTR) { + dir_item_err(root, leaf, slot, + "dir item with invalid data len, have %u expect 0", + data_len); + return -EUCLEAN; + } + + total_size = sizeof(*di) + name_len + data_len; + + /* header and name/data should not cross item boundary */ + if (cur + total_size > item_size) { + dir_item_err(root, leaf, slot, + "dir item data crosses item boundary, have %u boundary %u", + cur + total_size, item_size); + return -EUCLEAN; + } + + /* + * Special check for XATTR/DIR_ITEM, as key->offset is name + * hash, should match its name + */ + if (key->type == BTRFS_DIR_ITEM_KEY || + key->type == BTRFS_XATTR_ITEM_KEY) { + read_extent_buffer(leaf, namebuf, + (unsigned long)(di + 1), name_len); + name_hash = btrfs_name_hash(namebuf, name_len); + if (key->offset != name_hash) { + dir_item_err(root, leaf, slot, + "name hash mismatch with key, have 0x%016x expect 0x%016llx", + name_hash, key->offset); + return -EUCLEAN; + } + } + cur += total_size; + di = (struct btrfs_dir_item *)((void *)di + total_size); + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -191,6 +327,11 @@ static int check_leaf_item(struct btrfs_root *root, case BTRFS_EXTENT_CSUM_KEY: ret = check_csum_item(root, leaf, key, slot); break; + case BTRFS_DIR_ITEM_KEY: + case BTRFS_DIR_INDEX_KEY: + case BTRFS_XATTR_ITEM_KEY: + ret = check_dir_item(root, leaf, key, slot); + break; } return ret; } From 52ea16655aeed2571b9042c18db3eb089c37910c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Dec 2017 15:18:14 +0100 Subject: [PATCH 0737/2608] btrfs: tree-checker: use %zu format string for size_t commit 7cfad65297bfe0aa2996cd72d21c898aa84436d9 upstream. The return value of sizeof() is of type size_t, so we must print it using the %z format modifier rather than %l to avoid this warning on some architectures: fs/btrfs/tree-checker.c: In function 'check_dir_item': fs/btrfs/tree-checker.c:273:50: error: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'u32' {aka 'unsigned int'} [-Werror=format=] Fixes: 005887f2e3e0 ("btrfs: tree-checker: Add checker for dir item") Signed-off-by: Arnd Bergmann Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index b32df86de9bf..9376739a4cc8 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -223,7 +223,7 @@ static int check_dir_item(struct btrfs_root *root, /* header itself should not cross item boundary */ if (cur + sizeof(*di) > item_size) { dir_item_err(root, leaf, slot, - "dir item header crosses item boundary, have %lu boundary %u", + "dir item header crosses item boundary, have %zu boundary %u", cur + sizeof(*di), item_size); return -EUCLEAN; } From e07e1c7561a7e087a5e512c972aa7d3e1388c057 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 10 Jan 2018 15:13:07 +0100 Subject: [PATCH 0738/2608] btrfs: tree-check: reduce stack consumption in check_dir_item commit e2683fc9d219430f5b78889b50cde7f40efeba7b upstream. I've noticed that the updated item checker stack consumption increased dramatically in 542f5385e20cf97447 ("btrfs: tree-checker: Add checker for dir item") tree-checker.c:check_leaf +552 (176 -> 728) The array is 255 bytes long, dynamic allocation would slow down the sanity checks so it's more reasonable to keep it on-stack. Moving the variable to the scope of use reduces the stack usage again tree-checker.c:check_leaf -264 (728 -> 464) Reviewed-by: Josef Bacik Reviewed-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 9376739a4cc8..f7e7a455b710 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -212,7 +212,6 @@ static int check_dir_item(struct btrfs_root *root, di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); while (cur < item_size) { - char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; u32 name_len; u32 data_len; u32 max_name_len; @@ -295,6 +294,8 @@ static int check_dir_item(struct btrfs_root *root, */ if (key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_XATTR_ITEM_KEY) { + char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + read_extent_buffer(leaf, namebuf, (unsigned long)(di + 1), name_len); name_hash = btrfs_name_hash(namebuf, name_len); From 9f268b5cf2d6a716779dfe11f4bc02d6461db693 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Jul 2018 17:10:05 +0800 Subject: [PATCH 0739/2608] btrfs: tree-checker: Verify block_group_item commit fce466eab7ac6baa9d2dcd88abcf945be3d4a089 upstream. A crafted image with invalid block group items could make free space cache code to cause panic. We could detect such invalid block group item by checking: 1) Item size Known fixed value. 2) Block group size (key.offset) We have an upper limit on block group item (10G) 3) Chunk objectid Known fixed value. 4) Type Only 4 valid type values, DATA, METADATA, SYSTEM and DATA|METADATA. No more than 1 bit set for profile type. 5) Used space No more than the block group size. This should allow btrfs to detect and refuse to mount the crafted image. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199849 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Reviewed-by: Gu Jinxiang Reviewed-by: Nikolay Borisov Tested-by: Gu Jinxiang Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Backported to 4.14: - In check_leaf_item(), pass root->fs_info to check_block_group_item() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 100 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.c | 2 +- fs/btrfs/volumes.h | 2 + 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index f7e7a455b710..cf9b10a07134 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -31,6 +31,7 @@ #include "disk-io.h" #include "compression.h" #include "hash.h" +#include "volumes.h" #define CORRUPT(reason, eb, root, slot) \ btrfs_crit(root->fs_info, \ @@ -312,6 +313,102 @@ static int check_dir_item(struct btrfs_root *root, return 0; } +__printf(4, 5) +__cold +static void block_group_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(fs_info, + "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, key.offset, &vaf); + va_end(args); +} + +static int check_block_group_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_block_group_item bgi; + u32 item_size = btrfs_item_size_nr(leaf, slot); + u64 flags; + u64 type; + + /* + * Here we don't really care about alignment since extent allocator can + * handle it. We care more about the size, as if one block group is + * larger than maximum size, it's must be some obvious corruption. + */ + if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { + block_group_err(fs_info, leaf, slot, + "invalid block group size, have %llu expect (0, %llu]", + key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); + return -EUCLEAN; + } + + if (item_size != sizeof(bgi)) { + block_group_err(fs_info, leaf, slot, + "invalid item size, have %u expect %zu", + item_size, sizeof(bgi)); + return -EUCLEAN; + } + + read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), + sizeof(bgi)); + if (btrfs_block_group_chunk_objectid(&bgi) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID) { + block_group_err(fs_info, leaf, slot, + "invalid block group chunk objectid, have %llu expect %llu", + btrfs_block_group_chunk_objectid(&bgi), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + + if (btrfs_block_group_used(&bgi) > key->offset) { + block_group_err(fs_info, leaf, slot, + "invalid block group used, have %llu expect [0, %llu)", + btrfs_block_group_used(&bgi), key->offset); + return -EUCLEAN; + } + + flags = btrfs_block_group_flags(&bgi); + if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { + block_group_err(fs_info, leaf, slot, +"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", + flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, + hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); + return -EUCLEAN; + } + + type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; + if (type != BTRFS_BLOCK_GROUP_DATA && + type != BTRFS_BLOCK_GROUP_METADATA && + type != BTRFS_BLOCK_GROUP_SYSTEM && + type != (BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_DATA)) { + block_group_err(fs_info, leaf, slot, +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", + type, hweight64(type), + BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, + BTRFS_BLOCK_GROUP_SYSTEM, + BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); + return -EUCLEAN; + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -333,6 +430,9 @@ static int check_leaf_item(struct btrfs_root *root, case BTRFS_XATTR_ITEM_KEY: ret = check_dir_item(root, leaf, key, slot); break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + ret = check_block_group_item(root->fs_info, leaf, key, slot); + break; } return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cfd5728e7519..9663b6aa2a56 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4647,7 +4647,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_DATA) { max_stripe_size = SZ_1G; - max_chunk_size = 10 * max_stripe_size; + max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; if (!devs_max) devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_METADATA) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index c5dd48eb7b3d..76fb6e84f201 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -24,6 +24,8 @@ #include #include "async-thread.h" +#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) + extern struct mutex uuid_mutex; #define BTRFS_STRIPE_LEN SZ_64K From c0dfb99847851fb830d1e8ea7d5e0571f50c325a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Jul 2018 17:10:06 +0800 Subject: [PATCH 0740/2608] btrfs: tree-checker: Detect invalid and empty essential trees commit ba480dd4db9f1798541eb2d1c423fc95feee8d36 upstream. A crafted image has empty root tree block, which will later cause NULL pointer dereference. The following trees should never be empty: 1) Tree root Must contain at least root items for extent tree, device tree and fs tree 2) Chunk tree Or we can't even bootstrap as it contains the mapping. 3) Fs tree At least inode item for top level inode (.). 4) Device tree Dev extents for chunks 5) Extent tree Must have corresponding extent for each chunk. If any of them is empty, we are sure the fs is corrupted and no need to mount it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Tested-by: Gu Jinxiang Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Backported to 4.14: Pass root instead of fs_info to generic_err()] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index cf9b10a07134..31756bac75b4 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -456,9 +456,22 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, * skip this check for relocation trees. */ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { + u64 owner = btrfs_header_owner(leaf); struct btrfs_root *check_root; - key.objectid = btrfs_header_owner(leaf); + /* These trees must never be empty */ + if (owner == BTRFS_ROOT_TREE_OBJECTID || + owner == BTRFS_CHUNK_TREE_OBJECTID || + owner == BTRFS_EXTENT_TREE_OBJECTID || + owner == BTRFS_DEV_TREE_OBJECTID || + owner == BTRFS_FS_TREE_OBJECTID || + owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { + generic_err(root, leaf, 0, + "invalid root, root %llu must never be empty", + owner); + return -EUCLEAN; + } + key.objectid = owner; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; From 34407a175a59b668a1a2bbf0d0e495d87a7777d8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 1 Aug 2018 10:37:16 +0800 Subject: [PATCH 0741/2608] btrfs: Check that each block group has corresponding chunk at mount time commit 514c7dca85a0bf40be984dab0b477403a6db901f upstream. A crafted btrfs image with incorrect chunk<->block group mapping will trigger a lot of unexpected things as the mapping is essential. Although the problem can be caught by block group item checker added in "btrfs: tree-checker: Verify block_group_item", it's still not sufficient. A sufficiently valid block group item can pass the check added by the mentioned patch but could fail to match the existing chunk. This patch will add extra block group -> chunk mapping check, to ensure we have a completely matching (start, len, flags) chunk for each block group at mount time. Here we reuse the original helper find_first_block_group(), which is already doing the basic bg -> chunk checks, adding further checks of the start/len and type flags. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199837 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Reviewed-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fdc42eddccc2..83791d13c204 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9828,6 +9828,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, int ret = 0; struct btrfs_key found_key; struct extent_buffer *leaf; + struct btrfs_block_group_item bg; + u64 flags; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); @@ -9862,8 +9864,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, "logical %llu len %llu found bg but no related chunk", found_key.objectid, found_key.offset); ret = -ENOENT; + } else if (em->start != found_key.objectid || + em->len != found_key.offset) { + btrfs_err(fs_info, + "block group %llu len %llu mismatch with chunk %llu len %llu", + found_key.objectid, found_key.offset, + em->start, em->len); + ret = -EUCLEAN; } else { - ret = 0; + read_extent_buffer(leaf, &bg, + btrfs_item_ptr_offset(leaf, slot), + sizeof(bg)); + flags = btrfs_block_group_flags(&bg) & + BTRFS_BLOCK_GROUP_TYPE_MASK; + + if (flags != (em->map_lookup->type & + BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", + found_key.objectid, + found_key.offset, flags, + (BTRFS_BLOCK_GROUP_TYPE_MASK & + em->map_lookup->type)); + ret = -EUCLEAN; + } else { + ret = 0; + } } free_extent_map(em); goto out; From cf968bbccba9860eef9480ae560711fbc5cbc1b5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Sep 2018 07:59:34 +0800 Subject: [PATCH 0742/2608] btrfs: tree-checker: Check level for leaves and nodes commit f556faa46eb4e96d0d0772e74ecf66781e132f72 upstream. Although we have tree level check at tree read runtime, it's completely based on its parent level. We still need to do accurate level check to avoid invalid tree blocks sneak into kernel space. The check itself is simple, for leaf its level should always be 0. For nodes its level should be in range [1, BTRFS_MAX_LEVEL - 1]. Signed-off-by: Qu Wenruo Reviewed-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Backported to 4.14: - Pass root instead of fs_info to generic_err() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 31756bac75b4..fa8f64119e6f 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -447,6 +447,13 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, u32 nritems = btrfs_header_nritems(leaf); int slot; + if (btrfs_header_level(leaf) != 0) { + generic_err(root, leaf, 0, + "invalid level for leaf, have %d expect 0", + btrfs_header_level(leaf)); + return -EUCLEAN; + } + /* * Extent buffers from a relocation tree have a owner field that * corresponds to the subvolume tree they are based on. So just from an @@ -589,9 +596,16 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) unsigned long nr = btrfs_header_nritems(node); struct btrfs_key key, next_key; int slot; + int level = btrfs_header_level(node); u64 bytenr; int ret = 0; + if (level <= 0 || level >= BTRFS_MAX_LEVEL) { + generic_err(root, node, 0, + "invalid level for node, have %d expect [1, %d]", + level, BTRFS_MAX_LEVEL - 1); + return -EUCLEAN; + } if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { btrfs_crit(root->fs_info, "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", From 4b356df11ba2b44627e2fad7441e05abee46a628 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 5 Nov 2018 18:49:09 +0800 Subject: [PATCH 0743/2608] btrfs: tree-checker: Fix misleading group system information commit 761333f2f50ccc887aa9957ae829300262c0d15b upstream. block_group_err shows the group system as a decimal value with a '0x' prefix, which is somewhat misleading. Fix it to print hexadecimal, as was intended. Fixes: fce466eab7ac6 ("btrfs: tree-checker: Verify block_group_item") Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Shaokun Zhang Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index fa8f64119e6f..f206aec1525d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -399,7 +399,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, type != (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA)) { block_group_err(fs_info, leaf, slot, -"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", type, hweight64(type), BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_SYSTEM, From aec6ccb3dcc1ad9b8cfe12d086430ffcce887dda Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 8 Mar 2018 16:29:13 +0800 Subject: [PATCH 0744/2608] f2fs: check blkaddr more accuratly before issue a bio commit 0833721ec3658a4e9d5e58b6fa82cf9edc431e59 upstream. This patch check blkaddr more accuratly before issue a write or read bio. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/data.c | 5 +++-- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.h | 25 +++++++++++++++++++------ 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 41fce930f44c..1bd4cd8c79c6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -69,6 +69,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, + .is_meta = is_meta, }; if (unlikely(!is_meta)) @@ -163,6 +164,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, + .is_meta = (type != META_POR), }; struct blk_plug plug; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6fbb6d75318a..5913de3f661d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -369,6 +369,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + verify_block_addr(fio, fio->new_blkaddr); trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -413,8 +414,8 @@ next: } if (fio->old_blkaddr != NEW_ADDR) - verify_block_addr(sbi, fio->old_blkaddr); - verify_block_addr(sbi, fio->new_blkaddr); + verify_block_addr(fio, fio->old_blkaddr); + verify_block_addr(fio, fio->new_blkaddr); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 54f8520ad7a2..aa7b033af1b0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -910,6 +910,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ + bool is_meta; /* indicate borrow meta inode mapping or not */ enum iostat_type io_type; /* io type */ }; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 4dfb5080098f..4b635e8c91b0 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -53,13 +53,19 @@ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ (sbi)->segs_per_sec)) \ -#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) -#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) +#define MAIN_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) +#define SEG0_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr)) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) #define MAIN_SECS(sbi) ((sbi)->total_sections) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) +#define TOTAL_SEGS(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->segment_count : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) @@ -619,10 +625,17 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) - || blk_addr >= MAX_BLKADDR(sbi)); + struct f2fs_sb_info *sbi = fio->sbi; + + if (PAGE_TYPE_OF_BIO(fio->type) == META && + (!is_read_io(fio->op) || fio->is_meta)) + BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || + blk_addr >= MAIN_BLKADDR(sbi)); + else + BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || + blk_addr >= MAX_BLKADDR(sbi)); } /* From a8f40be69f80c7c5f969ae5cd93497dc7a764358 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Apr 2018 15:44:16 -0600 Subject: [PATCH 0745/2608] f2fs: sanity check on sit entry commit b2ca374f33bd33fd822eb871876e4888cf79dc97 upstream. syzbot hit the following crash on upstream commit 87ef12027b9b1dd0e0b12cf311fbcb19f9d92539 (Wed Apr 18 19:48:17 2018 +0000) Merge tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=83699adeb2d13579c31e C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5805208181407744 syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=6005073343676416 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=6555047731134464 Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 compiler: gcc (GCC) 8.0.1 20180413 (experimental) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. See footer for details. If you forward the report, please keep this part and the footer. F2FS-fs (loop0): Magic Mismatch, valid(0xf2f52010) - read(0x0) F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock F2FS-fs (loop0): invalid crc value BUG: unable to handle kernel paging request at ffffed006b2a50c0 PGD 21ffee067 P4D 21ffee067 PUD 21fbeb067 PMD 0 Oops: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4514 Comm: syzkaller989480 Not tainted 4.17.0-rc1+ #8 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:build_sit_entries fs/f2fs/segment.c:3653 [inline] RIP: 0010:build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 RSP: 0018:ffff8801b102e5b0 EFLAGS: 00010a06 RAX: 1ffff1006b2a50c0 RBX: 0000000000000004 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8801ac74243e RBP: ffff8801b102f410 R08: ffff8801acbd46c0 R09: fffffbfff14d9af8 R10: fffffbfff14d9af8 R11: ffff8801acbd46c0 R12: ffff8801ac742a80 R13: ffff8801d9519100 R14: dffffc0000000000 R15: ffff880359528600 FS: 0000000001e04880(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffed006b2a50c0 CR3: 00000001ac6ac000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: f2fs_fill_super+0x4095/0x7bf0 fs/f2fs/super.c:2803 mount_bdev+0x30c/0x3e0 fs/super.c:1165 f2fs_mount+0x34/0x40 fs/f2fs/super.c:3020 mount_fs+0xae/0x328 fs/super.c:1268 vfs_kern_mount.part.34+0xd4/0x4d0 fs/namespace.c:1037 vfs_kern_mount fs/namespace.c:1027 [inline] do_new_mount fs/namespace.c:2517 [inline] do_mount+0x564/0x3070 fs/namespace.c:2847 ksys_mount+0x12d/0x140 fs/namespace.c:3063 __do_sys_mount fs/namespace.c:3077 [inline] __se_sys_mount fs/namespace.c:3074 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3074 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x443d6a RSP: 002b:00007ffd312813c8 EFLAGS: 00000297 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000020000c00 RCX: 0000000000443d6a RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffd312813d0 RBP: 0000000000000003 R08: 0000000020016a00 R09: 000000000000000a R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000004 R13: 0000000000402c60 R14: 0000000000000000 R15: 0000000000000000 RIP: build_sit_entries fs/f2fs/segment.c:3653 [inline] RSP: ffff8801b102e5b0 RIP: build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 RSP: ffff8801b102e5b0 CR2: ffffed006b2a50c0 ---[ end trace a2034989e196ff17 ]--- Reported-and-tested-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/segment.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3c7bbbae0afa..1104a6c80251 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3304,6 +3304,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int old_valid_blocks; start = le32_to_cpu(segno_in_journal(journal, i)); + if (start >= MAIN_SEGS(sbi)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong journal entry on segno %u", + start); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + break; + } + se = &sit_i->sentries[start]; sit = sit_in_journal(journal, i); From e60b97231950fb5b55d2a981b080e3a6f7516a00 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Apr 2018 19:03:22 -0700 Subject: [PATCH 0746/2608] f2fs: enhance sanity_check_raw_super() to avoid potential overflow commit 0cfe75c5b011994651a4ca6d74f20aa997bfc69a upstream. In order to avoid the below overflow issue, we should have checked the boundaries in superblock before reaching out to allocation. As Linus suggested, the right place should be sanity_check_raw_super(). Dr Silvio Cesare of InfoSect reported: There are integer overflows with using the cp_payload superblock field in the f2fs filesystem potentially leading to memory corruption. include/linux/f2fs_fs.h struct f2fs_super_block { ... __le32 cp_payload; fs/f2fs/f2fs.h typedef u32 block_t; /* * should not change u32, since it is the on-disk block * address format, __le32. */ ... static inline block_t __cp_payload(struct f2fs_sb_info *sbi) { return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); } fs/f2fs/checkpoint.c block_t start_blk, orphan_blocks, i, j; ... start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); +++ integer overflows ... unsigned int cp_blks = 1 + __cp_payload(sbi); ... sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); +++ integer overflow leading to incorrect heap allocation. int cp_payload_blks = __cp_payload(sbi); ... ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); +++ sign bug and integer overflow ... for (i = 1; i < 1 + cp_payload_blks; i++) +++ integer overflow ... sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE - __cp_payload(sbi)) * F2FS_ORPHANS_PER_BLOCK; +++ integer overflow Reported-by: Greg KH Reported-by: Silvio Cesare Suggested-by: Linus Torvalds Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: No hot file extension support] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7cda685296b2..c3e1090e7c0a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1807,6 +1807,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, static int sanity_check_raw_super(struct f2fs_sb_info *sbi, struct buffer_head *bh) { + block_t segment_count, segs_per_sec, secs_per_zone; + block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); struct super_block *sb = sbi->sb; @@ -1863,6 +1865,68 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } + segment_count = le32_to_cpu(raw_super->segment_count); + segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); + secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); + total_sections = le32_to_cpu(raw_super->section_count); + + /* blocks_per_seg should be 512, given the above check */ + blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg); + + if (segment_count > F2FS_MAX_SEGMENT || + segment_count < F2FS_MIN_SEGMENTS) { + f2fs_msg(sb, KERN_INFO, + "Invalid segment count (%u)", + segment_count); + return 1; + } + + if (total_sections > segment_count || + total_sections < F2FS_MIN_SEGMENTS || + segs_per_sec > segment_count || !segs_per_sec) { + f2fs_msg(sb, KERN_INFO, + "Invalid segment/section count (%u, %u x %u)", + segment_count, total_sections, segs_per_sec); + return 1; + } + + if ((segment_count / segs_per_sec) < total_sections) { + f2fs_msg(sb, KERN_INFO, + "Small segment_count (%u < %u * %u)", + segment_count, segs_per_sec, total_sections); + return 1; + } + + if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) { + f2fs_msg(sb, KERN_INFO, + "Wrong segment_count / block_count (%u > %u)", + segment_count, le32_to_cpu(raw_super->block_count)); + return 1; + } + + if (secs_per_zone > total_sections) { + f2fs_msg(sb, KERN_INFO, + "Wrong secs_per_zone (%u > %u)", + secs_per_zone, total_sections); + return 1; + } + if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) { + f2fs_msg(sb, KERN_INFO, + "Corrupted extension count (%u > %u)", + le32_to_cpu(raw_super->extension_count), + F2FS_MAX_EXTENSION); + return 1; + } + + if (le32_to_cpu(raw_super->cp_payload) > + (blocks_per_seg - F2FS_CP_PACKS)) { + f2fs_msg(sb, KERN_INFO, + "Insane cp_payload (%u > %u)", + le32_to_cpu(raw_super->cp_payload), + blocks_per_seg - F2FS_CP_PACKS); + return 1; + } + /* check reserved ino info */ if (le32_to_cpu(raw_super->node_ino) != 1 || le32_to_cpu(raw_super->meta_ino) != 2 || @@ -1875,13 +1939,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment count (%u)", - le32_to_cpu(raw_super->segment_count)); - return 1; - } - /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ if (sanity_check_area_boundary(sbi, bh)) return 1; From 9e6c4a8557e27c4f074a47e27ce981edfb1f8b91 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 May 2018 22:25:08 +0800 Subject: [PATCH 0747/2608] f2fs: clean up with is_valid_blkaddr() commit 7b525dd01365c6764018e374d391c92466be1b7a upstream. - rename is_valid_blkaddr() to is_valid_meta_blkaddr() for readability. - introduce is_valid_blkaddr() for cleanup. No logic change in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 18 +++++------------- fs/f2fs/f2fs.h | 9 ++++++++- fs/f2fs/file.c | 2 +- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 5 ++--- fs/f2fs/recovery.c | 6 +++--- fs/f2fs/segment.c | 4 ++-- fs/f2fs/segment.h | 2 +- 9 files changed, 25 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1bd4cd8c79c6..2b951046657e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -118,7 +118,7 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, false); } -bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) +bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { switch (type) { case META_NAT: @@ -174,7 +174,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { - if (!is_valid_blkaddr(sbi, blkno, type)) + if (!is_valid_meta_blkaddr(sbi, blkno, type)) goto out; switch (type) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5913de3f661d..f7b2909e9c5f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -413,7 +413,7 @@ next: spin_unlock(&io->io_lock); } - if (fio->old_blkaddr != NEW_ADDR) + if (is_valid_blkaddr(fio->old_blkaddr)) verify_block_addr(fio, fio->old_blkaddr); verify_block_addr(fio, fio->new_blkaddr); @@ -946,7 +946,7 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { + if (!is_valid_blkaddr(blkaddr)) { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1388,15 +1388,6 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) return need_inplace_update_policy(inode, fio); } -static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) -{ - if (fio->old_blkaddr == NEW_ADDR) - return false; - if (fio->old_blkaddr == NULL_ADDR) - return false; - return true; -} - int do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; @@ -1411,7 +1402,7 @@ int do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (valid_ipu_blkaddr(fio)) { + if (is_valid_blkaddr(fio->old_blkaddr)) { ipu_force = true; fio->need_lock = LOCK_DONE; goto got_it; @@ -1438,7 +1429,8 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { + if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) && + need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) goto out_writepage; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index aa7b033af1b0..d74c77b51b71 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2355,6 +2355,13 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +static inline bool is_valid_blkaddr(block_t blkaddr) +{ + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + return false; + return true; +} + /* * file.c */ @@ -2565,7 +2572,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); -bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6f589730782d..d368eda462bb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -334,7 +334,7 @@ static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) + is_valid_blkaddr(blkaddr)) return true; break; case SEEK_HOLE: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 259b0aa283f0..2bcbbf566f0c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -66,7 +66,7 @@ static bool __written_first_block(struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (addr != NEW_ADDR && addr != NULL_ADDR) + if (is_valid_blkaddr(addr)) return true; return false; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 712505ec5de4..5f212fb2d62b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -334,8 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, new_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && - nat_get_blkaddr(e) != NULL_ADDR && + f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) && new_blkaddr == NEW_ADDR); /* increment version no as node is removed */ @@ -350,7 +349,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* change address */ nat_set_blkaddr(e, new_blkaddr); - if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) + if (!is_valid_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 765fadf954af..53f41ad4cbe1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) + if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) return 0; page = get_tmp_page(sbi, blkaddr); @@ -479,7 +479,7 @@ retry_dn: } /* dest is valid block, try to recover from src to dest */ - if (is_valid_blkaddr(sbi, dest, META_POR)) { + if (is_valid_meta_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { err = reserve_new_block(&dn); @@ -540,7 +540,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) + if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) break; ra_meta_pages_cond(sbi, blkaddr); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1104a6c80251..483d7a869679 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1758,7 +1758,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) struct seg_entry *se; bool is_cp = false; - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (!is_valid_blkaddr(blkaddr)) return true; mutex_lock(&sit_i->sentry_lock); @@ -2571,7 +2571,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (!is_valid_blkaddr(blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 4b635e8c91b0..f977774338c3 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -85,7 +85,7 @@ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \ + ((!is_valid_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ From eea715704673314769f2613b1ead310306b7de05 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 5 Jun 2018 17:44:11 +0800 Subject: [PATCH 0748/2608] f2fs: introduce and spread verify_blkaddr commit e1da7872f6eda977bd812346bf588c35e4495a1e upstream. This patch introduces verify_blkaddr to check meta/data block address with valid range to detect bug earlier. In addition, once we encounter an invalid blkaddr, notice user to run fsck to fix, and let the kernel panic. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: I skipped an earlier renaming of is_valid_meta_blkaddr() to f2fs_is_valid_meta_blkaddr()] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 11 +++++++++-- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 32 +++++++++++++++++++++++++++++--- fs/f2fs/file.c | 9 +++++---- fs/f2fs/inode.c | 7 ++++--- fs/f2fs/node.c | 4 ++-- fs/f2fs/recovery.c | 6 +++--- fs/f2fs/segment.c | 4 ++-- fs/f2fs/segment.h | 8 +++----- 9 files changed, 61 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2b951046657e..d7bd9745e883 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -118,7 +118,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, false); } -bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: @@ -138,10 +139,16 @@ bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) return false; break; case META_POR: + case DATA_GENERIC: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) return false; break; + case META_GENERIC: + if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || + blkaddr >= MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } @@ -174,7 +181,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { - if (!is_valid_meta_blkaddr(sbi, blkno, type)) + if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) goto out; switch (type) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f7b2909e9c5f..615878806611 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -413,7 +413,7 @@ next: spin_unlock(&io->io_lock); } - if (is_valid_blkaddr(fio->old_blkaddr)) + if (__is_valid_data_blkaddr(fio->old_blkaddr)) verify_block_addr(fio, fio->old_blkaddr); verify_block_addr(fio, fio->new_blkaddr); @@ -946,7 +946,7 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (!is_valid_blkaddr(blkaddr)) { + if (!is_valid_data_blkaddr(sbi, blkaddr)) { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1402,7 +1402,7 @@ int do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (is_valid_blkaddr(fio->old_blkaddr)) { + if (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr)) { ipu_force = true; fio->need_lock = LOCK_DONE; goto got_it; @@ -1429,7 +1429,7 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) && + if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) && need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d74c77b51b71..d15d79457f5c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -162,7 +162,7 @@ struct cp_control { }; /* - * For CP/NAT/SIT/SSA readahead + * indicate meta/data type */ enum { META_CP, @@ -170,6 +170,8 @@ enum { META_SIT, META_SSA, META_POR, + DATA_GENERIC, + META_GENERIC, }; /* for the list of ino */ @@ -2355,13 +2357,36 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } -static inline bool is_valid_blkaddr(block_t blkaddr) +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); +static inline void verify_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { + f2fs_msg(sbi->sb, KERN_ERR, + "invalid blkaddr: %u, type: %d, run fsck to fix.", + blkaddr, type); + f2fs_bug_on(sbi, 1); + } +} + +static inline bool __is_valid_data_blkaddr(block_t blkaddr) { if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) return false; return true; } +static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + if (!__is_valid_data_blkaddr(blkaddr)) + return false; + verify_blkaddr(sbi, blkaddr, DATA_GENERIC); + return true; +} + /* * file.c */ @@ -2572,7 +2597,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); -bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d368eda462bb..e9c575ef70b5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -328,13 +328,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping, return pgofs; } -static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, - int whence) +static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, + pgoff_t dirty, pgoff_t pgofs, int whence) { switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - is_valid_blkaddr(blkaddr)) + is_valid_data_blkaddr(sbi, blkaddr)) return true; break; case SEEK_HOLE: @@ -397,7 +397,8 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (__found_offset(blkaddr, dirty, pgofs, whence)) { + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, + pgofs, whence)) { f2fs_put_dnode(&dn); goto found; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2bcbbf566f0c..b1c0eb433841 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -62,11 +62,12 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static bool __written_first_block(struct f2fs_inode *ri) +static bool __written_first_block(struct f2fs_sb_info *sbi, + struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (is_valid_blkaddr(addr)) + if (is_valid_data_blkaddr(sbi, addr)) return true; return false; } @@ -235,7 +236,7 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (__written_first_block(ri)) + if (__written_first_block(sbi, ri)) set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); if (!need_inode_block_update(sbi, inode->i_ino)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5f212fb2d62b..999814c8cbea 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, new_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) && + f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) && new_blkaddr == NEW_ADDR); /* increment version no as node is removed */ @@ -349,7 +349,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* change address */ nat_set_blkaddr(e, new_blkaddr); - if (!is_valid_blkaddr(new_blkaddr)) + if (!is_valid_data_blkaddr(sbi, new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 53f41ad4cbe1..6ea445377767 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; page = get_tmp_page(sbi, blkaddr); @@ -479,7 +479,7 @@ retry_dn: } /* dest is valid block, try to recover from src to dest */ - if (is_valid_meta_blkaddr(sbi, dest, META_POR)) { + if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { err = reserve_new_block(&dn); @@ -540,7 +540,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; ra_meta_pages_cond(sbi, blkaddr); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 483d7a869679..5c698757e116 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1758,7 +1758,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) struct seg_entry *se; bool is_cp = false; - if (!is_valid_blkaddr(blkaddr)) + if (!is_valid_data_blkaddr(sbi, blkaddr)) return true; mutex_lock(&sit_i->sentry_lock); @@ -2571,7 +2571,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; - if (!is_valid_blkaddr(blkaddr)) + if (!is_valid_data_blkaddr(sbi, blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f977774338c3..dd8f977fc273 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -85,7 +85,7 @@ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - ((!is_valid_blkaddr(blk_addr)) ? \ + ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ @@ -631,11 +631,9 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) if (PAGE_TYPE_OF_BIO(fio->type) == META && (!is_read_io(fio->op) || fio->is_meta)) - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || - blk_addr >= MAIN_BLKADDR(sbi)); + verify_blkaddr(sbi, blk_addr, META_GENERIC); else - BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || - blk_addr >= MAX_BLKADDR(sbi)); + verify_blkaddr(sbi, blk_addr, DATA_GENERIC); } /* From f3d6361a96a455c8ba12226a04efa67a0ada4966 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 23 Jun 2018 00:12:36 +0800 Subject: [PATCH 0749/2608] f2fs: fix to do sanity check with secs_per_zone commit 42bf546c1fe3f3654bdf914e977acbc2b80a5be5 upstream. As Wen Xu reported in below link: https://bugzilla.kernel.org/show_bug.cgi?id=200183 - Overview Divide zero in reset_curseg() when mounting a crafted f2fs image - Reproduce - Kernel message [ 588.281510] divide error: 0000 [#1] SMP KASAN PTI [ 588.282701] CPU: 0 PID: 1293 Comm: mount Not tainted 4.18.0-rc1+ #4 [ 588.284000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 588.286178] RIP: 0010:reset_curseg+0x94/0x1a0 [ 588.298166] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246 [ 588.299360] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b [ 588.300809] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64 [ 588.305272] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000 [ 588.306822] FS: 00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 588.308456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 588.309623] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0 [ 588.311085] Call Trace: [ 588.311637] f2fs_build_segment_manager+0x103f/0x3410 [ 588.316136] ? f2fs_commit_super+0x1b0/0x1b0 [ 588.317031] ? set_blocksize+0x90/0x140 [ 588.319473] f2fs_mount+0x15/0x20 [ 588.320166] mount_fs+0x60/0x1a0 [ 588.320847] ? alloc_vfsmnt+0x309/0x360 [ 588.321647] vfs_kern_mount+0x6b/0x1a0 [ 588.322432] do_mount+0x34a/0x18c0 [ 588.323175] ? strndup_user+0x46/0x70 [ 588.323937] ? copy_mount_string+0x20/0x20 [ 588.324793] ? memcg_kmem_put_cache+0x1b/0xa0 [ 588.325702] ? kasan_check_write+0x14/0x20 [ 588.326562] ? _copy_from_user+0x6a/0x90 [ 588.327375] ? memdup_user+0x42/0x60 [ 588.328118] ksys_mount+0x83/0xd0 [ 588.328808] __x64_sys_mount+0x67/0x80 [ 588.329607] do_syscall_64+0x78/0x170 [ 588.330400] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 588.331461] RIP: 0033:0x7fad848e8b9a [ 588.336022] RSP: 002b:00007ffd7c5b6be8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 588.337547] RAX: ffffffffffffffda RBX: 00000000016f8030 RCX: 00007fad848e8b9a [ 588.338999] RDX: 00000000016f8210 RSI: 00000000016f9f30 RDI: 0000000001700ec0 [ 588.340442] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 588.341887] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001700ec0 [ 588.343341] R13: 00000000016f8210 R14: 0000000000000000 R15: 0000000000000003 [ 588.354891] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 588.355862] RIP: 0010:reset_curseg+0x94/0x1a0 [ 588.360742] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246 [ 588.361812] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b [ 588.363485] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64 [ 588.365213] RBP: ffff8801e88d7968 R08: ffffed003c32266f R09: ffffed003c32266f [ 588.366661] R10: 0000000000000001 R11: ffffed003c32266e R12: ffff8801f0337700 [ 588.368110] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000 [ 588.370057] FS: 00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 588.372099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 588.373291] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0 - Location https://elixir.bootlin.com/linux/latest/source/fs/f2fs/segment.c#L2147 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); If secs_per_zone is corrupted due to fuzzing test, it will cause divide zero operation when using GET_ZONE_FROM_SEG macro, so we should do more sanity check with secs_per_zone during mount to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c3e1090e7c0a..084ff9c82a37 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1904,9 +1904,9 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (secs_per_zone > total_sections) { + if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_msg(sb, KERN_INFO, - "Wrong secs_per_zone (%u > %u)", + "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); return 1; } From 2598fc56ed65f69e303ccee6d5fe756a56f87779 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 29 Nov 2018 19:17:34 +0000 Subject: [PATCH 0750/2608] f2fs: Add sanity_check_inode() function This was done as part of commit 5d64600d4f33 "f2fs: avoid bug_on on corrupted inode" upstream, but the specific check that commit added is not applicable to 4.14. Cc: Jaegeuk Kim Cc: Chao Yu Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/inode.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b1c0eb433841..917a6c3d5649 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -180,6 +180,13 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); } +static bool sanity_check_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + return true; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -281,6 +288,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = do_read_inode(inode); if (ret) goto bad_inode; + if (!sanity_check_inode(inode)) { + ret = -EINVAL; + goto bad_inode; + } make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; From 0081c90ebacebb3a82d0d24bf0f42273ce2d902e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 Jun 2018 23:29:49 +0800 Subject: [PATCH 0751/2608] f2fs: fix to do sanity check with extra_attr feature commit 76d56d4ab4f2a9e4f085c7d77172194ddaccf7d2 upstream. If FI_EXTRA_ATTR is set in inode by fuzzing, inode.i_addr[0] will be parsed as inode.i_extra_isize, then in __recover_inline_status, inline data address will beyond boundary of page, result in accessing invalid memory. So in this condition, during reading inode page, let's do sanity check with EXTRA_ATTR feature of fs and extra_attr bit of inode, if they're inconsistent, deny to load this inode. - Overview Out-of-bound access in f2fs_iget() when mounting a corrupted f2fs image - Reproduce The following message will be got in KASAN build of 4.18 upstream kernel. [ 819.392227] ================================================================== [ 819.393901] BUG: KASAN: slab-out-of-bounds in f2fs_iget+0x736/0x1530 [ 819.395329] Read of size 4 at addr ffff8801f099c968 by task mount/1292 [ 819.397079] CPU: 1 PID: 1292 Comm: mount Not tainted 4.18.0-rc1+ #4 [ 819.397082] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 819.397088] Call Trace: [ 819.397124] dump_stack+0x7b/0xb5 [ 819.397154] print_address_description+0x70/0x290 [ 819.397159] kasan_report+0x291/0x390 [ 819.397163] ? f2fs_iget+0x736/0x1530 [ 819.397176] check_memory_region+0x139/0x190 [ 819.397182] __asan_loadN+0xf/0x20 [ 819.397185] f2fs_iget+0x736/0x1530 [ 819.397197] f2fs_fill_super+0x1b4f/0x2b40 [ 819.397202] ? f2fs_fill_super+0x1b4f/0x2b40 [ 819.397208] ? f2fs_commit_super+0x1b0/0x1b0 [ 819.397227] ? set_blocksize+0x90/0x140 [ 819.397241] mount_bdev+0x1c5/0x210 [ 819.397245] ? f2fs_commit_super+0x1b0/0x1b0 [ 819.397252] f2fs_mount+0x15/0x20 [ 819.397256] mount_fs+0x60/0x1a0 [ 819.397267] ? alloc_vfsmnt+0x309/0x360 [ 819.397272] vfs_kern_mount+0x6b/0x1a0 [ 819.397282] do_mount+0x34a/0x18c0 [ 819.397300] ? lockref_put_or_lock+0xcf/0x160 [ 819.397306] ? copy_mount_string+0x20/0x20 [ 819.397318] ? memcg_kmem_put_cache+0x1b/0xa0 [ 819.397324] ? kasan_check_write+0x14/0x20 [ 819.397334] ? _copy_from_user+0x6a/0x90 [ 819.397353] ? memdup_user+0x42/0x60 [ 819.397359] ksys_mount+0x83/0xd0 [ 819.397365] __x64_sys_mount+0x67/0x80 [ 819.397388] do_syscall_64+0x78/0x170 [ 819.397403] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.397422] RIP: 0033:0x7f54c667cb9a [ 819.397424] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 819.397483] RSP: 002b:00007ffd8f46cd08 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5 [ 819.397496] RAX: ffffffffffffffda RBX: 0000000000dfa030 RCX: 00007f54c667cb9a [ 819.397498] RDX: 0000000000dfa210 RSI: 0000000000dfbf30 RDI: 0000000000e02ec0 [ 819.397501] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 819.397503] R10: 00000000c0ed0000 R11: 0000000000000202 R12: 0000000000e02ec0 [ 819.397505] R13: 0000000000dfa210 R14: 0000000000000000 R15: 0000000000000003 [ 819.397866] Allocated by task 139: [ 819.398702] save_stack+0x46/0xd0 [ 819.398705] kasan_kmalloc+0xad/0xe0 [ 819.398709] kasan_slab_alloc+0x11/0x20 [ 819.398713] kmem_cache_alloc+0xd1/0x1e0 [ 819.398717] dup_fd+0x50/0x4c0 [ 819.398740] copy_process.part.37+0xbed/0x32e0 [ 819.398744] _do_fork+0x16e/0x590 [ 819.398748] __x64_sys_clone+0x69/0x80 [ 819.398752] do_syscall_64+0x78/0x170 [ 819.398756] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.399097] Freed by task 159: [ 819.399743] save_stack+0x46/0xd0 [ 819.399747] __kasan_slab_free+0x13c/0x1a0 [ 819.399750] kasan_slab_free+0xe/0x10 [ 819.399754] kmem_cache_free+0x89/0x1e0 [ 819.399757] put_files_struct+0x132/0x150 [ 819.399761] exit_files+0x62/0x70 [ 819.399766] do_exit+0x47b/0x1390 [ 819.399770] do_group_exit+0x86/0x130 [ 819.399774] __x64_sys_exit_group+0x2c/0x30 [ 819.399778] do_syscall_64+0x78/0x170 [ 819.399782] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.400115] The buggy address belongs to the object at ffff8801f099c680 which belongs to the cache files_cache of size 704 [ 819.403234] The buggy address is located 40 bytes to the right of 704-byte region [ffff8801f099c680, ffff8801f099c940) [ 819.405689] The buggy address belongs to the page: [ 819.406709] page:ffffea0007c26700 count:1 mapcount:0 mapping:ffff8801f69a3340 index:0xffff8801f099d380 compound_mapcount: 0 [ 819.408984] flags: 0x2ffff0000008100(slab|head) [ 819.409932] raw: 02ffff0000008100 ffffea00077fb600 0000000200000002 ffff8801f69a3340 [ 819.411514] raw: ffff8801f099d380 0000000080130000 00000001ffffffff 0000000000000000 [ 819.413073] page dumped because: kasan: bad access detected [ 819.414539] Memory state around the buggy address: [ 819.415521] ffff8801f099c800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.416981] ffff8801f099c880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.418454] >ffff8801f099c900: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 819.419921] ^ [ 819.421265] ffff8801f099c980: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb [ 819.422745] ffff8801f099ca00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.424206] ================================================================== [ 819.425668] Disabling lock debugging due to kernel taint [ 819.457463] F2FS-fs (loop0): Mounted with checkpoint version = 3 The kernel still mounts the image. If you run the following program on the mounted folder mnt, (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); int fd = open(foo_bar_baz, O_RDONLY, 0); if (fd >= 0) { read(fd, (char *)buf, 11); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } You can get kernel crash: [ 819.457463] F2FS-fs (loop0): Mounted with checkpoint version = 3 [ 918.028501] BUG: unable to handle kernel paging request at ffffed0048000d82 [ 918.044020] PGD 23ffee067 P4D 23ffee067 PUD 23fbef067 PMD 0 [ 918.045207] Oops: 0000 [#1] SMP KASAN PTI [ 918.046048] CPU: 0 PID: 1309 Comm: poc Tainted: G B 4.18.0-rc1+ #4 [ 918.047573] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 918.049552] RIP: 0010:check_memory_region+0x5e/0x190 [ 918.050565] Code: f8 49 c1 e8 03 49 89 db 49 c1 eb 03 4d 01 cb 4d 01 c1 4d 8d 63 01 4c 89 c8 4d 89 e2 4d 29 ca 49 83 fa 10 7f 3d 4d 85 d2 74 32 <41> 80 39 00 75 23 48 b8 01 00 00 00 00 fc ff df 4d 01 d1 49 01 c0 [ 918.054322] RSP: 0018:ffff8801e3a1f258 EFLAGS: 00010202 [ 918.055400] RAX: ffffed0048000d82 RBX: ffff880240006c11 RCX: ffffffffb8867d14 [ 918.056832] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880240006c10 [ 918.058253] RBP: ffff8801e3a1f268 R08: 1ffff10048000d82 R09: ffffed0048000d82 [ 918.059717] R10: 0000000000000001 R11: ffffed0048000d82 R12: ffffed0048000d83 [ 918.061159] R13: ffff8801e3a1f390 R14: 0000000000000000 R15: ffff880240006c08 [ 918.062614] FS: 00007fac9732c700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 918.064246] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 918.065412] CR2: ffffed0048000d82 CR3: 00000001df77a000 CR4: 00000000000006f0 [ 918.066882] Call Trace: [ 918.067410] __asan_loadN+0xf/0x20 [ 918.068149] f2fs_find_target_dentry+0xf4/0x270 [ 918.069083] ? __get_node_page+0x331/0x5b0 [ 918.069925] f2fs_find_in_inline_dir+0x24b/0x310 [ 918.070881] ? f2fs_recover_inline_data+0x4c0/0x4c0 [ 918.071905] ? unwind_next_frame.part.5+0x34f/0x490 [ 918.072901] ? unwind_dump+0x290/0x290 [ 918.073695] ? is_bpf_text_address+0xe/0x20 [ 918.074566] __f2fs_find_entry+0x599/0x670 [ 918.075408] ? kasan_unpoison_shadow+0x36/0x50 [ 918.076315] ? kasan_kmalloc+0xad/0xe0 [ 918.077100] ? memcg_kmem_put_cache+0x55/0xa0 [ 918.077998] ? f2fs_find_target_dentry+0x270/0x270 [ 918.079006] ? d_set_d_op+0x30/0x100 [ 918.079749] ? __d_lookup_rcu+0x69/0x2e0 [ 918.080556] ? __d_alloc+0x275/0x450 [ 918.081297] ? kasan_check_write+0x14/0x20 [ 918.082135] ? memset+0x31/0x40 [ 918.082820] ? fscrypt_setup_filename+0x1ec/0x4c0 [ 918.083782] ? d_alloc_parallel+0x5bb/0x8c0 [ 918.084640] f2fs_find_entry+0xe9/0x110 [ 918.085432] ? __f2fs_find_entry+0x670/0x670 [ 918.086308] ? kasan_check_write+0x14/0x20 [ 918.087163] f2fs_lookup+0x297/0x590 [ 918.087902] ? f2fs_link+0x2b0/0x2b0 [ 918.088646] ? legitimize_path.isra.29+0x61/0xa0 [ 918.089589] __lookup_slow+0x12e/0x240 [ 918.090371] ? may_delete+0x2b0/0x2b0 [ 918.091123] ? __nd_alloc_stack+0xa0/0xa0 [ 918.091944] lookup_slow+0x44/0x60 [ 918.092642] walk_component+0x3ee/0xa40 [ 918.093428] ? is_bpf_text_address+0xe/0x20 [ 918.094283] ? pick_link+0x3e0/0x3e0 [ 918.095047] ? in_group_p+0xa5/0xe0 [ 918.095771] ? generic_permission+0x53/0x1e0 [ 918.096666] ? security_inode_permission+0x1d/0x70 [ 918.097646] ? inode_permission+0x7a/0x1f0 [ 918.098497] link_path_walk+0x2a2/0x7b0 [ 918.099298] ? apparmor_capget+0x3d0/0x3d0 [ 918.100140] ? walk_component+0xa40/0xa40 [ 918.100958] ? path_init+0x2e6/0x580 [ 918.101695] path_openat+0x1bb/0x2160 [ 918.102471] ? __save_stack_trace+0x92/0x100 [ 918.103352] ? save_stack+0xb5/0xd0 [ 918.104070] ? vfs_unlink+0x250/0x250 [ 918.104822] ? save_stack+0x46/0xd0 [ 918.105538] ? kasan_slab_alloc+0x11/0x20 [ 918.106370] ? kmem_cache_alloc+0xd1/0x1e0 [ 918.107213] ? getname_flags+0x76/0x2c0 [ 918.107997] ? getname+0x12/0x20 [ 918.108677] ? do_sys_open+0x14b/0x2c0 [ 918.109450] ? __x64_sys_open+0x4c/0x60 [ 918.110255] ? do_syscall_64+0x78/0x170 [ 918.111083] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.112148] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.113204] ? f2fs_empty_inline_dir+0x1e0/0x1e0 [ 918.114150] ? timespec64_trunc+0x5c/0x90 [ 918.114993] ? wb_io_lists_depopulated+0x1a/0xc0 [ 918.115937] ? inode_io_list_move_locked+0x102/0x110 [ 918.116949] do_filp_open+0x12b/0x1d0 [ 918.117709] ? may_open_dev+0x50/0x50 [ 918.118475] ? kasan_kmalloc+0xad/0xe0 [ 918.119246] do_sys_open+0x17c/0x2c0 [ 918.119983] ? do_sys_open+0x17c/0x2c0 [ 918.120751] ? filp_open+0x60/0x60 [ 918.121463] ? task_work_run+0x4d/0xf0 [ 918.122237] __x64_sys_open+0x4c/0x60 [ 918.123001] do_syscall_64+0x78/0x170 [ 918.123759] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.124802] RIP: 0033:0x7fac96e3e040 [ 918.125537] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 09 27 2d 00 00 75 10 b8 02 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 7e e0 01 00 48 89 04 24 [ 918.129341] RSP: 002b:00007fff1b37f848 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 [ 918.130870] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fac96e3e040 [ 918.132295] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000122d080 [ 918.133748] RBP: 00007fff1b37f9b0 R08: 00007fac9710bbd8 R09: 0000000000000001 [ 918.135209] R10: 000000000000069d R11: 0000000000000246 R12: 0000000000400c20 [ 918.136650] R13: 00007fff1b37fab0 R14: 0000000000000000 R15: 0000000000000000 [ 918.138093] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 918.147924] CR2: ffffed0048000d82 [ 918.148619] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 918.149563] RIP: 0010:check_memory_region+0x5e/0x190 [ 918.150576] Code: f8 49 c1 e8 03 49 89 db 49 c1 eb 03 4d 01 cb 4d 01 c1 4d 8d 63 01 4c 89 c8 4d 89 e2 4d 29 ca 49 83 fa 10 7f 3d 4d 85 d2 74 32 <41> 80 39 00 75 23 48 b8 01 00 00 00 00 fc ff df 4d 01 d1 49 01 c0 [ 918.154360] RSP: 0018:ffff8801e3a1f258 EFLAGS: 00010202 [ 918.155411] RAX: ffffed0048000d82 RBX: ffff880240006c11 RCX: ffffffffb8867d14 [ 918.156833] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880240006c10 [ 918.158257] RBP: ffff8801e3a1f268 R08: 1ffff10048000d82 R09: ffffed0048000d82 [ 918.159722] R10: 0000000000000001 R11: ffffed0048000d82 R12: ffffed0048000d83 [ 918.161149] R13: ffff8801e3a1f390 R14: 0000000000000000 R15: ffff880240006c08 [ 918.162587] FS: 00007fac9732c700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 918.164203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 918.165356] CR2: ffffed0048000d82 CR3: 00000001df77a000 CR4: 00000000000006f0 Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/inode.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 917a6c3d5649..428d502f23e8 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -184,6 +184,15 @@ static bool sanity_check_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + if (f2fs_has_extra_attr(inode) && + !f2fs_sb_has_extra_attr(sbi->sb)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) is with extra_attr, " + "but extra_attr feature is off", + __func__, inode->i_ino); + return false; + } return true; } @@ -233,6 +242,11 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); + if (!sanity_check_inode(inode)) { + f2fs_put_page(node_page, 1); + return -EINVAL; + } + fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; @@ -288,10 +302,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = do_read_inode(inode); if (ret) goto bad_inode; - if (!sanity_check_inode(inode)) { - ret = -EINVAL; - goto bad_inode; - } make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; From f9cf5462b51d98026275cc51437fc531e808b64a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 Jun 2018 18:05:54 +0800 Subject: [PATCH 0752/2608] f2fs: fix to do sanity check with user_block_count commit 9dc956b2c8523aed39d1e6508438be9fea28c8fc upstream. This patch fixs to do sanity check with user_block_count. - Overview Divide zero in utilization when mount() a corrupted f2fs image - Reproduce (4.18 upstream kernel) - Kernel message [ 564.099503] F2FS-fs (loop0): invalid crc value [ 564.101991] divide error: 0000 [#1] SMP KASAN PTI [ 564.103103] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Not tainted 4.18.0-rc1+ #4 [ 564.104584] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 564.106624] RIP: 0010:issue_discard_thread+0x248/0x5c0 [ 564.107692] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86 [ 564.111686] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206 [ 564.112775] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03 [ 564.114250] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850 [ 564.115706] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0 [ 564.117177] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc [ 564.118634] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000 [ 564.120094] FS: 0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 564.121748] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 564.122923] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0 [ 564.124383] Call Trace: [ 564.124924] ? __issue_discard_cmd+0x480/0x480 [ 564.125882] ? __sched_text_start+0x8/0x8 [ 564.126756] ? __kthread_parkme+0xcb/0x100 [ 564.127620] ? kthread_blkcg+0x70/0x70 [ 564.128412] kthread+0x180/0x1d0 [ 564.129105] ? __issue_discard_cmd+0x480/0x480 [ 564.130029] ? kthread_associate_blkcg+0x150/0x150 [ 564.131033] ret_from_fork+0x35/0x40 [ 564.131794] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 564.141798] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 564.142773] RIP: 0010:issue_discard_thread+0x248/0x5c0 [ 564.143885] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86 [ 564.147776] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206 [ 564.148856] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03 [ 564.150424] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850 [ 564.151906] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0 [ 564.153463] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc [ 564.154915] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000 [ 564.156405] FS: 0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 564.158070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 564.159279] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0 [ 564.161043] ================================================================== [ 564.162587] BUG: KASAN: stack-out-of-bounds in from_kuid_munged+0x1d/0x50 [ 564.163994] Read of size 4 at addr ffff8801f3117c84 by task f2fs_discard-7:/1298 [ 564.165852] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Tainted: G D 4.18.0-rc1+ #4 [ 564.167593] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 564.169522] Call Trace: [ 564.170057] dump_stack+0x7b/0xb5 [ 564.170778] print_address_description+0x70/0x290 [ 564.171765] kasan_report+0x291/0x390 [ 564.172540] ? from_kuid_munged+0x1d/0x50 [ 564.173408] __asan_load4+0x78/0x80 [ 564.174148] from_kuid_munged+0x1d/0x50 [ 564.174962] do_notify_parent+0x1f5/0x4f0 [ 564.175808] ? send_sigqueue+0x390/0x390 [ 564.176639] ? css_set_move_task+0x152/0x340 [ 564.184197] do_exit+0x1290/0x1390 [ 564.184950] ? __issue_discard_cmd+0x480/0x480 [ 564.185884] ? mm_update_next_owner+0x380/0x380 [ 564.186829] ? __sched_text_start+0x8/0x8 [ 564.187672] ? __kthread_parkme+0xcb/0x100 [ 564.188528] ? kthread_blkcg+0x70/0x70 [ 564.189333] ? kthread+0x180/0x1d0 [ 564.190052] ? __issue_discard_cmd+0x480/0x480 [ 564.190983] rewind_stack_do_exit+0x17/0x20 [ 564.192190] The buggy address belongs to the page: [ 564.193213] page:ffffea0007cc45c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 564.194856] flags: 0x2ffff0000000000() [ 564.195644] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000 [ 564.197247] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 564.198826] page dumped because: kasan: bad access detected [ 564.200299] Memory state around the buggy address: [ 564.201306] ffff8801f3117b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 564.202779] ffff8801f3117c00: 00 00 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 [ 564.204252] >ffff8801f3117c80: f3 f3 f3 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 [ 564.205742] ^ [ 564.206424] ffff8801f3117d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 564.207908] ffff8801f3117d80: f3 f3 f3 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00 [ 564.209389] ================================================================== [ 564.231795] F2FS-fs (loop0): Mounted with checkpoint version = 2 - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L586 return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); Missing checks on sbi->user_block_count. Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 084ff9c82a37..9fafb1404f39 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1956,6 +1956,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int sit_segs, nat_segs; unsigned int sit_bitmap_size, nat_bitmap_size; unsigned int log_blocks_per_seg; + unsigned int segment_count_main; + block_t user_block_count; int i; total = le32_to_cpu(raw_super->segment_count); @@ -1980,6 +1982,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + user_block_count = le64_to_cpu(ckpt->user_block_count); + segment_count_main = le32_to_cpu(raw_super->segment_count_main); + log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + if (!user_block_count || user_block_count >= + segment_count_main << log_blocks_per_seg) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong user_block_count: %u", user_block_count); + return 1; + } + main_segs = le32_to_cpu(raw_super->segment_count_main); blocks_per_seg = sbi->blocks_per_seg; @@ -1996,7 +2008,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); - log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { From b8321ccd045710ee04fd5322c34cadd13a5e58af Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Jun 2018 13:55:22 +0800 Subject: [PATCH 0753/2608] f2fs: fix to do sanity check with node footer and iblocks commit e34438c903b653daca2b2a7de95aed46226f8ed3 upstream. This patch adds to do sanity check with below fields of inode to avoid reported panic. - node footer - iblocks https://bugzilla.kernel.org/show_bug.cgi?id=200223 - Overview BUG() triggered in f2fs_truncate_inode_blocks() when un-mounting a mounted f2fs image after writing to it - Reproduce - POC (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); // open / write / read int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); if (fd >= 0) { write(fd, (char *)buf, 517); write(fd, (char *)buf, sizeof(buf)); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel meesage [ 552.479723] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 556.451891] ------------[ cut here ]------------ [ 556.451899] kernel BUG at fs/f2fs/node.c:987! [ 556.452920] invalid opcode: 0000 [#1] SMP KASAN PTI [ 556.453936] CPU: 1 PID: 1310 Comm: umount Not tainted 4.18.0-rc1+ #4 [ 556.455213] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 556.457140] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0 [ 556.458280] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5 [ 556.462015] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286 [ 556.463068] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc [ 556.464479] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164 [ 556.465901] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d [ 556.467311] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64 [ 556.468706] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801 [ 556.470117] FS: 00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 556.471702] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 556.472838] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0 [ 556.474265] Call Trace: [ 556.474782] ? f2fs_alloc_nid_failed+0xf0/0xf0 [ 556.475686] ? truncate_nodes+0x980/0x980 [ 556.476516] ? pagecache_get_page+0x21f/0x2f0 [ 556.477412] ? __asan_loadN+0xf/0x20 [ 556.478153] ? __get_node_page+0x331/0x5b0 [ 556.478992] ? reweight_entity+0x1e6/0x3b0 [ 556.479826] f2fs_truncate_blocks+0x55e/0x740 [ 556.480709] ? f2fs_truncate_data_blocks+0x20/0x20 [ 556.481689] ? __radix_tree_lookup+0x34/0x160 [ 556.482630] ? radix_tree_lookup+0xd/0x10 [ 556.483445] f2fs_truncate+0xd4/0x1a0 [ 556.484206] f2fs_evict_inode+0x5ce/0x630 [ 556.485032] evict+0x16f/0x290 [ 556.485664] iput+0x280/0x300 [ 556.486300] dentry_unlink_inode+0x165/0x1e0 [ 556.487169] __dentry_kill+0x16a/0x260 [ 556.487936] dentry_kill+0x70/0x250 [ 556.488651] shrink_dentry_list+0x125/0x260 [ 556.489504] shrink_dcache_parent+0xc1/0x110 [ 556.490379] ? shrink_dcache_sb+0x200/0x200 [ 556.491231] ? bit_wait_timeout+0xc0/0xc0 [ 556.492047] do_one_tree+0x12/0x40 [ 556.492743] shrink_dcache_for_umount+0x3f/0xa0 [ 556.493656] generic_shutdown_super+0x43/0x1c0 [ 556.494561] kill_block_super+0x52/0x80 [ 556.495341] kill_f2fs_super+0x62/0x70 [ 556.496105] deactivate_locked_super+0x6f/0xa0 [ 556.497004] deactivate_super+0x5e/0x80 [ 556.497785] cleanup_mnt+0x61/0xa0 [ 556.498492] __cleanup_mnt+0x12/0x20 [ 556.499218] task_work_run+0xc8/0xf0 [ 556.499949] exit_to_usermode_loop+0x125/0x130 [ 556.500846] do_syscall_64+0x138/0x170 [ 556.501609] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 556.502659] RIP: 0033:0x7f8028b77487 [ 556.503384] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48 [ 556.507137] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 556.508637] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487 [ 556.510069] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0 [ 556.511481] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014 [ 556.512892] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c [ 556.514320] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820 [ 556.515745] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 556.529276] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 556.530340] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0 [ 556.531513] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5 [ 556.535330] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286 [ 556.536395] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc [ 556.537824] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164 [ 556.539290] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d [ 556.540709] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64 [ 556.542131] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801 [ 556.543579] FS: 00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 556.545180] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 556.546338] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0 [ 556.547809] ================================================================== [ 556.549248] BUG: KASAN: stack-out-of-bounds in arch_tlb_gather_mmu+0x52/0x170 [ 556.550672] Write of size 8 at addr ffff8801f292fd10 by task umount/1310 [ 556.552338] CPU: 1 PID: 1310 Comm: umount Tainted: G D 4.18.0-rc1+ #4 [ 556.553886] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 556.555756] Call Trace: [ 556.556264] dump_stack+0x7b/0xb5 [ 556.556944] print_address_description+0x70/0x290 [ 556.557903] kasan_report+0x291/0x390 [ 556.558649] ? arch_tlb_gather_mmu+0x52/0x170 [ 556.559537] __asan_store8+0x57/0x90 [ 556.560268] arch_tlb_gather_mmu+0x52/0x170 [ 556.561110] tlb_gather_mmu+0x12/0x40 [ 556.561862] exit_mmap+0x123/0x2a0 [ 556.562555] ? __ia32_sys_munmap+0x50/0x50 [ 556.563384] ? exit_aio+0x98/0x230 [ 556.564079] ? __x32_compat_sys_io_submit+0x260/0x260 [ 556.565099] ? taskstats_exit+0x1f4/0x640 [ 556.565925] ? kasan_check_read+0x11/0x20 [ 556.566739] ? mm_update_next_owner+0x322/0x380 [ 556.567652] mmput+0x8b/0x1d0 [ 556.568260] do_exit+0x43a/0x1390 [ 556.568937] ? mm_update_next_owner+0x380/0x380 [ 556.569855] ? deactivate_super+0x5e/0x80 [ 556.570668] ? cleanup_mnt+0x61/0xa0 [ 556.571395] ? __cleanup_mnt+0x12/0x20 [ 556.572156] ? task_work_run+0xc8/0xf0 [ 556.572917] ? exit_to_usermode_loop+0x125/0x130 [ 556.573861] rewind_stack_do_exit+0x17/0x20 [ 556.574707] RIP: 0033:0x7f8028b77487 [ 556.575428] Code: Bad RIP value. [ 556.576106] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 556.577599] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487 [ 556.579020] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0 [ 556.580422] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014 [ 556.581833] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c [ 556.583252] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820 [ 556.584983] The buggy address belongs to the page: [ 556.585961] page:ffffea0007ca4bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 556.587540] flags: 0x2ffff0000000000() [ 556.588296] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000 [ 556.589822] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 556.591359] page dumped because: kasan: bad access detected [ 556.592786] Memory state around the buggy address: [ 556.593753] ffff8801f292fc00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 556.595191] ffff8801f292fc80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00 [ 556.596613] >ffff8801f292fd00: 00 00 f3 00 00 00 00 f3 f3 00 00 00 00 f4 f4 f4 [ 556.598044] ^ [ 556.598797] ffff8801f292fd80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 [ 556.600225] ffff8801f292fe00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 f4 f4 f4 [ 556.601647] ================================================================== - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/node.c#L987 case NODE_DIND_BLOCK: err = truncate_nodes(&dn, nofs, offset[1], 3); cont = 0; break; default: BUG(); <--- } Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/inode.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 428d502f23e8..be7d9773d291 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -180,9 +180,30 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); } -static bool sanity_check_inode(struct inode *inode) +static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned long long iblocks; + + iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); + if (!iblocks) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, " + "run fsck to fix.", + __func__, inode->i_ino, iblocks); + return false; + } + + if (ino_of_node(node_page) != nid_of_node(node_page)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted inode footer i_ino=%lx, ino,nid: " + "[%u, %u] run fsck to fix.", + __func__, inode->i_ino, + ino_of_node(node_page), nid_of_node(node_page)); + return false; + } if (f2fs_has_extra_attr(inode) && !f2fs_sb_has_extra_attr(sbi->sb)) { @@ -242,7 +263,7 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); - if (!sanity_check_inode(inode)) { + if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); return -EINVAL; } From ad19d1e78fd51f5b4bf3ebbcbf3a133c8c03c49d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Aug 2018 19:13:44 +0800 Subject: [PATCH 0754/2608] f2fs: fix to do sanity check with block address in main area commit c9b60788fc760d136211853f10ce73dc152d1f4a upstream. This patch add to do sanity check with below field: - cp_pack_total_block_count - blkaddr of data/node - extent info - Overview BUG() in verify_block_addr() when writing to a corrupted f2fs image - Reproduce (4.18 upstream kernel) - POC (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); if (fd >= 0) { write(fd, (char *)buf, sizeof(buf)); fdatasync(fd); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel message [ 689.349473] F2FS-fs (loop0): Mounted with checkpoint version = 3 [ 699.728662] WARNING: CPU: 0 PID: 1309 at fs/f2fs/segment.c:2860 f2fs_inplace_write_data+0x232/0x240 [ 699.728670] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 699.729056] CPU: 0 PID: 1309 Comm: a.out Not tainted 4.18.0-rc1+ #4 [ 699.729064] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.729074] RIP: 0010:f2fs_inplace_write_data+0x232/0x240 [ 699.729076] Code: ff e9 cf fe ff ff 49 8d 7d 10 e8 39 45 ad ff 4d 8b 7d 10 be 04 00 00 00 49 8d 7f 48 e8 07 49 ad ff 45 8b 7f 48 e9 fb fe ff ff <0f> 0b f0 41 80 4d 48 04 e9 65 fe ff ff 90 66 66 66 66 90 55 48 8d [ 699.729130] RSP: 0018:ffff8801f43af568 EFLAGS: 00010202 [ 699.729139] RAX: 000000000000003f RBX: ffff8801f43af7b8 RCX: ffffffffb88c9113 [ 699.729142] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8802024e5540 [ 699.729144] RBP: ffff8801f43af590 R08: 0000000000000009 R09: ffffffffffffffe8 [ 699.729147] R10: 0000000000000001 R11: ffffed0039b0596a R12: ffff8802024e5540 [ 699.729149] R13: ffff8801f0335500 R14: ffff8801e3e7a700 R15: ffff8801e1ee4450 [ 699.729154] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.729156] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.729159] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.729171] Call Trace: [ 699.729192] f2fs_do_write_data_page+0x2e2/0xe00 [ 699.729203] ? f2fs_should_update_outplace+0xd0/0xd0 [ 699.729238] ? memcg_drain_all_list_lrus+0x280/0x280 [ 699.729269] ? __radix_tree_replace+0xa3/0x120 [ 699.729276] __write_data_page+0x5c7/0xe30 [ 699.729291] ? kasan_check_read+0x11/0x20 [ 699.729310] ? page_mapped+0x8a/0x110 [ 699.729321] ? page_mkclean+0xe9/0x160 [ 699.729327] ? f2fs_do_write_data_page+0xe00/0xe00 [ 699.729331] ? invalid_page_referenced_vma+0x130/0x130 [ 699.729345] ? clear_page_dirty_for_io+0x332/0x450 [ 699.729351] f2fs_write_cache_pages+0x4ca/0x860 [ 699.729358] ? __write_data_page+0xe30/0xe30 [ 699.729374] ? percpu_counter_add_batch+0x22/0xa0 [ 699.729380] ? kasan_check_write+0x14/0x20 [ 699.729391] ? _raw_spin_lock+0x17/0x40 [ 699.729403] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 [ 699.729413] ? iov_iter_advance+0x113/0x640 [ 699.729418] ? f2fs_write_end+0x133/0x2e0 [ 699.729423] ? balance_dirty_pages_ratelimited+0x239/0x640 [ 699.729428] f2fs_write_data_pages+0x329/0x520 [ 699.729433] ? generic_perform_write+0x250/0x320 [ 699.729438] ? f2fs_write_cache_pages+0x860/0x860 [ 699.729454] ? current_time+0x110/0x110 [ 699.729459] ? f2fs_preallocate_blocks+0x1ef/0x370 [ 699.729464] do_writepages+0x37/0xb0 [ 699.729468] ? f2fs_write_cache_pages+0x860/0x860 [ 699.729472] ? do_writepages+0x37/0xb0 [ 699.729478] __filemap_fdatawrite_range+0x19a/0x1f0 [ 699.729483] ? delete_from_page_cache_batch+0x4e0/0x4e0 [ 699.729496] ? __vfs_write+0x2b2/0x410 [ 699.729501] file_write_and_wait_range+0x66/0xb0 [ 699.729506] f2fs_do_sync_file+0x1f9/0xd90 [ 699.729511] ? truncate_partial_data_page+0x290/0x290 [ 699.729521] ? __sb_end_write+0x30/0x50 [ 699.729526] ? vfs_write+0x20f/0x260 [ 699.729530] f2fs_sync_file+0x9a/0xb0 [ 699.729534] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.729548] vfs_fsync_range+0x68/0x100 [ 699.729554] ? __fget_light+0xc9/0xe0 [ 699.729558] do_fsync+0x3d/0x70 [ 699.729562] __x64_sys_fdatasync+0x24/0x30 [ 699.729585] do_syscall_64+0x78/0x170 [ 699.729595] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 699.729613] RIP: 0033:0x7f9bf930d800 [ 699.729615] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 [ 699.729668] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.729673] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.729675] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.729678] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.729680] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.729683] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.729687] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 699.729782] ------------[ cut here ]------------ [ 699.729785] kernel BUG at fs/f2fs/segment.h:654! [ 699.731055] invalid opcode: 0000 [#1] SMP KASAN PTI [ 699.732104] CPU: 0 PID: 1309 Comm: a.out Tainted: G W 4.18.0-rc1+ #4 [ 699.733684] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.735611] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 [ 699.736649] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 [ 699.740524] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 [ 699.741573] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef [ 699.743006] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c [ 699.744426] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 [ 699.745833] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 [ 699.747256] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 [ 699.748683] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.750293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.751462] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.752874] Call Trace: [ 699.753386] ? f2fs_inplace_write_data+0x93/0x240 [ 699.754341] f2fs_inplace_write_data+0xd2/0x240 [ 699.755271] f2fs_do_write_data_page+0x2e2/0xe00 [ 699.756214] ? f2fs_should_update_outplace+0xd0/0xd0 [ 699.757215] ? memcg_drain_all_list_lrus+0x280/0x280 [ 699.758209] ? __radix_tree_replace+0xa3/0x120 [ 699.759164] __write_data_page+0x5c7/0xe30 [ 699.760002] ? kasan_check_read+0x11/0x20 [ 699.760823] ? page_mapped+0x8a/0x110 [ 699.761573] ? page_mkclean+0xe9/0x160 [ 699.762345] ? f2fs_do_write_data_page+0xe00/0xe00 [ 699.763332] ? invalid_page_referenced_vma+0x130/0x130 [ 699.764374] ? clear_page_dirty_for_io+0x332/0x450 [ 699.765347] f2fs_write_cache_pages+0x4ca/0x860 [ 699.766276] ? __write_data_page+0xe30/0xe30 [ 699.767161] ? percpu_counter_add_batch+0x22/0xa0 [ 699.768112] ? kasan_check_write+0x14/0x20 [ 699.768951] ? _raw_spin_lock+0x17/0x40 [ 699.769739] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 [ 699.770885] ? iov_iter_advance+0x113/0x640 [ 699.771743] ? f2fs_write_end+0x133/0x2e0 [ 699.772569] ? balance_dirty_pages_ratelimited+0x239/0x640 [ 699.773680] f2fs_write_data_pages+0x329/0x520 [ 699.774603] ? generic_perform_write+0x250/0x320 [ 699.775544] ? f2fs_write_cache_pages+0x860/0x860 [ 699.776510] ? current_time+0x110/0x110 [ 699.777299] ? f2fs_preallocate_blocks+0x1ef/0x370 [ 699.778279] do_writepages+0x37/0xb0 [ 699.779026] ? f2fs_write_cache_pages+0x860/0x860 [ 699.779978] ? do_writepages+0x37/0xb0 [ 699.780755] __filemap_fdatawrite_range+0x19a/0x1f0 [ 699.781746] ? delete_from_page_cache_batch+0x4e0/0x4e0 [ 699.782820] ? __vfs_write+0x2b2/0x410 [ 699.783597] file_write_and_wait_range+0x66/0xb0 [ 699.784540] f2fs_do_sync_file+0x1f9/0xd90 [ 699.785381] ? truncate_partial_data_page+0x290/0x290 [ 699.786415] ? __sb_end_write+0x30/0x50 [ 699.787204] ? vfs_write+0x20f/0x260 [ 699.787941] f2fs_sync_file+0x9a/0xb0 [ 699.788694] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.789572] vfs_fsync_range+0x68/0x100 [ 699.790360] ? __fget_light+0xc9/0xe0 [ 699.791128] do_fsync+0x3d/0x70 [ 699.791779] __x64_sys_fdatasync+0x24/0x30 [ 699.792614] do_syscall_64+0x78/0x170 [ 699.793371] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 699.794406] RIP: 0033:0x7f9bf930d800 [ 699.795134] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 [ 699.798960] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.800483] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.801923] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.803373] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.804798] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.806233] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.807667] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 699.817079] ---[ end trace 4ce02f25ff7d3df6 ]--- [ 699.818068] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 [ 699.819114] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 [ 699.822919] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 [ 699.823977] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef [ 699.825436] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c [ 699.826881] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 [ 699.828292] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 [ 699.829750] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 [ 699.831192] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.832793] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.833981] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.835556] ================================================================== [ 699.837029] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0 [ 699.838462] Read of size 8 at addr ffff8801f43af970 by task a.out/1309 [ 699.840086] CPU: 0 PID: 1309 Comm: a.out Tainted: G D W 4.18.0-rc1+ #4 [ 699.841603] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.843475] Call Trace: [ 699.843982] dump_stack+0x7b/0xb5 [ 699.844661] print_address_description+0x70/0x290 [ 699.845607] kasan_report+0x291/0x390 [ 699.846351] ? update_stack_state+0x38c/0x3e0 [ 699.853831] __asan_load8+0x54/0x90 [ 699.854569] update_stack_state+0x38c/0x3e0 [ 699.855428] ? __read_once_size_nocheck.constprop.7+0x20/0x20 [ 699.856601] ? __save_stack_trace+0x5e/0x100 [ 699.857476] unwind_next_frame.part.5+0x18e/0x490 [ 699.858448] ? unwind_dump+0x290/0x290 [ 699.859217] ? clear_page_dirty_for_io+0x332/0x450 [ 699.860185] __unwind_start+0x106/0x190 [ 699.860974] __save_stack_trace+0x5e/0x100 [ 699.861808] ? __save_stack_trace+0x5e/0x100 [ 699.862691] ? unlink_anon_vmas+0xba/0x2c0 [ 699.863525] save_stack_trace+0x1f/0x30 [ 699.864312] save_stack+0x46/0xd0 [ 699.864993] ? __alloc_pages_slowpath+0x1420/0x1420 [ 699.865990] ? flush_tlb_mm_range+0x15e/0x220 [ 699.866889] ? kasan_check_write+0x14/0x20 [ 699.867724] ? __dec_node_state+0x92/0xb0 [ 699.868543] ? lock_page_memcg+0x85/0xf0 [ 699.869350] ? unlock_page_memcg+0x16/0x80 [ 699.870185] ? page_remove_rmap+0x198/0x520 [ 699.871048] ? mark_page_accessed+0x133/0x200 [ 699.871930] ? _cond_resched+0x1a/0x50 [ 699.872700] ? unmap_page_range+0xcd4/0xe50 [ 699.873551] ? rb_next+0x58/0x80 [ 699.874217] ? rb_next+0x58/0x80 [ 699.874895] __kasan_slab_free+0x13c/0x1a0 [ 699.875734] ? unlink_anon_vmas+0xba/0x2c0 [ 699.876563] kasan_slab_free+0xe/0x10 [ 699.877315] kmem_cache_free+0x89/0x1e0 [ 699.878095] unlink_anon_vmas+0xba/0x2c0 [ 699.878913] free_pgtables+0x101/0x1b0 [ 699.879677] exit_mmap+0x146/0x2a0 [ 699.880378] ? __ia32_sys_munmap+0x50/0x50 [ 699.881214] ? kasan_check_read+0x11/0x20 [ 699.882052] ? mm_update_next_owner+0x322/0x380 [ 699.882985] mmput+0x8b/0x1d0 [ 699.883602] do_exit+0x43a/0x1390 [ 699.884288] ? mm_update_next_owner+0x380/0x380 [ 699.885212] ? f2fs_sync_file+0x9a/0xb0 [ 699.885995] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.886877] ? vfs_fsync_range+0x68/0x100 [ 699.887694] ? __fget_light+0xc9/0xe0 [ 699.888442] ? do_fsync+0x3d/0x70 [ 699.889118] ? __x64_sys_fdatasync+0x24/0x30 [ 699.889996] rewind_stack_do_exit+0x17/0x20 [ 699.890860] RIP: 0033:0x7f9bf930d800 [ 699.891585] Code: Bad RIP value. [ 699.892268] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.893781] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.895220] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.896643] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.898069] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.899505] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.901241] The buggy address belongs to the page: [ 699.902215] page:ffffea0007d0ebc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 699.903811] flags: 0x2ffff0000000000() [ 699.904585] raw: 02ffff0000000000 0000000000000000 ffffffff07d00101 0000000000000000 [ 699.906125] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000 [ 699.907673] page dumped because: kasan: bad access detected [ 699.909108] Memory state around the buggy address: [ 699.910077] ffff8801f43af800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00 [ 699.911528] ffff8801f43af880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 699.912953] >ffff8801f43af900: 00 00 00 00 00 00 00 00 f1 01 f4 f4 f4 f2 f2 f2 [ 699.914392] ^ [ 699.915758] ffff8801f43af980: f2 00 f4 f4 00 00 00 00 f2 00 00 00 00 00 00 00 [ 699.917193] ffff8801f43afa00: 00 00 00 00 00 00 00 00 00 f3 f3 f3 00 00 00 00 [ 699.918634] ================================================================== - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L644 Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: - Error label is different in validate_checkpoint() due to the earlier backport of "f2fs: fix invalid memory access" - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 22 +++++++++++++++++++--- fs/f2fs/data.c | 33 +++++++++++++++++++++++++++------ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/file.c | 12 ++++++++++++ fs/f2fs/inode.c | 17 +++++++++++++++++ fs/f2fs/node.c | 4 ++++ fs/f2fs/segment.h | 3 +-- 7 files changed, 83 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d7bd9745e883..c81cd5057b8e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -86,8 +86,10 @@ repeat: fio.page = page; if (f2fs_submit_page_bio(&fio)) { - f2fs_put_page(page, 1); - goto repeat; + memset(page_address(page), 0, PAGE_SIZE); + f2fs_stop_checkpoint(sbi, false); + f2fs_bug_on(sbi, 1); + return page; } lock_page(page); @@ -141,8 +143,14 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case META_POR: case DATA_GENERIC: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || - blkaddr < MAIN_BLKADDR(sbi))) + blkaddr < MAIN_BLKADDR(sbi))) { + if (type == DATA_GENERIC) { + f2fs_msg(sbi->sb, KERN_WARNING, + "access invalid blkaddr:%u", blkaddr); + WARN_ON(1); + } return false; + } break; case META_GENERIC: if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || @@ -746,6 +754,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, &cp_page_1, version); if (err) return NULL; + + if (le32_to_cpu(cp_block->cp_pack_total_block_count) > + sbi->blocks_per_seg) { + f2fs_msg(sbi->sb, KERN_WARNING, + "invalid cp_pack_total_block_count:%u", + le32_to_cpu(cp_block->cp_pack_total_block_count)); + goto invalid_cp; + } pre_version = *version; cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 615878806611..8f6e7c3a10f8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -369,7 +369,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; - verify_block_addr(fio, fio->new_blkaddr); + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + return -EFAULT; + trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -946,6 +949,12 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + err = -EFAULT; + goto sync_out; + } + if (!is_valid_data_blkaddr(sbi, blkaddr)) { if (create) { if (unlikely(f2fs_cp_error(sbi))) { @@ -1264,6 +1273,10 @@ got_it: SetPageUptodate(page); goto confused; } + + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, + DATA_GENERIC)) + goto set_error_page; } else { zero_user_segment(page, 0, PAGE_SIZE); if (!PageUptodate(page)) @@ -1402,11 +1415,13 @@ int do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr)) { - ipu_force = true; - fio->need_lock = LOCK_DONE; - goto got_it; - } + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, + DATA_GENERIC)) + return -EFAULT; + + ipu_force = true; + fio->need_lock = LOCK_DONE; + goto got_it; } /* Deadlock due to between page->lock and f2fs_lock_op */ @@ -1425,6 +1440,12 @@ int do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; } got_it: + if (__is_valid_data_blkaddr(fio->old_blkaddr) && + !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, + DATA_GENERIC)) { + err = -EFAULT; + goto out_writepage; + } /* * If current allocation needs SSR, * it had better in-place writes for updated data. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d15d79457f5c..3f1a44696036 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2357,6 +2357,9 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ + (!is_read_io(fio->op) || fio->is_meta)) + bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e9c575ef70b5..7d3189f1941c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -397,6 +397,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + blkaddr, DATA_GENERIC)) { + f2fs_put_dnode(&dn); + goto fail; + } + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, pgofs, whence)) { f2fs_put_dnode(&dn); @@ -496,6 +503,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->data_blkaddr = NULL_ADDR; set_data_blkaddr(dn); + + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) + continue; + invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index be7d9773d291..aeed9943836a 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -214,6 +214,23 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) __func__, inode->i_ino); return false; } + + if (F2FS_I(inode)->extent_tree) { + struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; + + if (ei->len && + (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) || + !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + DATA_GENERIC))) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) extent info [%u, %u, %u] " + "is incorrect, run fsck to fix", + __func__, inode->i_ino, + ei->blk, ei->fofs, ei->len); + return false; + } + } return true; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 999814c8cbea..6adb6c60f017 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1398,6 +1398,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return 0; } + if (__is_valid_data_blkaddr(ni.blk_addr) && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) + goto redirty_out; + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index dd8f977fc273..47348d98165b 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -629,8 +629,7 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { struct f2fs_sb_info *sbi = fio->sbi; - if (PAGE_TYPE_OF_BIO(fio->type) == META && - (!is_read_io(fio->op) || fio->is_meta)) + if (__is_meta_io(fio)) verify_blkaddr(sbi, blk_addr, META_GENERIC); else verify_blkaddr(sbi, blk_addr, DATA_GENERIC); From d7d9d29a837358636e12fe09c90a7882b53b2220 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 8 Jul 2018 22:16:55 +0800 Subject: [PATCH 0755/2608] f2fs: fix to do sanity check with i_extra_isize commit 18dd6470c2d14d10f5a2dd926925dc80dbd3abfd upstream. If inode.i_extra_isize was fuzzed to an abnormal value, when calculating inline data size, the result will overflow, result in accessing invalid memory area when operating inline data. Let's do sanity check with i_extra_isize during inode loading for fixing. https://bugzilla.kernel.org/show_bug.cgi?id=200421 - Reproduce - POC (poc.c) #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void activity(char *mpoint) { char *foo_bar_baz; char *foo_baz; char *xattr; int err; err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); err = asprintf(&foo_baz, "%s/foo/baz", mpoint); err = asprintf(&xattr, "%s/foo/bar/xattr", mpoint); rename(foo_bar_baz, foo_baz); char buf2[113]; memset(buf2, 0, sizeof(buf2)); listxattr(xattr, buf2, sizeof(buf2)); removexattr(xattr, "user.mime_type"); } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel message Umount the image will leave the following message [ 2910.995489] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 2918.416465] ================================================================== [ 2918.416807] BUG: KASAN: slab-out-of-bounds in f2fs_iget+0xcb9/0x1a80 [ 2918.417009] Read of size 4 at addr ffff88018efc2068 by task a.out/1229 [ 2918.417311] CPU: 1 PID: 1229 Comm: a.out Not tainted 4.17.0+ #1 [ 2918.417314] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 2918.417323] Call Trace: [ 2918.417366] dump_stack+0x71/0xab [ 2918.417401] print_address_description+0x6b/0x290 [ 2918.417407] kasan_report+0x28e/0x390 [ 2918.417411] ? f2fs_iget+0xcb9/0x1a80 [ 2918.417415] f2fs_iget+0xcb9/0x1a80 [ 2918.417422] ? f2fs_lookup+0x2e7/0x580 [ 2918.417425] f2fs_lookup+0x2e7/0x580 [ 2918.417433] ? __recover_dot_dentries+0x400/0x400 [ 2918.417447] ? legitimize_path.isra.29+0x5a/0xa0 [ 2918.417453] __lookup_slow+0x11c/0x220 [ 2918.417457] ? may_delete+0x2a0/0x2a0 [ 2918.417475] ? deref_stack_reg+0xe0/0xe0 [ 2918.417479] ? __lookup_hash+0xb0/0xb0 [ 2918.417483] lookup_slow+0x3e/0x60 [ 2918.417488] walk_component+0x3ac/0x990 [ 2918.417492] ? generic_permission+0x51/0x1e0 [ 2918.417495] ? inode_permission+0x51/0x1d0 [ 2918.417499] ? pick_link+0x3e0/0x3e0 [ 2918.417502] ? link_path_walk+0x4b1/0x770 [ 2918.417513] ? _raw_spin_lock_irqsave+0x25/0x50 [ 2918.417518] ? walk_component+0x990/0x990 [ 2918.417522] ? path_init+0x2e6/0x580 [ 2918.417526] path_lookupat+0x13f/0x430 [ 2918.417531] ? trailing_symlink+0x3a0/0x3a0 [ 2918.417534] ? do_renameat2+0x270/0x7b0 [ 2918.417538] ? __kasan_slab_free+0x14c/0x190 [ 2918.417541] ? do_renameat2+0x270/0x7b0 [ 2918.417553] ? kmem_cache_free+0x85/0x1e0 [ 2918.417558] ? do_renameat2+0x270/0x7b0 [ 2918.417563] filename_lookup+0x13c/0x280 [ 2918.417567] ? filename_parentat+0x2b0/0x2b0 [ 2918.417572] ? kasan_unpoison_shadow+0x31/0x40 [ 2918.417575] ? kasan_kmalloc+0xa6/0xd0 [ 2918.417593] ? strncpy_from_user+0xaa/0x1c0 [ 2918.417598] ? getname_flags+0x101/0x2b0 [ 2918.417614] ? path_listxattr+0x87/0x110 [ 2918.417619] path_listxattr+0x87/0x110 [ 2918.417623] ? listxattr+0xc0/0xc0 [ 2918.417637] ? mm_fault_error+0x1b0/0x1b0 [ 2918.417654] do_syscall_64+0x73/0x160 [ 2918.417660] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2918.417676] RIP: 0033:0x7f2f3a3480d7 [ 2918.417677] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 2918.417732] RSP: 002b:00007fff4095b7d8 EFLAGS: 00000206 ORIG_RAX: 00000000000000c2 [ 2918.417744] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f2f3a3480d7 [ 2918.417746] RDX: 0000000000000071 RSI: 00007fff4095b810 RDI: 000000000126a0c0 [ 2918.417749] RBP: 00007fff4095b890 R08: 000000000126a010 R09: 0000000000000000 [ 2918.417751] R10: 00000000000001ab R11: 0000000000000206 R12: 00000000004005e0 [ 2918.417753] R13: 00007fff4095b990 R14: 0000000000000000 R15: 0000000000000000 [ 2918.417853] Allocated by task 329: [ 2918.418002] kasan_kmalloc+0xa6/0xd0 [ 2918.418007] kmem_cache_alloc+0xc8/0x1e0 [ 2918.418023] mempool_init_node+0x194/0x230 [ 2918.418027] mempool_init+0x12/0x20 [ 2918.418042] bioset_init+0x2bd/0x380 [ 2918.418052] blk_alloc_queue_node+0xe9/0x540 [ 2918.418075] dm_create+0x2c0/0x800 [ 2918.418080] dev_create+0xd2/0x530 [ 2918.418083] ctl_ioctl+0x2a3/0x5b0 [ 2918.418087] dm_ctl_ioctl+0xa/0x10 [ 2918.418092] do_vfs_ioctl+0x13e/0x8c0 [ 2918.418095] ksys_ioctl+0x66/0x70 [ 2918.418098] __x64_sys_ioctl+0x3d/0x50 [ 2918.418102] do_syscall_64+0x73/0x160 [ 2918.418106] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2918.418204] Freed by task 0: [ 2918.418301] (stack is not available) [ 2918.418521] The buggy address belongs to the object at ffff88018efc0000 which belongs to the cache biovec-max of size 8192 [ 2918.418894] The buggy address is located 104 bytes to the right of 8192-byte region [ffff88018efc0000, ffff88018efc2000) [ 2918.419257] The buggy address belongs to the page: [ 2918.419431] page:ffffea00063bf000 count:1 mapcount:0 mapping:ffff8801f2242540 index:0x0 compound_mapcount: 0 [ 2918.419702] flags: 0x17fff8000008100(slab|head) [ 2918.419879] raw: 017fff8000008100 dead000000000100 dead000000000200 ffff8801f2242540 [ 2918.420101] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000 [ 2918.420322] page dumped because: kasan: bad access detected [ 2918.420599] Memory state around the buggy address: [ 2918.420764] ffff88018efc1f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.420975] ffff88018efc1f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.421194] >ffff88018efc2000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2918.421406] ^ [ 2918.421627] ffff88018efc2080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2918.421838] ffff88018efc2100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.422046] ================================================================== [ 2918.422264] Disabling lock debugging due to kernel taint [ 2923.901641] BUG: unable to handle kernel paging request at ffff88018f0db000 [ 2923.901884] PGD 22226a067 P4D 22226a067 PUD 222273067 PMD 18e642063 PTE 800000018f0db061 [ 2923.902120] Oops: 0003 [#1] SMP KASAN PTI [ 2923.902274] CPU: 1 PID: 1231 Comm: umount Tainted: G B 4.17.0+ #1 [ 2923.902490] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 2923.902761] RIP: 0010:__memset+0x24/0x30 [ 2923.902906] Code: 90 90 90 90 90 90 66 66 90 66 90 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3 [ 2923.903446] RSP: 0018:ffff88018ddf7ae0 EFLAGS: 00010206 [ 2923.903622] RAX: 0000000000000000 RBX: ffff8801d549d888 RCX: 1ffffffffffdaffb [ 2923.903833] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88018f0daffc [ 2923.904062] RBP: ffff88018efc206c R08: 1ffff10031df840d R09: ffff88018efc206c [ 2923.904273] R10: ffffffffffffe1ee R11: ffffed0031df65fa R12: 0000000000000000 [ 2923.904485] R13: ffff8801d549dc98 R14: 00000000ffffc3db R15: ffffea00063bec80 [ 2923.904693] FS: 00007fa8b2f8a840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 2923.904937] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2923.910080] CR2: ffff88018f0db000 CR3: 000000018f892000 CR4: 00000000000006e0 [ 2923.914930] Call Trace: [ 2923.919724] f2fs_truncate_inline_inode+0x114/0x170 [ 2923.924487] f2fs_truncate_blocks+0x11b/0x7c0 [ 2923.929178] ? f2fs_truncate_data_blocks+0x10/0x10 [ 2923.933834] ? dqget+0x670/0x670 [ 2923.938437] ? f2fs_destroy_extent_tree+0xd6/0x270 [ 2923.943107] ? __radix_tree_lookup+0x2f/0x150 [ 2923.947772] f2fs_truncate+0xd4/0x1a0 [ 2923.952491] f2fs_evict_inode+0x5ab/0x610 [ 2923.957204] evict+0x15f/0x280 [ 2923.961898] __dentry_kill+0x161/0x250 [ 2923.966634] shrink_dentry_list+0xf3/0x250 [ 2923.971897] shrink_dcache_parent+0xa9/0x100 [ 2923.976561] ? shrink_dcache_sb+0x1f0/0x1f0 [ 2923.981177] ? wait_for_completion+0x8a/0x210 [ 2923.985781] ? migrate_swap_stop+0x2d0/0x2d0 [ 2923.990332] do_one_tree+0xe/0x40 [ 2923.994735] shrink_dcache_for_umount+0x3a/0xa0 [ 2923.999077] generic_shutdown_super+0x3e/0x1c0 [ 2924.003350] kill_block_super+0x4b/0x70 [ 2924.007619] deactivate_locked_super+0x65/0x90 [ 2924.011812] cleanup_mnt+0x5c/0xa0 [ 2924.015995] task_work_run+0xce/0xf0 [ 2924.020174] exit_to_usermode_loop+0x115/0x120 [ 2924.024293] do_syscall_64+0x12f/0x160 [ 2924.028479] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2924.032709] RIP: 0033:0x7fa8b2868487 [ 2924.036888] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48 [ 2924.045750] RSP: 002b:00007ffc39824d58 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 2924.050190] RAX: 0000000000000000 RBX: 00000000008ea030 RCX: 00007fa8b2868487 [ 2924.054604] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00000000008f4360 [ 2924.058940] RBP: 00000000008f4360 R08: 0000000000000000 R09: 0000000000000014 [ 2924.063186] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007fa8b2d7183c [ 2924.067418] R13: 0000000000000000 R14: 00000000008ea210 R15: 00007ffc39824fe0 [ 2924.071534] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 2924.098044] CR2: ffff88018f0db000 [ 2924.102520] ---[ end trace a8e0d899985faf31 ]--- [ 2924.107012] RIP: 0010:__memset+0x24/0x30 [ 2924.111448] Code: 90 90 90 90 90 90 66 66 90 66 90 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3 [ 2924.120724] RSP: 0018:ffff88018ddf7ae0 EFLAGS: 00010206 [ 2924.125312] RAX: 0000000000000000 RBX: ffff8801d549d888 RCX: 1ffffffffffdaffb [ 2924.129931] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88018f0daffc [ 2924.134537] RBP: ffff88018efc206c R08: 1ffff10031df840d R09: ffff88018efc206c [ 2924.139175] R10: ffffffffffffe1ee R11: ffffed0031df65fa R12: 0000000000000000 [ 2924.143825] R13: ffff8801d549dc98 R14: 00000000ffffc3db R15: ffffea00063bec80 [ 2924.148500] FS: 00007fa8b2f8a840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 2924.153247] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2924.158003] CR2: ffff88018f0db000 CR3: 000000018f892000 CR4: 00000000000006e0 [ 2924.164641] BUG: Bad rss-counter state mm:00000000fa04621e idx:0 val:4 [ 2924.170007] BUG: Bad rss-counter tate mm:00000000fa04621e idx:1 val:2 - Location https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/inline.c#L78 memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); Here the length can be negative. Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/inode.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index aeed9943836a..9a40724dbaa6 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -183,6 +183,7 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); unsigned long long iblocks; iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); @@ -215,6 +216,17 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || + fi->i_extra_isize % sizeof(__le32)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, " + "max: %zu", + __func__, inode->i_ino, fi->i_extra_isize, + F2FS_TOTAL_EXTRA_ATTR_SIZE); + return false; + } + if (F2FS_I(inode)->extent_tree) { struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; @@ -280,14 +292,14 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); + fi->i_extra_isize = f2fs_has_extra_attr(inode) ? + le16_to_cpu(ri->i_extra_isize) : 0; + if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); return -EINVAL; } - fi->i_extra_isize = f2fs_has_extra_attr(inode) ? - le16_to_cpu(ri->i_extra_isize) : 0; - /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); From 30130700acfad8a705c109325379f5bbe21b3ccc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Aug 2018 19:16:11 +0800 Subject: [PATCH 0756/2608] f2fs: fix to do sanity check with cp_pack_start_sum commit e494c2f995d6181d6e29c4927d68e0f295ecf75b upstream. After fuzzing, cp_pack_start_sum could be corrupted, so current log's summary info should be wrong due to loading incorrect summary block. Then, if segment's type in current log is exceeded NR_CURSEG_TYPE, it can lead accessing invalid dirty_i->dirty_segmap bitmap finally. Add sanity check for cp_pack_start_sum to fix this issue. https://bugzilla.kernel.org/show_bug.cgi?id=200419 - Reproduce - Kernel message (f2fs-dev w/ KASAN) [ 3117.578432] F2FS-fs (loop0): Invalid log blocks per segment (8) [ 3117.578445] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock [ 3117.581364] F2FS-fs (loop0): invalid crc_offset: 30716 [ 3117.583564] WARNING: CPU: 1 PID: 1225 at fs/f2fs/checkpoint.c:90 __get_meta_page+0x448/0x4b0 [ 3117.583570] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 3117.584014] CPU: 1 PID: 1225 Comm: mount Not tainted 4.17.0+ #1 [ 3117.584017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.584022] RIP: 0010:__get_meta_page+0x448/0x4b0 [ 3117.584023] Code: 00 49 8d bc 24 84 00 00 00 e8 74 54 da ff 41 83 8c 24 84 00 00 00 08 4c 89 f6 4c 89 ef e8 c0 d9 95 00 48 89 ef e8 18 e3 00 00 <0f> 0b f0 80 4d 48 04 e9 0f fe ff ff 0f 0b 48 89 c7 48 89 04 24 e8 [ 3117.584072] RSP: 0018:ffff88018eb678c0 EFLAGS: 00010286 [ 3117.584082] RAX: ffff88018f0a6a78 RBX: ffffea0007a46600 RCX: ffffffff9314d1b2 [ 3117.584085] RDX: ffffffff00000001 RSI: 0000000000000000 RDI: ffff88018f0a6a98 [ 3117.584087] RBP: ffff88018ebe9980 R08: 0000000000000002 R09: 0000000000000001 [ 3117.584090] R10: 0000000000000001 R11: ffffed00326e4450 R12: ffff880193722200 [ 3117.584092] R13: ffff88018ebe9afc R14: 0000000000000206 R15: ffff88018eb67900 [ 3117.584096] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3117.584098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3117.584101] CR2: 00000000016f21b8 CR3: 0000000191c22000 CR4: 00000000000006e0 [ 3117.584112] Call Trace: [ 3117.584121] ? f2fs_set_meta_page_dirty+0x150/0x150 [ 3117.584127] ? f2fs_build_segment_manager+0xbf9/0x3190 [ 3117.584133] ? f2fs_npages_for_summary_flush+0x75/0x120 [ 3117.584145] f2fs_build_segment_manager+0xda8/0x3190 [ 3117.584151] ? f2fs_get_valid_checkpoint+0x298/0xa00 [ 3117.584156] ? f2fs_flush_sit_entries+0x10e0/0x10e0 [ 3117.584184] ? map_id_range_down+0x17c/0x1b0 [ 3117.584188] ? __put_user_ns+0x30/0x30 [ 3117.584206] ? find_next_bit+0x53/0x90 [ 3117.584237] ? cpumask_next+0x16/0x20 [ 3117.584249] f2fs_fill_super+0x1948/0x2b40 [ 3117.584258] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.584279] ? sget_userns+0x65e/0x690 [ 3117.584296] ? set_blocksize+0x88/0x130 [ 3117.584302] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.584305] mount_bdev+0x1c0/0x200 [ 3117.584310] mount_fs+0x5c/0x190 [ 3117.584320] vfs_kern_mount+0x64/0x190 [ 3117.584330] do_mount+0x2e4/0x1450 [ 3117.584343] ? lockref_put_return+0x130/0x130 [ 3117.584347] ? copy_mount_string+0x20/0x20 [ 3117.584357] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.584362] ? kasan_kmalloc+0xa6/0xd0 [ 3117.584373] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.584377] ? __kmalloc_track_caller+0x196/0x210 [ 3117.584383] ? _copy_from_user+0x61/0x90 [ 3117.584396] ? memdup_user+0x3e/0x60 [ 3117.584401] ksys_mount+0x7e/0xd0 [ 3117.584405] __x64_sys_mount+0x62/0x70 [ 3117.584427] do_syscall_64+0x73/0x160 [ 3117.584440] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.584455] RIP: 0033:0x7f5693f14b9a [ 3117.584456] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.584505] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.584510] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.584512] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.584514] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.584516] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.584519] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.584523] ---[ end trace a8e0d899985faf31 ]--- [ 3117.685663] F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix. [ 3117.685673] F2FS-fs (loop0): recover_data: ino = 2 (i_size: recover) recovered = 1, err = 0 [ 3117.685707] ================================================================== [ 3117.685955] BUG: KASAN: slab-out-of-bounds in __remove_dirty_segment+0xdd/0x1e0 [ 3117.686175] Read of size 8 at addr ffff88018f0a63d0 by task mount/1225 [ 3117.686477] CPU: 0 PID: 1225 Comm: mount Tainted: G W 4.17.0+ #1 [ 3117.686481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.686483] Call Trace: [ 3117.686494] dump_stack+0x71/0xab [ 3117.686512] print_address_description+0x6b/0x290 [ 3117.686517] kasan_report+0x28e/0x390 [ 3117.686522] ? __remove_dirty_segment+0xdd/0x1e0 [ 3117.686527] __remove_dirty_segment+0xdd/0x1e0 [ 3117.686532] locate_dirty_segment+0x189/0x190 [ 3117.686538] f2fs_allocate_new_segments+0xa9/0xe0 [ 3117.686543] recover_data+0x703/0x2c20 [ 3117.686547] ? f2fs_recover_fsync_data+0x48f/0xd50 [ 3117.686553] ? ksys_mount+0x7e/0xd0 [ 3117.686564] ? policy_nodemask+0x1a/0x90 [ 3117.686567] ? policy_node+0x56/0x70 [ 3117.686571] ? add_fsync_inode+0xf0/0xf0 [ 3117.686592] ? blk_finish_plug+0x44/0x60 [ 3117.686597] ? f2fs_ra_meta_pages+0x38b/0x5e0 [ 3117.686602] ? find_inode_fast+0xac/0xc0 [ 3117.686606] ? f2fs_is_valid_blkaddr+0x320/0x320 [ 3117.686618] ? __radix_tree_lookup+0x150/0x150 [ 3117.686633] ? dqget+0x670/0x670 [ 3117.686648] ? pagecache_get_page+0x29/0x410 [ 3117.686656] ? kmem_cache_alloc+0x176/0x1e0 [ 3117.686660] ? f2fs_is_valid_blkaddr+0x11d/0x320 [ 3117.686664] f2fs_recover_fsync_data+0xc23/0xd50 [ 3117.686670] ? f2fs_space_for_roll_forward+0x60/0x60 [ 3117.686674] ? rb_insert_color+0x323/0x3d0 [ 3117.686678] ? f2fs_recover_orphan_inodes+0xa5/0x700 [ 3117.686683] ? proc_register+0x153/0x1d0 [ 3117.686686] ? f2fs_remove_orphan_inode+0x10/0x10 [ 3117.686695] ? f2fs_attr_store+0x50/0x50 [ 3117.686700] ? proc_create_single_data+0x52/0x60 [ 3117.686707] f2fs_fill_super+0x1d06/0x2b40 [ 3117.686728] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.686735] ? sget_userns+0x65e/0x690 [ 3117.686740] ? set_blocksize+0x88/0x130 [ 3117.686745] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.686748] mount_bdev+0x1c0/0x200 [ 3117.686753] mount_fs+0x5c/0x190 [ 3117.686758] vfs_kern_mount+0x64/0x190 [ 3117.686762] do_mount+0x2e4/0x1450 [ 3117.686769] ? lockref_put_return+0x130/0x130 [ 3117.686773] ? copy_mount_string+0x20/0x20 [ 3117.686777] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.686780] ? kasan_kmalloc+0xa6/0xd0 [ 3117.686786] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.686790] ? __kmalloc_track_caller+0x196/0x210 [ 3117.686795] ? _copy_from_user+0x61/0x90 [ 3117.686801] ? memdup_user+0x3e/0x60 [ 3117.686804] ksys_mount+0x7e/0xd0 [ 3117.686809] __x64_sys_mount+0x62/0x70 [ 3117.686816] do_syscall_64+0x73/0x160 [ 3117.686824] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.686829] RIP: 0033:0x7f5693f14b9a [ 3117.686830] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.686887] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.686892] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.686894] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.686896] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.686899] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.686901] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.687005] Allocated by task 1225: [ 3117.687152] kasan_kmalloc+0xa6/0xd0 [ 3117.687157] kmem_cache_alloc_trace+0xfd/0x200 [ 3117.687161] f2fs_build_segment_manager+0x2d09/0x3190 [ 3117.687165] f2fs_fill_super+0x1948/0x2b40 [ 3117.687168] mount_bdev+0x1c0/0x200 [ 3117.687171] mount_fs+0x5c/0x190 [ 3117.687174] vfs_kern_mount+0x64/0x190 [ 3117.687177] do_mount+0x2e4/0x1450 [ 3117.687180] ksys_mount+0x7e/0xd0 [ 3117.687182] __x64_sys_mount+0x62/0x70 [ 3117.687186] do_syscall_64+0x73/0x160 [ 3117.687190] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.687285] Freed by task 19: [ 3117.687412] __kasan_slab_free+0x137/0x190 [ 3117.687416] kfree+0x8b/0x1b0 [ 3117.687460] ttm_bo_man_put_node+0x61/0x80 [ttm] [ 3117.687476] ttm_bo_cleanup_refs+0x15f/0x250 [ttm] [ 3117.687492] ttm_bo_delayed_delete+0x2f0/0x300 [ttm] [ 3117.687507] ttm_bo_delayed_workqueue+0x17/0x50 [ttm] [ 3117.687528] process_one_work+0x2f9/0x740 [ 3117.687531] worker_thread+0x78/0x6b0 [ 3117.687541] kthread+0x177/0x1c0 [ 3117.687545] ret_from_fork+0x35/0x40 [ 3117.687638] The buggy address belongs to the object at ffff88018f0a6300 which belongs to the cache kmalloc-192 of size 192 [ 3117.688014] The buggy address is located 16 bytes to the right of 192-byte region [ffff88018f0a6300, ffff88018f0a63c0) [ 3117.688382] The buggy address belongs to the page: [ 3117.688554] page:ffffea00063c2980 count:1 mapcount:0 mapping:ffff8801f3403180 index:0x0 [ 3117.688788] flags: 0x17fff8000000100(slab) [ 3117.688944] raw: 017fff8000000100 ffffea00063c2840 0000000e0000000e ffff8801f3403180 [ 3117.689166] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 [ 3117.689386] page dumped because: kasan: bad access detected [ 3117.689653] Memory state around the buggy address: [ 3117.689816] ffff88018f0a6280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3117.690027] ffff88018f0a6300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3117.690239] >ffff88018f0a6380: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 3117.690448] ^ [ 3117.690644] ffff88018f0a6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3117.690868] ffff88018f0a6480: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 3117.691077] ================================================================== [ 3117.691290] Disabling lock debugging due to kernel taint [ 3117.693893] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 3117.694120] PGD 80000001f01bc067 P4D 80000001f01bc067 PUD 1d9638067 PMD 0 [ 3117.694338] Oops: 0002 [#1] SMP KASAN PTI [ 3117.694490] CPU: 1 PID: 1225 Comm: mount Tainted: G B W 4.17.0+ #1 [ 3117.694703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.695073] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0 [ 3117.695246] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7 [ 3117.695793] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292 [ 3117.695969] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000 [ 3117.696182] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297 [ 3117.696391] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb [ 3117.696604] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019 [ 3117.696813] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0 [ 3117.697032] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3117.697280] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3117.702357] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0 [ 3117.707235] Call Trace: [ 3117.712077] locate_dirty_segment+0x189/0x190 [ 3117.716891] f2fs_allocate_new_segments+0xa9/0xe0 [ 3117.721617] recover_data+0x703/0x2c20 [ 3117.726316] ? f2fs_recover_fsync_data+0x48f/0xd50 [ 3117.730957] ? ksys_mount+0x7e/0xd0 [ 3117.735573] ? policy_nodemask+0x1a/0x90 [ 3117.740198] ? policy_node+0x56/0x70 [ 3117.744829] ? add_fsync_inode+0xf0/0xf0 [ 3117.749487] ? blk_finish_plug+0x44/0x60 [ 3117.754152] ? f2fs_ra_meta_pages+0x38b/0x5e0 [ 3117.758831] ? find_inode_fast+0xac/0xc0 [ 3117.763448] ? f2fs_is_valid_blkaddr+0x320/0x320 [ 3117.768046] ? __radix_tree_lookup+0x150/0x150 [ 3117.772603] ? dqget+0x670/0x670 [ 3117.777159] ? pagecache_get_page+0x29/0x410 [ 3117.781648] ? kmem_cache_alloc+0x176/0x1e0 [ 3117.786067] ? f2fs_is_valid_blkaddr+0x11d/0x320 [ 3117.790476] f2fs_recover_fsync_data+0xc23/0xd50 [ 3117.794790] ? f2fs_space_for_roll_forward+0x60/0x60 [ 3117.799086] ? rb_insert_color+0x323/0x3d0 [ 3117.803304] ? f2fs_recover_orphan_inodes+0xa5/0x700 [ 3117.807563] ? proc_register+0x153/0x1d0 [ 3117.811766] ? f2fs_remove_orphan_inode+0x10/0x10 [ 3117.815947] ? f2fs_attr_store+0x50/0x50 [ 3117.820087] ? proc_create_single_data+0x52/0x60 [ 3117.824262] f2fs_fill_super+0x1d06/0x2b40 [ 3117.828367] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.832432] ? sget_userns+0x65e/0x690 [ 3117.836500] ? set_blocksize+0x88/0x130 [ 3117.840501] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.844420] mount_bdev+0x1c0/0x200 [ 3117.848275] mount_fs+0x5c/0x190 [ 3117.852053] vfs_kern_mount+0x64/0x190 [ 3117.855810] do_mount+0x2e4/0x1450 [ 3117.859441] ? lockref_put_return+0x130/0x130 [ 3117.862996] ? copy_mount_string+0x20/0x20 [ 3117.866417] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.869719] ? kasan_kmalloc+0xa6/0xd0 [ 3117.872948] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.876121] ? __kmalloc_track_caller+0x196/0x210 [ 3117.879333] ? _copy_from_user+0x61/0x90 [ 3117.882467] ? memdup_user+0x3e/0x60 [ 3117.885604] ksys_mount+0x7e/0xd0 [ 3117.888700] __x64_sys_mount+0x62/0x70 [ 3117.891742] do_syscall_64+0x73/0x160 [ 3117.894692] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.897669] RIP: 0033:0x7f5693f14b9a [ 3117.900563] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.906922] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.910159] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.913469] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.916764] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.920071] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.923393] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.926680] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 3117.949979] CR2: 0000000000000000 [ 3117.954283] ---[ end trace a8e0d899985faf32 ]--- [ 3117.958575] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0 [ 3117.962810] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7 [ 3117.971789] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292 [ 3117.976333] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000 [ 3117.980926] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297 [ 3117.985497] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb [ 3117.990098] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019 [ 3117.994761] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0 [ 3117.999392] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3118.004096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3118.008816] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0 - Location https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/segment.c#L775 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; Here dirty_i->dirty_segmap[t] can be NULL which leads to crash in test_and_clear_bit() Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: The function is called sanity_check_ckpt()] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 8 ++++---- fs/f2fs/super.c | 12 ++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c81cd5057b8e..624817eeb25e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -825,15 +825,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); - /* Sanity checking of checkpoint */ - if (sanity_check_ckpt(sbi)) - goto free_fail_no_cp; - if (cur_page == cp1) sbi->cur_cp_pack = 1; else sbi->cur_cp_pack = 2; + /* Sanity checking of checkpoint */ + if (sanity_check_ckpt(sbi)) + goto free_fail_no_cp; + if (cp_blks <= 1) goto done; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9fafb1404f39..de4de4ebe64c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1957,6 +1957,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int sit_bitmap_size, nat_bitmap_size; unsigned int log_blocks_per_seg; unsigned int segment_count_main; + unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count; int i; @@ -2017,6 +2018,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + cp_pack_start_sum = __start_sum_addr(sbi); + cp_payload = __cp_payload(sbi); + if (cp_pack_start_sum < cp_payload + 1 || + cp_pack_start_sum > blocks_per_seg - 1 - + NR_CURSEG_TYPE) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong cp_pack_start_sum: %u", + cp_pack_start_sum); + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; From cb7ccb9924bb3596f211badf0d2becf131a979cd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 17 Apr 2018 19:10:15 -0700 Subject: [PATCH 0757/2608] xfs: don't fail when converting shortform attr to long form during ATTR_REPLACE commit 7b38460dc8e4eafba06c78f8e37099d3b34d473c upstream. Kanda Motohiro reported that expanding a tiny xattr into a large xattr fails on XFS because we remove the tiny xattr from a shortform fork and then try to re-add it after converting the fork to extents format having not removed the ATTR_REPLACE flag. This fails because the attr is no longer present, causing a fs shutdown. This is derived from the patch in his bug report, but we really shouldn't ignore a nonzero retval from the remove call. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199119 Reported-by: kanda.motohiro@gmail.com Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_attr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671de..ea66f04f46f7 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -501,7 +501,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) if (args->flags & ATTR_CREATE) return retval; retval = xfs_attr_shortform_remove(args); - ASSERT(retval == 0); + if (retval) + return retval; + /* + * Since we have removed the old attr, clear ATTR_REPLACE so + * that the leaf format add routine won't trip over the attr + * not being around. + */ + args->flags &= ~ATTR_REPLACE; } if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || From 04d269610ea35656fe4d9609f5a5fa043daa866e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 2 Dec 2018 10:03:24 -0500 Subject: [PATCH 0758/2608] Revert "wlcore: Add missing PM call for wlcore_cmd_wait_for_event_or_timeout()" This reverts commit e87efc44dd36ba3db59847c418354711ebad779b which was upstream commit 4ec7cece87b3ed21ffcd407c62fb2f151a366bc1. From Dietmar May's report on the stable mailing list (https://www.spinics.net/lists/stable/msg272201.html): > I've run into some problems which appear due to (a) recent patch(es) on > the wlcore wifi driver. > > 4.4.160 - commit 3fdd34643ffc378b5924941fad40352c04610294 > 4.9.131 - commit afeeecc764436f31d4447575bb9007732333818c > > Earlier versions (4.9.130 and 4.4.159 - tested back to 4.4.49) do not > exhibit this problem. It is still present in 4.9.141. > > master as of 4.20.0-rc4 does not exhibit this problem. > > Basically, during client association when in AP mode (running hostapd), > handshake may or may not complete following a noticeable delay. If > successful, then the driver fails consistently in warn_slowpath_null > during disassociation. If unsuccessful, the wifi client attempts multiple > times, sometimes failing repeatedly. I've had clients unable to connect > for 3-5 minutes during testing, with the syslog filled with dozens of > backtraces. syslog details are below. > > I'm working on an embedded device with a TI 3352 ARM processor and a > murata wl1271 module in sdio mode. We're running a fully patched ubuntu > 18.04 ARM build, with a kernel built from kernel.org's stable/linux repo . > Relevant parts of the kernel config are included below. > > The commit message states: > > > /I've only seen this few times with the runtime PM patches enabled so > > this one is probably not needed before that. This seems to work > > currently based on the current PM implementation timer. Let's apply > > this separately though in case others are hitting this issue./ > We're not doing anything explicit with power management. The device is an > IoT edge gateway with battery backup, normally running on wall power. The > battery is currently used solely to shut down the system cleanly to avoid > filesystem corruption. > > The device tree is configured to keep power in suspend; but the device > should never suspend, so in our case, there is no need to call > wl1271_ps_elp_wakeup() or wl1271_ps_elp_sleep(), as occurs in the patch. Signed-off-by: Sasha Levin --- drivers/net/wireless/ti/wlcore/cmd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index f48c3f62966d..761cf8573a80 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -35,7 +35,6 @@ #include "wl12xx_80211.h" #include "cmd.h" #include "event.h" -#include "ps.h" #include "tx.h" #include "hw_ops.h" @@ -192,10 +191,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT); - ret = wl1271_ps_elp_wakeup(wl); - if (ret < 0) - return ret; - do { if (time_after(jiffies, timeout_time)) { wl1271_debug(DEBUG_CMD, "timeout waiting for event %d", @@ -227,7 +222,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, } while (!event); out: - wl1271_ps_elp_sleep(wl); kfree(events_vector); return ret; } From 3a8a411c927716fad4004a6953a5b2cf988eb2df Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 20 Nov 2018 11:39:56 +0000 Subject: [PATCH 0759/2608] net: skb_scrub_packet(): Scrub offload_fwd_mark [ Upstream commit b5dd186d10ba59e6b5ba60e42b3b083df56df6f3 ] When a packet is trapped and the corresponding SKB marked as already-forwarded, it retains this marking even after it is forwarded across veth links into another bridge. There, since it ingresses the bridge over veth, which doesn't have offload_fwd_mark, it triggers a warning in nbp_switchdev_frame_mark(). Then nbp_switchdev_allowed_egress() decides not to allow egress from this bridge through another veth, because the SKB is already marked, and the mark (of 0) of course matches. Thus the packet is incorrectly blocked. Solve by resetting offload_fwd_mark() in skb_scrub_packet(). That function is called from tunnels and also from veth, and thus catches the cases where traffic is forwarded between bridges and transformed in a way that invalidates the marking. Fixes: 6bc506b4fb06 ("bridge: switchdev: Add forward mark support for stacked devices") Fixes: abf4bb6b63d0 ("skbuff: Add the offload_mr_fwd_mark field") Signed-off-by: Petr Machata Suggested-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c19a118f9f82..4067fa3fcbb2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4882,6 +4882,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) nf_reset(skb); nf_reset_trace(skb); +#ifdef CONFIG_NET_SWITCHDEV + skb->offload_fwd_mark = 0; +#endif + if (!xnet) return; From 44e4f3644934b65c91bc05245a9447dde2c7af40 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 21 Nov 2018 16:32:10 +0100 Subject: [PATCH 0760/2608] net: thunderx: set xdp_prog to NULL if bpf_prog_add fails [ Upstream commit 6d0f60b0f8588fd4380ea5df9601e12fddd55ce2 ] Set xdp_prog pointer to NULL if bpf_prog_add fails since that routine reports the error code instead of NULL in case of failure and xdp_prog pointer value is used in the driver to verify if XDP is currently enabled. Moreover report the error code to userspace if nicvf_xdp_setup fails Fixes: 05c773f52b96 ("net: thunderx: Add basic XDP support") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 2237ef8e4344..f13256af8031 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1691,6 +1691,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) bool if_up = netif_running(nic->netdev); struct bpf_prog *old_prog; bool bpf_attached = false; + int ret = 0; /* For now just support only the usual MTU sized frames */ if (prog && (dev->mtu > 1500)) { @@ -1724,8 +1725,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) if (nic->xdp_prog) { /* Attach BPF program */ nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); - if (!IS_ERR(nic->xdp_prog)) + if (!IS_ERR(nic->xdp_prog)) { bpf_attached = true; + } else { + ret = PTR_ERR(nic->xdp_prog); + nic->xdp_prog = NULL; + } } /* Calculate Tx queues needed for XDP and network stack */ @@ -1737,7 +1742,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) netif_trans_update(nic->netdev); } - return 0; + return ret; } static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) From e5cfda6c5d04a04e9ec9834925e590f7e921c57f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 22 Nov 2018 14:36:30 +0800 Subject: [PATCH 0761/2608] virtio-net: disable guest csum during XDP set [ Upstream commit e59ff2c49ae16e1d179de679aca81405829aee6c ] We don't disable VIRTIO_NET_F_GUEST_CSUM if XDP was set. This means we can receive partial csumed packets with metadata kept in the vnet_hdr. This may have several side effects: - It could be overridden by header adjustment, thus is might be not correct after XDP processing. - There's no way to pass such metadata information through XDP_REDIRECT to another driver. - XDP does not support checksum offload right now. So simply disable guest csum if possible in this the case of XDP. Fixes: 3f93522ffab2d ("virtio-net: switch off offloads on demand if possible on XDP set") Reported-by: Jesper Dangaard Brouer Cc: Jesper Dangaard Brouer Cc: Pavel Popa Cc: David Ahern Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f528e9ac3413..2ffa7b290591 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -61,7 +61,8 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_ECN, - VIRTIO_NET_F_GUEST_UFO + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_GUEST_CSUM }; struct virtnet_stats { @@ -1939,9 +1940,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi) if (!vi->guest_offloads) return 0; - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) - offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM; - return virtnet_set_guest_offloads(vi, offloads); } @@ -1951,8 +1949,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi) if (!vi->guest_offloads) return 0; - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) - offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM; return virtnet_set_guest_offloads(vi, offloads); } From 0435cabc0d6a0f0ebb60589a9ad28d1a3aecd748 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 22 Nov 2018 14:36:31 +0800 Subject: [PATCH 0762/2608] virtio-net: fail XDP set if guest csum is negotiated [ Upstream commit 18ba58e1c234ea1a2d9835ac8c1735d965ce4640 ] We don't support partial csumed packet since its metadata will be lost or incorrect during XDP processing. So fail the XDP set if guest_csum feature is negotiated. Fixes: f600b6905015 ("virtio_net: Add XDP support") Reported-by: Jesper Dangaard Brouer Cc: Jesper Dangaard Brouer Cc: Pavel Popa Cc: David Ahern Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 2ffa7b290591..0e8e3be50332 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1966,8 +1966,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || - virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) { - NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first"); + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { + NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); return -EOPNOTSUPP; } From 42412faf1a9356e6967c5c5fc530fe0ab5e32e9f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 23 Nov 2018 18:28:01 +0100 Subject: [PATCH 0763/2608] net: thunderx: set tso_hdrs pointer to NULL in nicvf_free_snd_queue [ Upstream commit ef2a7cf1d8831535b8991459567b385661eb4a36 ] Reset snd_queue tso_hdrs pointer to NULL in nicvf_free_snd_queue routine since it is used to check if tso dma descriptor queue has been previously allocated. The issue can be triggered with the following reproducer: $ip link set dev enP2p1s0v0 xdpdrv obj xdp_dummy.o $ip link set dev enP2p1s0v0 xdpdrv off [ 341.467649] WARNING: CPU: 74 PID: 2158 at mm/vmalloc.c:1511 __vunmap+0x98/0xe0 [ 341.515010] Hardware name: GIGABYTE H270-T70/MT70-HD0, BIOS T49 02/02/2018 [ 341.521874] pstate: 60400005 (nZCv daif +PAN -UAO) [ 341.526654] pc : __vunmap+0x98/0xe0 [ 341.530132] lr : __vunmap+0x98/0xe0 [ 341.533609] sp : ffff00001c5db860 [ 341.536913] x29: ffff00001c5db860 x28: 0000000000020000 [ 341.542214] x27: ffff810feb5090b0 x26: ffff000017e57000 [ 341.547515] x25: 0000000000000000 x24: 00000000fbd00000 [ 341.552816] x23: 0000000000000000 x22: ffff810feb5090b0 [ 341.558117] x21: 0000000000000000 x20: 0000000000000000 [ 341.563418] x19: ffff000017e57000 x18: 0000000000000000 [ 341.568719] x17: 0000000000000000 x16: 0000000000000000 [ 341.574020] x15: 0000000000000010 x14: ffffffffffffffff [ 341.579321] x13: ffff00008985eb27 x12: ffff00000985eb2f [ 341.584622] x11: ffff0000096b3000 x10: ffff00001c5db510 [ 341.589923] x9 : 00000000ffffffd0 x8 : ffff0000086868e8 [ 341.595224] x7 : 3430303030303030 x6 : 00000000000006ef [ 341.600525] x5 : 00000000003fffff x4 : 0000000000000000 [ 341.605825] x3 : 0000000000000000 x2 : ffffffffffffffff [ 341.611126] x1 : ffff0000096b3728 x0 : 0000000000000038 [ 341.616428] Call trace: [ 341.618866] __vunmap+0x98/0xe0 [ 341.621997] vunmap+0x3c/0x50 [ 341.624961] arch_dma_free+0x68/0xa0 [ 341.628534] dma_direct_free+0x50/0x80 [ 341.632285] nicvf_free_resources+0x160/0x2d8 [nicvf] [ 341.637327] nicvf_config_data_transfer+0x174/0x5e8 [nicvf] [ 341.642890] nicvf_stop+0x298/0x340 [nicvf] [ 341.647066] __dev_close_many+0x9c/0x108 [ 341.650977] dev_close_many+0xa4/0x158 [ 341.654720] rollback_registered_many+0x140/0x530 [ 341.659414] rollback_registered+0x54/0x80 [ 341.663499] unregister_netdevice_queue+0x9c/0xe8 [ 341.668192] unregister_netdev+0x28/0x38 [ 341.672106] nicvf_remove+0xa4/0xa8 [nicvf] [ 341.676280] nicvf_shutdown+0x20/0x30 [nicvf] [ 341.680630] pci_device_shutdown+0x44/0x88 [ 341.684720] device_shutdown+0x144/0x250 [ 341.688640] kernel_restart_prepare+0x44/0x50 [ 341.692986] kernel_restart+0x20/0x68 [ 341.696638] __se_sys_reboot+0x210/0x238 [ 341.700550] __arm64_sys_reboot+0x24/0x30 [ 341.704555] el0_svc_handler+0x94/0x110 [ 341.708382] el0_svc+0x8/0xc [ 341.711252] ---[ end trace 3f4019c8439959c9 ]--- [ 341.715874] page:ffff7e0003ef4000 count:0 mapcount:0 mapping:0000000000000000 index:0x4 [ 341.723872] flags: 0x1fffe000000000() [ 341.727527] raw: 001fffe000000000 ffff7e0003f1a008 ffff7e0003ef4048 0000000000000000 [ 341.735263] raw: 0000000000000004 0000000000000000 00000000ffffffff 0000000000000000 [ 341.742994] page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0) where xdp_dummy.c is a simple bpf program that forwards the incoming frames to the network stack (available here: https://github.com/altoor/xdp_walkthrough_examples/blob/master/sample_1/xdp_dummy.c) Fixes: 05c773f52b96 ("net: thunderx: Add basic XDP support") Fixes: 4863dea3fab0 ("net: Adding support for Cavium ThunderX network controller") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index a3d12dbde95b..09494e1c77c5 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) if (!sq->dmem.base) return; - if (sq->tso_hdrs) + if (sq->tso_hdrs) { dma_free_coherent(&nic->pdev->dev, sq->dmem.q_len * TSO_HEADER_SIZE, sq->tso_hdrs, sq->tso_hdrs_phys); + sq->tso_hdrs = NULL; + } /* Free pending skbs in the queue */ smp_rmb(); From 67f6fba765ef8cb19089d476e3c367212a2fbdfd Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 20 Nov 2018 13:00:18 -0500 Subject: [PATCH 0764/2608] packet: copy user buffers before orphan or clone [ Upstream commit 5cd8d46ea1562be80063f53c7c6a5f40224de623 ] tpacket_snd sends packets with user pages linked into skb frags. It notifies that pages can be reused when the skb is released by setting skb->destructor to tpacket_destruct_skb. This can cause data corruption if the skb is orphaned (e.g., on transmit through veth) or cloned (e.g., on mirror to another psock). Create a kernel-private copy of data in these cases, same as tun/tap zerocopy transmission. Reuse that infrastructure: mark the skb as SKBTX_ZEROCOPY_FRAG, which will trigger copy in skb_orphan_frags(_rx). Unlike other zerocopy packets, do not set shinfo destructor_arg to struct ubuf_info. tpacket_destruct_skb already uses that ptr to notify when the original skb is released and a timestamp is recorded. Do not change this timestamp behavior. The ubuf_info->callback is not needed anyway, as no zerocopy notification is expected. Mark destructor_arg as not-a-uarg by setting the lower bit to 1. The resulting value is not a valid ubuf_info pointer, nor a valid tpacket_snd frame address. Add skb_zcopy_.._nouarg helpers for this. The fix relies on features introduced in commit 52267790ef52 ("sock: add MSG_ZEROCOPY"), so can be backported as is only to 4.14. Tested with from `./in_netns.sh ./txring_overwrite` from http://github.com/wdebruij/kerneltools/tests Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap") Reported-by: Anand H. Krishnan Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 18 +++++++++++++++++- net/packet/af_packet.c | 4 ++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f64e88444082..f6250555ce7d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1288,6 +1288,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) } } +static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) +{ + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); + skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; +} + +static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) +{ + return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL; +} + +static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) +{ + return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); +} + /* Release a reference on a zerocopy structure */ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) { @@ -1297,7 +1313,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) if (uarg->callback == sock_zerocopy_callback) { uarg->zerocopy = uarg->zerocopy && zerocopy; sock_zerocopy_put(uarg); - } else { + } else if (!skb_zcopy_is_nouarg(skb)) { uarg->callback(uarg, zerocopy); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8d1a7c900393..88d5b2645bb0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2433,7 +2433,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) void *ph; __u32 ts; - ph = skb_shinfo(skb)->destructor_arg; + ph = skb_zcopy_get_nouarg(skb); packet_dec_pending(&po->tx_ring); ts = __packet_set_timestamp(po, ph, skb); @@ -2499,7 +2499,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); - skb_shinfo(skb)->destructor_arg = ph.raw; + skb_zcopy_set_nouarg(skb, ph.raw); skb_reserve(skb, hlen); skb_reset_network_header(skb); From 47897682fe041a5898cfd30a5f24d3173e856d5a Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 28 Nov 2018 14:53:19 +0800 Subject: [PATCH 0765/2608] rapidio/rionet: do not free skb before reading its length [ Upstream commit cfc435198f53a6fa1f656d98466b24967ff457d0 ] skb is freed via dev_kfree_skb_any, however, skb->len is read then. This may result in a use-after-free bug. Fixes: e6161d64263 ("rapidio/rionet: rework driver initialization and removal") Signed-off-by: Pan Bian Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/rionet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index e9f101c9bae2..bfbb39f93554 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) * it just report sending a packet to the target * (without actual packet transfer). */ - dev_kfree_skb_any(skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += skb->len; + dev_kfree_skb_any(skb); } } From eb09c6dbe4bb799ba153c5d729e2d26098a3b410 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 28 Nov 2018 16:20:50 +0100 Subject: [PATCH 0766/2608] s390/qeth: fix length check in SNMP processing [ Upstream commit 9a764c1e59684c0358e16ccaafd870629f2cfe67 ] The response for a SNMP request can consist of multiple parts, which the cmd callback stages into a kernel buffer until all parts have been received. If the callback detects that the staging buffer provides insufficient space, it bails out with error. This processing is buggy for the first part of the response - while it initially checks for a length of 'data_len', it later copies an additional amount of 'offsetof(struct qeth_snmp_cmd, data)' bytes. Fix the calculation of 'data_len' for the first part of the response. This also nicely cleans up the memcpy code. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core_main.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 169dd7127f9e..69ef5f4060ed 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4545,8 +4545,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card, { struct qeth_ipa_cmd *cmd; struct qeth_arp_query_info *qinfo; - struct qeth_snmp_cmd *snmp; unsigned char *data; + void *snmp_data; __u16 data_len; QETH_CARD_TEXT(card, 3, "snpcmdcb"); @@ -4554,7 +4554,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card, cmd = (struct qeth_ipa_cmd *) sdata; data = (unsigned char *)((char *)cmd - reply->offset); qinfo = (struct qeth_arp_query_info *) reply->param; - snmp = &cmd->data.setadapterparms.data.snmp; if (cmd->hdr.return_code) { QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code); @@ -4567,10 +4566,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card, return 0; } data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data)); - if (cmd->data.setadapterparms.hdr.seq_no == 1) - data_len -= (__u16)((char *)&snmp->data - (char *)cmd); - else - data_len -= (__u16)((char *)&snmp->request - (char *)cmd); + if (cmd->data.setadapterparms.hdr.seq_no == 1) { + snmp_data = &cmd->data.setadapterparms.data.snmp; + data_len -= offsetof(struct qeth_ipa_cmd, + data.setadapterparms.data.snmp); + } else { + snmp_data = &cmd->data.setadapterparms.data.snmp.request; + data_len -= offsetof(struct qeth_ipa_cmd, + data.setadapterparms.data.snmp.request); + } /* check if there is enough room in userspace */ if ((qinfo->udata_len - qinfo->udata_offset) < data_len) { @@ -4583,16 +4587,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card, QETH_CARD_TEXT_(card, 4, "sseqn%i", cmd->data.setadapterparms.hdr.seq_no); /*copy entries to user buffer*/ - if (cmd->data.setadapterparms.hdr.seq_no == 1) { - memcpy(qinfo->udata + qinfo->udata_offset, - (char *)snmp, - data_len + offsetof(struct qeth_snmp_cmd, data)); - qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data); - } else { - memcpy(qinfo->udata + qinfo->udata_offset, - (char *)&snmp->request, data_len); - } + memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len); qinfo->udata_offset += data_len; + /* check if all replies received ... */ QETH_CARD_TEXT_(card, 4, "srtot%i", cmd->data.setadapterparms.hdr.used_total); From 72e5a2bdf753f5ba82ea6bfb7706aabc9587d0cb Mon Sep 17 00:00:00 2001 From: Bernd Eckstein <3erndeckstein@gmail.com> Date: Fri, 23 Nov 2018 13:51:26 +0100 Subject: [PATCH 0767/2608] usbnet: ipheth: fix potential recvmsg bug and recvmsg bug 2 [ Upstream commit 45611c61dd503454b2edae00aabe1e429ec49ebe ] The bug is not easily reproducable, as it may occur very infrequently (we had machines with 20minutes heavy downloading before it occurred) However, on a virual machine (VMWare on Windows 10 host) it occurred pretty frequently (1-2 seconds after a speedtest was started) dev->tx_skb mab be freed via dev_kfree_skb_irq on a callback before it is set. This causes the following problems: - double free of the skb or potential memory leak - in dmesg: 'recvmsg bug' and 'recvmsg bug 2' and eventually general protection fault Example dmesg output: [ 134.841986] ------------[ cut here ]------------ [ 134.841987] recvmsg bug: copied 9C24A555 seq 9C24B557 rcvnxt 9C25A6B3 fl 0 [ 134.841993] WARNING: CPU: 7 PID: 2629 at /build/linux-hwe-On9fm7/linux-hwe-4.15.0/net/ipv4/tcp.c:1865 tcp_recvmsg+0x44d/0xab0 [ 134.841994] Modules linked in: ipheth(OE) kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd vmw_balloon intel_rapl_perf joydev input_leds serio_raw vmw_vsock_vmci_transport vsock shpchp i2c_piix4 mac_hid binfmt_misc vmw_vmci parport_pc ppdev lp parport autofs4 vmw_pvscsi vmxnet3 hid_generic usbhid hid vmwgfx ttm drm_kms_helper syscopyarea sysfillrect mptspi mptscsih sysimgblt ahci psmouse fb_sys_fops pata_acpi mptbase libahci e1000 drm scsi_transport_spi [ 134.842046] CPU: 7 PID: 2629 Comm: python Tainted: G W OE 4.15.0-34-generic #37~16.04.1-Ubuntu [ 134.842046] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 [ 134.842048] RIP: 0010:tcp_recvmsg+0x44d/0xab0 [ 134.842048] RSP: 0018:ffffa6630422bcc8 EFLAGS: 00010286 [ 134.842049] RAX: 0000000000000000 RBX: ffff997616f4f200 RCX: 0000000000000006 [ 134.842049] RDX: 0000000000000007 RSI: 0000000000000082 RDI: ffff9976257d6490 [ 134.842050] RBP: ffffa6630422bd98 R08: 0000000000000001 R09: 000000000004bba4 [ 134.842050] R10: 0000000001e00c6f R11: 000000000004bba4 R12: ffff99760dee3000 [ 134.842051] R13: 0000000000000000 R14: ffff99760dee3514 R15: 0000000000000000 [ 134.842051] FS: 00007fe332347700(0000) GS:ffff9976257c0000(0000) knlGS:0000000000000000 [ 134.842052] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 134.842053] CR2: 0000000001e41000 CR3: 000000020e9b4006 CR4: 00000000003606e0 [ 134.842055] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 134.842055] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 134.842057] Call Trace: [ 134.842060] ? aa_sk_perm+0x53/0x1a0 [ 134.842064] inet_recvmsg+0x51/0xc0 [ 134.842066] sock_recvmsg+0x43/0x50 [ 134.842070] SYSC_recvfrom+0xe4/0x160 [ 134.842072] ? __schedule+0x3de/0x8b0 [ 134.842075] ? ktime_get_ts64+0x4c/0xf0 [ 134.842079] SyS_recvfrom+0xe/0x10 [ 134.842082] do_syscall_64+0x73/0x130 [ 134.842086] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 134.842086] RIP: 0033:0x7fe331f5a81d [ 134.842088] RSP: 002b:00007ffe8da98398 EFLAGS: 00000246 ORIG_RAX: 000000000000002d [ 134.842090] RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 00007fe331f5a81d [ 134.842094] RDX: 00000000000003fb RSI: 0000000001e00874 RDI: 0000000000000003 [ 134.842095] RBP: 00007fe32f642c70 R08: 0000000000000000 R09: 0000000000000000 [ 134.842097] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe332347698 [ 134.842099] R13: 0000000001b7e0a0 R14: 0000000001e00874 R15: 0000000000000000 [ 134.842103] Code: 24 fd ff ff e9 cc fe ff ff 48 89 d8 41 8b 8c 24 10 05 00 00 44 8b 45 80 48 c7 c7 08 bd 59 8b 48 89 85 68 ff ff ff e8 b3 c4 7d ff <0f> 0b 48 8b 85 68 ff ff ff e9 e9 fe ff ff 41 8b 8c 24 10 05 00 [ 134.842126] ---[ end trace b7138fc08c83147f ]--- [ 134.842144] general protection fault: 0000 [#1] SMP PTI [ 134.842145] Modules linked in: ipheth(OE) kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd vmw_balloon intel_rapl_perf joydev input_leds serio_raw vmw_vsock_vmci_transport vsock shpchp i2c_piix4 mac_hid binfmt_misc vmw_vmci parport_pc ppdev lp parport autofs4 vmw_pvscsi vmxnet3 hid_generic usbhid hid vmwgfx ttm drm_kms_helper syscopyarea sysfillrect mptspi mptscsih sysimgblt ahci psmouse fb_sys_fops pata_acpi mptbase libahci e1000 drm scsi_transport_spi [ 134.842161] CPU: 7 PID: 2629 Comm: python Tainted: G W OE 4.15.0-34-generic #37~16.04.1-Ubuntu [ 134.842162] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 [ 134.842164] RIP: 0010:tcp_close+0x2c6/0x440 [ 134.842165] RSP: 0018:ffffa6630422bde8 EFLAGS: 00010202 [ 134.842167] RAX: 0000000000000000 RBX: ffff99760dee3000 RCX: 0000000180400034 [ 134.842168] RDX: 5c4afd407207a6c4 RSI: ffffe868495bd300 RDI: ffff997616f4f200 [ 134.842169] RBP: ffffa6630422be08 R08: 0000000016f4d401 R09: 0000000180400034 [ 134.842169] R10: ffffa6630422bd98 R11: 0000000000000000 R12: 000000000000600c [ 134.842170] R13: 0000000000000000 R14: ffff99760dee30c8 R15: ffff9975bd44fe00 [ 134.842171] FS: 00007fe332347700(0000) GS:ffff9976257c0000(0000) knlGS:0000000000000000 [ 134.842173] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 134.842174] CR2: 0000000001e41000 CR3: 000000020e9b4006 CR4: 00000000003606e0 [ 134.842177] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 134.842178] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 134.842179] Call Trace: [ 134.842181] inet_release+0x42/0x70 [ 134.842183] __sock_release+0x42/0xb0 [ 134.842184] sock_close+0x15/0x20 [ 134.842187] __fput+0xea/0x220 [ 134.842189] ____fput+0xe/0x10 [ 134.842191] task_work_run+0x8a/0xb0 [ 134.842193] exit_to_usermode_loop+0xc4/0xd0 [ 134.842195] do_syscall_64+0xf4/0x130 [ 134.842197] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 134.842197] RIP: 0033:0x7fe331f5a560 [ 134.842198] RSP: 002b:00007ffe8da982e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 [ 134.842200] RAX: 0000000000000000 RBX: 00007fe32f642c70 RCX: 00007fe331f5a560 [ 134.842201] RDX: 00000000008f5320 RSI: 0000000001cd4b50 RDI: 0000000000000003 [ 134.842202] RBP: 00007fe32f6500f8 R08: 000000000000003c R09: 00000000009343c0 [ 134.842203] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe32f6500d0 [ 134.842204] R13: 00000000008f5320 R14: 00000000008f5320 R15: 0000000001cd4770 [ 134.842205] Code: c8 00 00 00 45 31 e4 49 39 fe 75 4d eb 50 83 ab d8 00 00 00 01 48 8b 17 48 8b 47 08 48 c7 07 00 00 00 00 48 c7 47 08 00 00 00 00 <48> 89 42 08 48 89 10 0f b6 57 34 8b 47 2c 2b 47 28 83 e2 01 80 [ 134.842226] RIP: tcp_close+0x2c6/0x440 RSP: ffffa6630422bde8 [ 134.842227] ---[ end trace b7138fc08c831480 ]--- The proposed patch eliminates a potential racing condition. Before, usb_submit_urb was called and _after_ that, the skb was attached (dev->tx_skb). So, on a callback it was possible, however unlikely that the skb was freed before it was set. That way (because dev->tx_skb was not set to NULL after it was freed), it could happen that a skb from a earlier transmission was freed a second time (and the skb we should have freed did not get freed at all) Now we free the skb directly in ipheth_tx(). It is not passed to the callback anymore, eliminating the posibility of a double free of the same skb. Depending on the retval of usb_submit_urb() we use dev_kfree_skb_any() respectively dev_consume_skb_any() to free the skb. Signed-off-by: Oliver Zweigle Signed-off-by: Bernd Eckstein <3ernd.Eckstein@gmail.com> Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ipheth.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index d49c7103085e..aabbcfb6e6da 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -140,7 +140,6 @@ struct ipheth_device { struct usb_device *udev; struct usb_interface *intf; struct net_device *net; - struct sk_buff *tx_skb; struct urb *tx_urb; struct urb *rx_urb; unsigned char *tx_buf; @@ -229,6 +228,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb) case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: + case -EPROTO: return; case 0: break; @@ -280,7 +280,6 @@ static void ipheth_sndbulk_callback(struct urb *urb) dev_err(&dev->intf->dev, "%s: urb status: %d\n", __func__, status); - dev_kfree_skb_irq(dev->tx_skb); netif_wake_queue(dev->net); } @@ -410,7 +409,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) if (skb->len > IPHETH_BUF_SIZE) { WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len); dev->net->stats.tx_dropped++; - dev_kfree_skb_irq(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -430,12 +429,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n", __func__, retval); dev->net->stats.tx_errors++; - dev_kfree_skb_irq(skb); + dev_kfree_skb_any(skb); } else { - dev->tx_skb = skb; - dev->net->stats.tx_packets++; dev->net->stats.tx_bytes += skb->len; + dev_consume_skb_any(skb); netif_stop_queue(net); } From e5d981df9a1d3f13dfff083ab0578daedbcd9e8a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jan 2018 22:53:28 +0100 Subject: [PATCH 0768/2608] sched/core: Fix cpu.max vs. cpuhotplug deadlock commit ce48c146495a1a50e48cdbfbfaba3e708be7c07c upstream Tejun reported the following cpu-hotplug lock (percpu-rwsem) read recursion: tg_set_cfs_bandwidth() get_online_cpus() cpus_read_lock() cfs_bandwidth_usage_inc() static_key_slow_inc() cpus_read_lock() Reported-by: Tejun Heo Tested-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180122215328.GP3397@worktop Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/jump_label.h | 7 +++++++ kernel/jump_label.c | 12 +++++++++--- kernel/sched/fair.c | 4 ++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 3b7675bcca64..cd0d2270998f 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry, extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); +extern void static_key_slow_inc_cpuslocked(struct static_key *key); +extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); @@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key) atomic_dec(&key->enabled); } +#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) +#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) + static inline int jump_label_text_reserved(void *start, void *end) { return 0; @@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void); #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) +#define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) +#define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 7c3774ac1d51..70be35a19be2 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -79,7 +79,7 @@ int static_key_count(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_count); -static void static_key_slow_inc_cpuslocked(struct static_key *key) +void static_key_slow_inc_cpuslocked(struct static_key *key) { int v, v1; @@ -180,7 +180,7 @@ void static_key_disable(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_disable); -static void static_key_slow_dec_cpuslocked(struct static_key *key, +static void __static_key_slow_dec_cpuslocked(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { @@ -211,7 +211,7 @@ static void __static_key_slow_dec(struct static_key *key, struct delayed_work *work) { cpus_read_lock(); - static_key_slow_dec_cpuslocked(key, rate_limit, work); + __static_key_slow_dec_cpuslocked(key, rate_limit, work); cpus_read_unlock(); } @@ -229,6 +229,12 @@ void static_key_slow_dec(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_slow_dec); +void static_key_slow_dec_cpuslocked(struct static_key *key) +{ + STATIC_KEY_CHECK_USE(); + __static_key_slow_dec_cpuslocked(key, 0, NULL); +} + void static_key_slow_dec_deferred(struct static_key_deferred *key) { STATIC_KEY_CHECK_USE(); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2d4d79420e36..7240bb4a4090 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4040,12 +4040,12 @@ static inline bool cfs_bandwidth_used(void) void cfs_bandwidth_usage_inc(void) { - static_key_slow_inc(&__cfs_bandwidth_used); + static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used); } void cfs_bandwidth_usage_dec(void) { - static_key_slow_dec(&__cfs_bandwidth_used); + static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used); } #else /* HAVE_JUMP_LABEL */ static bool cfs_bandwidth_used(void) From da06d6d14a0a14d398b1e7c8024b090e40f4ec89 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:19 -0400 Subject: [PATCH 0769/2608] x86/bugs: Add AMD's variant of SSB_NO commit 24809860012e0130fbafe536709e08a22b3e959e upstream The AMD document outlining the SSBD handling 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf mentions that the CPUID 8000_0008.EBX[26] will mean that the speculative store bypass disable is no longer needed. A copy of this document is available at: https://bugzilla.kernel.org/show_bug.cgi?id=199889 Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Janakarajan Natarajan Cc: kvm@vger.kernel.org Cc: andrew.cooper3@citrix.com Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180601145921.9500-2-konrad.wilk@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kvm/cpuid.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 673d6e988196..21a653dfc01b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -285,6 +285,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 96643e2c75b8..065582692446 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -958,7 +958,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO)) + !(ia32_cap & ARCH_CAP_SSB_NO) && + !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d1f5c744142b..95a98397beca 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -367,7 +367,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD) | F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = From 54b65f8eaf75872052565976f524e91133778c54 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:20 -0400 Subject: [PATCH 0770/2608] x86/bugs: Add AMD's SPEC_CTRL MSR usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6ac2f49edb1ef5446089c7c660017732886d62d6 upstream The AMD document outlining the SSBD handling 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf mentions that if CPUID 8000_0008.EBX[24] is set we should be using the SPEC_CTRL MSR (0x48) over the VIRT SPEC_CTRL MSR (0xC001_011f) for speculative store bypass disable. This in effect means we should clear the X86_FEATURE_VIRT_SSBD flag so that we would prefer the SPEC_CTRL MSR. See the document titled: 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199889 Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Janakarajan Natarajan Cc: kvm@vger.kernel.org Cc: KarimAllah Ahmed Cc: andrew.cooper3@citrix.com Cc: Joerg Roedel Cc: Radim Krčmář Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Paolo Bonzini Cc: Borislav Petkov Cc: David Woodhouse Cc: Kees Cook Link: https://lkml.kernel.org/r/20180601145921.9500-3-konrad.wilk@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 12 +++++++----- arch/x86/kernel/cpu/common.c | 6 ++++++ arch/x86/kvm/cpuid.c | 10 ++++++++-- arch/x86/kvm/svm.c | 8 +++++--- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 21a653dfc01b..7d910827126b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -284,6 +284,7 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e92aedd93806..fd79e16dab07 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -532,18 +532,20 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) if (mode == SPEC_STORE_BYPASS_DISABLE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); /* - * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses - * a completely different MSR and bit dependent on family. + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may + * use a completely different MSR and bit dependent on family. */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: + case X86_VENDOR_AMD: + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + x86_amd_ssb_disable(); + break; + } x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; - case X86_VENDOR_AMD: - x86_amd_ssb_disable(); - break; } } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 065582692446..51e49f6fe8e1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -760,6 +760,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_STIBP); set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } + + if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { + set_cpu_cap(c, X86_FEATURE_SSBD); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); + } } void get_cpu_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 95a98397beca..bbcd69c76d96 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -367,7 +367,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD) | F(AMD_SSB_NO); + F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | + F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -649,7 +650,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + /* + * The preference is to use SPEC CTRL MSR instead of the + * VIRT_SPEC MSR. + */ + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->ebx |= F(VIRT_SSBD); break; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f6bebcec60b4..e9dbd62bb46e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3644,7 +3644,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; msr_info->data = svm->spec_ctrl; @@ -3749,11 +3750,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; svm->spec_ctrl = data; From 6d2533a6818274602ef1a783594f2d6fc65d8fc6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:21 -0400 Subject: [PATCH 0771/2608] x86/bugs: Switch the selection of mitigation from CPU vendor to CPU features commit 108fab4b5c8f12064ef86e02cb0459992affb30f upstream Both AMD and Intel can have SPEC_CTRL_MSR for SSBD. However AMD also has two more other ways of doing it - which are !SPEC_CTRL MSR ways. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Kees Cook Cc: kvm@vger.kernel.org Cc: KarimAllah Ahmed Cc: andrew.cooper3@citrix.com Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180601145921.9500-4-konrad.wilk@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fd79e16dab07..63a9f3fd3a37 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -535,17 +535,12 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - case X86_VENDOR_AMD: - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - x86_amd_ssb_disable(); - break; - } + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + x86_amd_ssb_disable(); + else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - break; } } From 0071cdff923922f85a8204fd9bff56a44af2b9a3 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:35:53 -0500 Subject: [PATCH 0772/2608] x86/bugs: Update when to check for the LS_CFG SSBD mitigation commit 845d382bb15c6e7dc5026c0ff919c5b13fc7e11b upstream If either the X86_FEATURE_AMD_SSBD or X86_FEATURE_VIRT_SSBD features are present, then there is no need to perform the check for the LS_CFG SSBD mitigation support. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180702213553.29202.21089.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dda741bd5789..7e03515662c0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -554,7 +554,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) nodes_per_socket = ((value >> 3) & 7) + 1; } - if (c->x86 >= 0x15 && c->x86 <= 0x17) { + if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && + !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && + c->x86 >= 0x15 && c->x86 <= 0x17) { unsigned int bit; switch (c->x86) { From 82647e461639a7cae7beaf2391f6b58f69b81110 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:36:02 -0500 Subject: [PATCH 0773/2608] x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR commit 612bc3b3d4be749f73a513a17d9b3ee1330d3487 upstream On AMD, the presence of the MSR_SPEC_CTRL feature does not imply that the SSBD mitigation support should use the SPEC_CTRL MSR. Other features could have caused the MSR_SPEC_CTRL feature to be set, while a different SSBD mitigation option is in place. Update the SSBD support to check for the actual SSBD features that will use the SPEC_CTRL MSR. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6ac2f49edb1e ("x86/bugs: Add AMD's SPEC_CTRL MSR usage") Link: http://lkml.kernel.org/r/20180702213602.29202.33151.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 63a9f3fd3a37..50941119e13e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -166,7 +166,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); if (hostval != guestval) { @@ -535,9 +536,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && + !static_cpu_has(X86_FEATURE_AMD_SSBD)) { x86_amd_ssb_disable(); - else { + } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); From 300a6f27dd8277843e39b37cca5e0c5b203743cb Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:55 +0200 Subject: [PATCH 0774/2608] x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation commit 53c613fe6349994f023245519265999eed75957f upstream STIBP is a feature provided by certain Intel ucodes / CPUs. This feature (once enabled) prevents cross-hyperthread control of decisions made by indirect branch predictors. Enable this feature if - the CPU is vulnerable to spectre v2 - the CPU supports SMT and has SMT siblings online - spectre_v2 mitigation autoselection is enabled (default) After some previous discussion, this leaves STIBP on all the time, as wrmsr on crossing kernel boundary is a no-no. This could perhaps later be a bit more optimized (like disabling it in NOHZ, experiment with disabling it in idle, etc) if needed. Note that the synchronization of the mask manipulation via newly added spec_ctrl_mutex is currently not strictly needed, as the only updater is already being serialized by cpu_add_remove_lock, but let's make this a little bit more future-proof. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251438240.15880@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 57 ++++++++++++++++++++++++++++++++++---- kernel/cpu.c | 11 +++++++- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 50941119e13e..ef02230221c5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,12 +34,10 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); -/* - * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any - * writes to SPEC_CTRL contain whatever reserved bits have been set. - */ -u64 __ro_after_init x86_spec_ctrl_base; +/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); +static DEFINE_MUTEX(spec_ctrl_mutex); /* * The vendor and possibly platform specific bits which can be modified in @@ -324,6 +322,46 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + mask = x86_spec_ctrl_base; + if (cpu_smt_control == CPU_SMT_ENABLED) + mask |= SPEC_CTRL_STIBP; + else + mask &= ~SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + cpu_smt_control == CPU_SMT_ENABLED ? + "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -423,6 +461,9 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } + + /* Enable STIBP if appropriate */ + arch_smt_update(); } #undef pr_fmt @@ -813,6 +854,8 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { + int ret; + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -827,10 +870,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", spectre_v2_module_string()); + return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/kernel/cpu.c b/kernel/cpu.c index f3f389e33343..90cf6a04e08a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2045,6 +2045,12 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { }; + static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2071,8 +2077,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) + if (!ret) { cpu_smt_control = ctrlval; + arch_smt_update(); + } cpu_maps_update_done(); return ret; } @@ -2083,6 +2091,7 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; + arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) From 4741e31931b27b9eba1e9df02d781a2f808b7d82 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:18 +0200 Subject: [PATCH 0775/2608] x86/speculation: Apply IBPB more strictly to avoid cross-process data leak commit dbfe2953f63c640463c630746cd5d9de8b2f63ae upstream Currently, IBPB is only issued in cases when switching into a non-dumpable process, the rationale being to protect such 'important and security sensitive' processess (such as GPG) from data leaking into a different userspace process via spectre v2. This is however completely insufficient to provide proper userspace-to-userpace spectrev2 protection, as any process can poison branch buffers before being scheduled out, and the newly scheduled process immediately becomes spectrev2 victim. In order to minimize the performance impact (for usecases that do require spectrev2 protection), issue the barrier only in cases when switching between processess where the victim can't be ptraced by the potential attacker (as in such cases, the attacker doesn't have to bother with branch buffers at all). [ tglx: Split up PTRACE_MODE_NOACCESS_CHK into PTRACE_MODE_SCHED and PTRACE_MODE_IBPB to be able to do ptrace() context tracking reasonably fine-grained ] Fixes: 18bf3c3ea8 ("x86/speculation: Use Indirect Branch Prediction Barrier in context switch") Originally-by: Tim Chen Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: "SchauflerCasey" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251437340.15880@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 31 ++++++++++++++++++++----------- include/linux/ptrace.h | 21 +++++++++++++++++++-- kernel/ptrace.c | 10 ++++++++++ 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 83a3f4c935fc..9e943de9d628 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -180,6 +181,19 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) } } +static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +{ + /* + * Check if the current (previous) task has access to the memory + * of the @tsk (next) task. If access is denied, make sure to + * issue a IBPB to stop user->user Spectre-v2 attacks. + * + * Note: __ptrace_may_access() returns 0 or -ERRNO. + */ + return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && + ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); +} + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -256,18 +270,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * one process from doing Spectre-v2 attacks on another. * * As an optimization, flush indirect branches only when - * switching into processes that disable dumping. This - * protects high value processes like gpg, without having - * too high performance overhead. IBPB is *expensive*! - * - * This will not flush branches when switching into kernel - * threads. It will also not flush if we switch to idle - * thread and back to the same process. It will flush if we - * switch to a different non-dumpable process. + * switching into a processes that can't be ptrace by the + * current one (as in such case, attacker has much more + * convenient way how to tamper with the next process than + * branch buffer poisoning). */ - if (tsk && tsk->mm && - tsk->mm->context.ctx_id != last_ctx_id && - get_dumpable(tsk->mm) != SUID_DUMP_USER) + if (static_cpu_has(X86_FEATURE_USE_IBPB) && + ibpb_needed(tsk, last_ctx_id)) indirect_branch_prediction_barrier(); if (IS_ENABLED(CONFIG_VMAP_STACK)) { diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 919b2a0b0307..b1d43b80abfa 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -62,14 +62,17 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 #define PTRACE_MODE_ATTACH 0x02 #define PTRACE_MODE_NOAUDIT 0x04 -#define PTRACE_MODE_FSCREDS 0x08 -#define PTRACE_MODE_REALCREDS 0x10 +#define PTRACE_MODE_FSCREDS 0x08 +#define PTRACE_MODE_REALCREDS 0x10 +#define PTRACE_MODE_SCHED 0x20 +#define PTRACE_MODE_IBPB 0x40 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) +#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB) /** * ptrace_may_access - check whether the caller is permitted to access @@ -87,6 +90,20 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); +/** + * ptrace_may_access - check whether the caller is permitted to access + * a target task. + * @task: target task + * @mode: selects type of access and caller credentials + * + * Returns true on success, false on denial. + * + * Similar to ptrace_may_access(). Only to be called from context switch + * code. Does not call into audit and the regular LSM hooks due to locking + * constraints. + */ +extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode); + static inline int ptrace_reparented(struct task_struct *child) { return !same_thread_group(child->real_parent, child->parent); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 84b1367935e4..d7be2159ac09 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -261,6 +261,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { + if (mode & PTRACE_MODE_SCHED) + return false; + if (mode & PTRACE_MODE_NOAUDIT) return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); else @@ -328,9 +331,16 @@ ok: !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; + if (mode & PTRACE_MODE_SCHED) + return 0; return security_ptrace_access_check(task, mode); } +bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode) +{ + return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED); +} + bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; From 0d55dce78a0e96f0909f4b7395887e84ba637254 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:39:28 +0200 Subject: [PATCH 0776/2608] x86/speculation: Propagate information about RSB filling mitigation to sysfs commit bb4b3b7762735cdaba5a40fd94c9303d9ffa147a upstream If spectrev2 mitigation has been enabled, RSB is filled on context switch in order to protect from various classes of spectrev2 attacks. If this mitigation is enabled, say so in sysfs for spectrev2. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251438580.15880@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ef02230221c5..700d1149601e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -870,10 +870,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); return ret; From 46dfe55fbee9be96f70482f3053b084fad3858d0 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Tue, 18 Sep 2018 07:45:00 -0700 Subject: [PATCH 0777/2608] x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant commit 0cbb76d6285794f30953bfa3ab831714b59dd700 upstream ..so that they match their asm counterpart. Add the missing ANNOTATE_NOSPEC_ALTERNATIVE in CALL_NOSPEC, while at it. Signed-off-by: Zhenzhong Duan Signed-off-by: Borislav Petkov Cc: Daniel Borkmann Cc: David Woodhouse Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang YanQing Cc: dhaval.giani@oracle.com Cc: srinivas.eeda@oracle.com Link: http://lkml.kernel.org/r/c3975665-173e-4d70-8dee-06c926ac26ee@default Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1b4132161c1f..7827f17e9a8a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -170,11 +170,15 @@ */ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ - ALTERNATIVE( \ + ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE) + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) @@ -184,7 +188,8 @@ * here, anyway. */ # define CALL_NOSPEC \ - ALTERNATIVE( \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ " jmp 904f;\n" \ @@ -199,7 +204,11 @@ " ret;\n" \ " .align 16\n" \ "904: call 901b;\n", \ - X86_FEATURE_RETPOLINE) + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #else /* No retpoline for C / inline asm */ From a9c90037af800bcc74bbdac338355c7f331bd913 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 2 Nov 2018 01:45:41 -0700 Subject: [PATCH 0778/2608] x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support commit 4cd24de3a0980bf3100c9dcb08ef65ca7c31af48 upstream Since retpoline capable compilers are widely available, make CONFIG_RETPOLINE hard depend on the compiler capability. Break the build when CONFIG_RETPOLINE is enabled and the compiler does not support it. Emit an error message in that case: "arch/x86/Makefile:226: *** You are building kernel with non-retpoline compiler, please update your compiler.. Stop." [dwmw: Fail the build with non-retpoline compiler] Suggested-by: Peter Zijlstra Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Borislav Petkov Cc: Daniel Borkmann Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Masahiro Yamada Cc: Michal Marek Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/cca0cb20-f9e2-4094-840b-fb0f8810cd34@default Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 4 ---- arch/x86/Makefile | 5 +++-- arch/x86/include/asm/nospec-branch.h | 10 ++++++---- arch/x86/kernel/cpu/bugs.c | 2 +- scripts/Makefile.build | 2 -- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2af0af33362a..026c2f2ac702 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -440,10 +440,6 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. - Without compiler support, at least indirect branches in assembler - code are eliminated. Since this includes the syscall entry path, - it is not entirely pointless. - config INTEL_RDT bool "Intel Resource Director Technology support" default n diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1c4d012550ec..ce3658dd98e8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -241,9 +241,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE -ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + $(error You are building kernel with non-retpoline compiler, please update your compiler.) endif + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) endif archscripts: scripts_basic diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7827f17e9a8a..690c838e3a7e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -162,11 +162,12 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#if defined(CONFIG_X86_64) && defined(RETPOLINE) +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_64 /* - * Since the inline asm uses the %V modifier which is only in newer GCC, - * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + * Inline asm uses the %V modifier which is only in newer GCC + * which is ensured when CONFIG_RETPOLINE is defined. */ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ @@ -181,7 +182,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) -#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +#else /* CONFIG_X86_32 */ /* * For i386 we use the original ret-equivalent retpoline, because * otherwise we'll run out of registers. We don't care about CET @@ -211,6 +212,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif #else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 700d1149601e..a0146ee52124 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -251,7 +251,7 @@ static void __init spec2_print_if_secure(const char *reason) static inline bool retp_compiler(void) { - return __is_defined(RETPOLINE); + return __is_defined(CONFIG_RETPOLINE); } static inline bool match_option(const char *arg, int arglen, const char *opt) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 7143da06d702..be9e5deb58ba 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -272,10 +272,8 @@ else objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif ifdef CONFIG_RETPOLINE -ifneq ($(RETPOLINE_CFLAGS),) objtool_args += --retpoline endif -endif ifdef CONFIG_MODVERSIONS From 8bbb50b6de99c178acd648a73098009073139f46 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 2 Nov 2018 01:45:41 -0700 Subject: [PATCH 0779/2608] x86/retpoline: Remove minimal retpoline support commit ef014aae8f1cd2793e4e014bbb102bed53f852b7 upstream Now that CONFIG_RETPOLINE hard depends on compiler support, there is no reason to keep the minimal retpoline support around which only provided basic protection in the assembly files. Suggested-by: Peter Zijlstra Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/f06f0a89-5587-45db-8ed2-0a9d6638d5c0@default Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 3 --- arch/x86/kernel/cpu/bugs.c | 13 ++----------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 690c838e3a7e..20d016b4e986 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -221,11 +221,8 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_MINIMAL, - SPECTRE_V2_RETPOLINE_MINIMAL_AMD, SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS, SPECTRE_V2_IBRS_ENHANCED, }; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a0146ee52124..dd87fe5397c3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -134,8 +134,6 @@ enum spectre_v2_mitigation_cmd { static const char *spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", - [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", @@ -249,11 +247,6 @@ static void __init spec2_print_if_secure(const char *reason) pr_info("%s selected on command line.\n", reason); } -static inline bool retp_compiler(void) -{ - return __is_defined(CONFIG_RETPOLINE); -} - static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -414,14 +407,12 @@ retpoline_auto: pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : - SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + mode = SPECTRE_V2_RETPOLINE_AMD; setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } else { retpoline_generic: - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : - SPECTRE_V2_RETPOLINE_MINIMAL; + mode = SPECTRE_V2_RETPOLINE_GENERIC; setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } From 7d422ca1950a1f04bc467e706ae6f89840b0bd16 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:29 +0100 Subject: [PATCH 0780/2608] x86/speculation: Update the TIF_SSBD comment commit 8eb729b77faf83ac4c1f363a9ad68d042415f24c upstream "Reduced Data Speculation" is an obsolete term. The correct new name is "Speculative store bypass disable" - which is abbreviated into SSBD. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.593893901@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 95ff2d7f553f..2976312b826a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -81,7 +81,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Speculative store bypass disable */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ From 4398714cb7d595725a4840da137faeac47c10669 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:30 +0100 Subject: [PATCH 0781/2608] x86/speculation: Clean up spectre_v2_parse_cmdline() commit 24848509aa55eac39d524b587b051f4e86df3c12 upstream Remove the unnecessary 'else' statement in spectre_v2_parse_cmdline() to save an indentation level. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.688010903@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index dd87fe5397c3..cb087dee5117 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -275,22 +275,21 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; - else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); - if (ret < 0) - return SPECTRE_V2_CMD_AUTO; - for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { - if (!match_option(arg, ret, mitigation_options[i].option)) - continue; - cmd = mitigation_options[i].cmd; - break; - } + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; - if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", arg); - return SPECTRE_V2_CMD_AUTO; - } + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_CMD_AUTO; } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || From 05dd5dc4c4fba8a8457b0bcb5336d4fe22d5c013 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:31 +0100 Subject: [PATCH 0782/2608] x86/speculation: Remove unnecessary ret variable in cpu_show_common() commit b86bda0426853bfe8a3506c7d2a5b332760ae46b upstream Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.783903657@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cb087dee5117..fcd710ab26b5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -844,8 +844,6 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -860,13 +858,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); From 8588c7d4a487927356a5026982bd89b45a2092fe Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:32 +0100 Subject: [PATCH 0783/2608] x86/speculation: Move STIPB/IBPB string conditionals out of cpu_show_common() commit a8f76ae41cd633ac00be1b3019b1eb4741be3828 upstream The Spectre V2 printout in cpu_show_common() handles conditionals for the various mitigation methods directly in the sprintf() argument list. That's hard to read and will become unreadable if more complex decisions need to be made for a particular method. Move the conditionals for STIBP and IBPB string selection into helper functions, so they can be extended later on. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.874479208@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fcd710ab26b5..06494a0cb107 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -841,6 +841,22 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static char *stibp_state(void) +{ + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) + return ", STIBP"; + else + return ""; +} + +static char *ibpb_state(void) +{ + if (boot_cpu_has(X86_FEATURE_USE_IBPB)) + return ", IBPB"; + else + return ""; +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -859,9 +875,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SPECTRE_V2: return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); From aca2ddbc23800652ec2f09c2df76f417823cda77 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:33 +0100 Subject: [PATCH 0784/2608] x86/speculation: Disable STIBP when enhanced IBRS is in use commit 34bce7c9690b1d897686aac89604ba7adc365556 upstream If enhanced IBRS is active, STIBP is redundant for mitigating Spectre v2 user space exploits from hyperthread sibling. Disable STIBP when enhanced IBRS is used. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.966801480@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 06494a0cb107..3b2c5f26bde2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -319,6 +319,10 @@ static bool stibp_needed(void) if (spectre_v2_enabled == SPECTRE_V2_NONE) return false; + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + if (!boot_cpu_has(X86_FEATURE_STIBP)) return false; @@ -843,6 +847,9 @@ static ssize_t l1tf_show_state(char *buf) static char *stibp_state(void) { + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return ""; + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) return ", STIBP"; else From 999b295ad1a9980754c6d63e9f8be109a3574118 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:34 +0100 Subject: [PATCH 0785/2608] x86/speculation: Rename SSBD update functions commit 26c4d75b234040c11728a8acb796b3a85ba7507c upstream During context switch, the SSBD bit in SPEC_CTRL MSR is updated according to changes of the TIF_SSBD flag in the current and next running task. Currently, only the bit controlling speculative store bypass disable in SPEC_CTRL MSR is updated and the related update functions all have "speculative_store" or "ssb" in their names. For enhanced mitigation control other bits in SPEC_CTRL MSR need to be updated as well, which makes the SSB names inadequate. Rename the "speculative_store*" functions to a more generic name. No functional change. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.058866968@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 +++--- arch/x86/kernel/cpu/bugs.c | 4 ++-- arch/x86/kernel/process.c | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index ae7c2c5cd7f0..8e2f8411c7a7 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -70,11 +70,11 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(unsigned long tif); +extern void speculation_ctrl_update(unsigned long tif); -static inline void speculative_store_bypass_update_current(void) +static inline void speculation_ctrl_update_current(void) { - speculative_store_bypass_update(current_thread_info()->flags); + speculation_ctrl_update(current_thread_info()->flags); } #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3b2c5f26bde2..a06ddbd25219 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -199,7 +199,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : ssbd_spec_ctrl_to_tif(hostval); - speculative_store_bypass_update(tif); + speculation_ctrl_update(tif); } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); @@ -629,7 +629,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) * mitigation until it is next scheduled. */ if (task == current && update) - speculative_store_bypass_update_current(); + speculation_ctrl_update_current(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 988a98f34c66..48c4a0fbd764 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -398,27 +398,27 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void intel_set_ssb_state(unsigned long tifn) +static __always_inline void spec_ctrl_update_msr(unsigned long tifn) { u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +static __always_inline void __speculation_ctrl_update(unsigned long tifn) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) amd_set_ssb_virt_state(tifn); else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) amd_set_core_ssb_state(tifn); else - intel_set_ssb_state(tifn); + spec_ctrl_update_msr(tifn); } -void speculative_store_bypass_update(unsigned long tif) +void speculation_ctrl_update(unsigned long tif) { preempt_disable(); - __speculative_store_bypass_update(tif); + __speculation_ctrl_update(tif); preempt_enable(); } @@ -455,7 +455,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); if ((tifp ^ tifn) & _TIF_SSBD) - __speculative_store_bypass_update(tifn); + __speculation_ctrl_update(tifn); } /* From bc4aa78e6954bd71295543163271262fede5d46f Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:35 +0100 Subject: [PATCH 0786/2608] x86/speculation: Reorganize speculation control MSRs update commit 01daf56875ee0cd50ed496a09b20eb369b45dfa5 upstream The logic to detect whether there's a change in the previous and next task's flag relevant to update speculation control MSRs is spread out across multiple functions. Consolidate all checks needed for updating speculation control MSRs into the new __speculation_ctrl_update() helper function. This makes it easy to pick the right speculation control MSR and the bits in MSR_IA32_SPEC_CTRL that need updating based on TIF flags changes. Originally-by: Thomas Lendacky Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.151077005@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process.c | 42 +++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 48c4a0fbd764..65c9edd08c27 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -398,27 +398,40 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void spec_ctrl_update_msr(unsigned long tifn) +/* + * Update the MSRs managing speculation control, during context switch. + * + * tifp: Previous task's thread flags + * tifn: Next task's thread flags + */ +static __always_inline void __speculation_ctrl_update(unsigned long tifp, + unsigned long tifn) { - u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; - wrmsrl(MSR_IA32_SPEC_CTRL, msr); -} + /* If TIF_SSBD is different, select the proper mitigation method */ + if ((tifp ^ tifn) & _TIF_SSBD) { + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + amd_set_ssb_virt_state(tifn); + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + amd_set_core_ssb_state(tifn); + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr |= ssbd_tif_to_spec_ctrl(tifn); + updmsr = true; + } + } -static __always_inline void __speculation_ctrl_update(unsigned long tifn) -{ - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) - amd_set_ssb_virt_state(tifn); - else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - amd_set_core_ssb_state(tifn); - else - spec_ctrl_update_msr(tifn); + if (updmsr) + wrmsrl(MSR_IA32_SPEC_CTRL, msr); } void speculation_ctrl_update(unsigned long tif) { + /* Forced update. Make sure all relevant TIF flags are different */ preempt_disable(); - __speculation_ctrl_update(tif); + __speculation_ctrl_update(~tif, tif); preempt_enable(); } @@ -454,8 +467,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - if ((tifp ^ tifn) & _TIF_SSBD) - __speculation_ctrl_update(tifn); + __speculation_ctrl_update(tifp, tifn); } /* From 01659361c63fdc91c0af239d08cdd211d590a656 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Sun, 25 Nov 2018 19:33:36 +0100 Subject: [PATCH 0787/2608] sched/smt: Make sched_smt_present track topology commit c5511d03ec090980732e929c318a7a6374b5550e upstream Currently the 'sched_smt_present' static key is enabled when at CPU bringup SMT topology is observed, but it is never disabled. However there is demand to also disable the key when the topology changes such that there is no SMT present anymore. Implement this by making the key count the number of cores that have SMT enabled. In particular, the SMT topology bits are set before interrrupts are enabled and similarly, are cleared after interrupts are disabled for the last time and the CPU dies. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.246110444@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bc664662081..0552ddbb25e2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5617,15 +5617,10 @@ int sched_cpu_activate(unsigned int cpu) #ifdef CONFIG_SCHED_SMT /* - * The sched_smt_present static key needs to be evaluated on every - * hotplug event because at boot time SMT might be disabled when - * the number of booted CPUs is limited. - * - * If then later a sibling gets hotplugged, then the key would stay - * off and SMT scheduling would never be functional. + * When going up, increment the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) > 1) - static_branch_enable_cpuslocked(&sched_smt_present); + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_inc_cpuslocked(&sched_smt_present); #endif set_cpu_active(cpu, true); @@ -5669,6 +5664,14 @@ int sched_cpu_deactivate(unsigned int cpu) */ synchronize_rcu_mult(call_rcu, call_rcu_sched); +#ifdef CONFIG_SCHED_SMT + /* + * When going down, decrement the number of cores with SMT present. + */ + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_dec_cpuslocked(&sched_smt_present); +#endif + if (!sched_smp_initialized) return 0; From 44ac7cd01544dd12241bc0c0e0eb5604ad77379a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:37 +0100 Subject: [PATCH 0788/2608] x86/Kconfig: Select SCHED_SMT if SMP enabled commit dbe733642e01dd108f71436aaea7b328cb28fd87 upstream CONFIG_SCHED_SMT is enabled by all distros, so there is not a real point to have it configurable. The runtime overhead in the core scheduler code is minimal because the actual SMT scheduling parts are conditional on a static key. This allows to expose the scheduler's SMT state static key to the speculation control code. Alternatively the scheduler's static key could be made always available when CONFIG_SMP is enabled, but that's just adding an unused static key to every other architecture for nothing. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.337452245@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 026c2f2ac702..4f393eb9745f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -955,13 +955,7 @@ config NR_CPUS approximately eight kilobytes to the kernel image. config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on SMP - ---help--- - SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel Pentium 4 chips with HyperThreading at a - cost of slightly increased overhead in some places. If unsure say - N here. + def_bool y if SMP config SCHED_MC def_bool y From 0e797117f18573ab31f72aed0924ff7999d860ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:38 +0100 Subject: [PATCH 0789/2608] sched/smt: Expose sched_smt_present static key commit 321a874a7ef85655e93b3206d0f36b4a6097f948 upstream Make the scheduler's 'sched_smt_present' static key globaly available, so it can be used in the x86 speculation control code. Provide a query function and a stub for the CONFIG_SMP=n case. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.430168326@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/smt.h | 18 ++++++++++++++++++ kernel/sched/sched.h | 4 +--- 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 include/linux/sched/smt.h diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h new file mode 100644 index 000000000000..c9e0be514110 --- /dev/null +++ b/include/linux/sched/smt.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_SMT_H +#define _LINUX_SCHED_SMT_H + +#include + +#ifdef CONFIG_SCHED_SMT +extern struct static_key_false sched_smt_present; + +static __always_inline bool sched_smt_active(void) +{ + return static_branch_likely(&sched_smt_present); +} +#else +static inline bool sched_smt_active(void) { return false; } +#endif + +#endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 63d999dfec80..b3ba6e5e99f2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -825,9 +826,6 @@ static inline int cpu_of(struct rq *rq) #ifdef CONFIG_SCHED_SMT - -extern struct static_key_false sched_smt_present; - extern void __update_idle_core(struct rq *rq); static inline void update_idle_core(struct rq *rq) From 36a4c5fc92857711019dc56be2669e19f8c3c86e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:39 +0100 Subject: [PATCH 0790/2608] x86/speculation: Rework SMT state change commit a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f upstream arch_smt_update() is only called when the sysfs SMT control knob is changed. This means that when SMT is enabled in the sysfs control knob the system is considered to have SMT active even if all siblings are offline. To allow finegrained control of the speculation mitigations, the actual SMT state is more interesting than the fact that siblings could be enabled. Rework the code, so arch_smt_update() is invoked from each individual CPU hotplug function, and simplify the update function while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 11 +++++------ include/linux/sched/smt.h | 2 ++ kernel/cpu.c | 15 +++++++++------ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a06ddbd25219..fa7a35ab831f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -342,16 +343,14 @@ void arch_smt_update(void) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; if (mask != x86_spec_ctrl_base) { pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); x86_spec_ctrl_base = mask; on_each_cpu(update_stibp_msr, NULL, 1); } diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index c9e0be514110..59d3736c454c 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -15,4 +15,6 @@ static __always_inline bool sched_smt_active(void) static inline bool sched_smt_active(void) { return false; } #endif +void arch_smt_update(void); + #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 90cf6a04e08a..5c907d96e3dd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,12 @@ void cpu_hotplug_enable(void) EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #endif /* CONFIG_HOTPLUG_CPU */ +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { } + #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; EXPORT_SYMBOL_GPL(cpu_smt_control); @@ -998,6 +1005,7 @@ out: * concurrent CPU hotplug via cpu_add_remove_lock. */ lockup_detector_cleanup(); + arch_smt_update(); return ret; } @@ -1126,6 +1134,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) ret = cpuhp_up_callbacks(cpu, st, target); out: cpus_write_unlock(); + arch_smt_update(); return ret; } @@ -2045,12 +2054,6 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } -/* - * Architectures that need SMT-specific errata handling during SMT hotplug - * should override this. - */ -void __weak arch_smt_update(void) { }; - static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; From 8345d546238f68e7fe219df25eeecb200f34334b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:40 +0100 Subject: [PATCH 0791/2608] x86/l1tf: Show actual SMT state commit 130d6f946f6f2a972ee3ec8540b7243ab99abe97 upstream Use the now exposed real SMT state, not the SMT sysfs control knob state. This reflects the state of the system when the mitigation status is queried. This does not change the warning in the VMX launch code. There the dependency on the control knob makes sense because siblings could be brought online anytime after launching the VM. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.613357354@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fa7a35ab831f..ccd08d127624 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -829,13 +829,14 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && - cpu_smt_control == CPU_SMT_ENABLED)) + sched_smt_active())) { return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation]); + } return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation], - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); + sched_smt_active() ? "vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) From 72f90a89be9e125c1dd44a4d70b83055d1c3753e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:41 +0100 Subject: [PATCH 0792/2608] x86/speculation: Reorder the spec_v2 code commit 15d6b7aab0793b2de8a05d8a828777dd24db424e upstream Reorder the code so it is better grouped. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.707122879@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 172 ++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ccd08d127624..9fea8ff20c1f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -123,29 +123,6 @@ void __init check_bugs(void) #endif } -/* The kernel command line selection */ -enum spectre_v2_mitigation_cmd { - SPECTRE_V2_CMD_NONE, - SPECTRE_V2_CMD_AUTO, - SPECTRE_V2_CMD_FORCE, - SPECTRE_V2_CMD_RETPOLINE, - SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, -}; - -static const char *spectre_v2_strings[] = { - [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", -}; - -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -215,6 +192,12 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -236,6 +219,43 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); +} + +/* The kernel command line selection for spectre v2 */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", +}; + +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + static void __init spec2_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) @@ -248,31 +268,11 @@ static void __init spec2_print_if_secure(const char *reason) pr_info("%s selected on command line.\n", reason); } -static inline bool match_option(const char *arg, int arglen, const char *opt) -{ - int len = strlen(opt); - - return len == arglen && !strncmp(arg, opt, len); -} - -static const struct { - const char *option; - enum spectre_v2_mitigation_cmd cmd; - bool secure; -} mitigation_options[] = { - { "off", SPECTRE_V2_CMD_NONE, false }, - { "on", SPECTRE_V2_CMD_FORCE, true }, - { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, - { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, - { "auto", SPECTRE_V2_CMD_AUTO, false }, -}; - static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; char arg[20]; int ret, i; - enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; @@ -315,48 +315,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -459,6 +417,48 @@ specv2_set_mode: arch_smt_update(); } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt From caa118cf4c7997a35f6a18f5a16c38c7a9c71e3f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:42 +0100 Subject: [PATCH 0793/2608] x86/speculation: Mark string arrays const correctly commit 8770709f411763884535662744a3786a1806afd3 upstream checkpatch.pl muttered when reshuffling the code: WARNING: static const char * array should probably be static const char * const Fix up all the string arrays. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.800018931@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9fea8ff20c1f..45e08aacefe3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -236,7 +236,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; -static const char *spectre_v2_strings[] = { +static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", @@ -473,7 +473,7 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_SECCOMP, }; -static const char *ssb_strings[] = { +static const char * const ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", @@ -813,7 +813,7 @@ early_param("l1tf", l1tf_cmdline); #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" #if IS_ENABLED(CONFIG_KVM_INTEL) -static const char *l1tf_vmx_states[] = { +static const char * const l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_AUTO] = "auto", [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", From 95d41f1332f78fa6ad19459d1b98ad70407dd88b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:43 +0100 Subject: [PATCH 0794/2608] x86/speculataion: Mark command line parser data __initdata commit 30ba72a990f5096ae08f284de17986461efcc408 upstream No point to keep that around. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.893886356@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 45e08aacefe3..b11c7dfcae0e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -247,7 +247,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] = { +} mitigation_options[] __initdata = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -483,7 +483,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] = { +} ssb_mitigation_options[] __initdata = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ From ebd473909994d4ecce8e973f048f06d999f84845 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:44 +0100 Subject: [PATCH 0795/2608] x86/speculation: Unify conditional spectre v2 print functions commit 495d470e9828500e0155027f230449ac5e29c025 upstream There is no point in having two functions and a conditional at the call site. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.986890749@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b11c7dfcae0e..e3b444dd0039 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -256,15 +256,9 @@ static const struct { { "auto", SPECTRE_V2_CMD_AUTO, false }, }; -static void __init spec2_print_if_insecure(const char *reason) +static void __init spec_v2_print_cond(const char *reason, bool secure) { - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - -static void __init spec2_print_if_secure(const char *reason) -{ - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) pr_info("%s selected on command line.\n", reason); } @@ -307,11 +301,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (mitigation_options[i].secure) - spec2_print_if_secure(mitigation_options[i].option); - else - spec2_print_if_insecure(mitigation_options[i].option); - + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); return cmd; } From 90f293cc49fa44e24c07e74c89f3d87bc97cf41e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:45 +0100 Subject: [PATCH 0796/2608] x86/speculation: Add command line control for indirect branch speculation commit fa1202ef224391b6f5b26cdd44cc50495e8fab54 upstream Add command line control for user space indirect branch speculation mitigations. The new option is: spectre_v2_user= The initial options are: - on: Unconditionally enabled - off: Unconditionally disabled -auto: Kernel selects mitigation (default off for now) When the spectre_v2= command line argument is either 'on' or 'off' this implies that the application to application control follows that state even if a contradicting spectre_v2_user= argument is supplied. Originally-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.082720373@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 32 ++++- arch/x86/include/asm/nospec-branch.h | 10 ++ arch/x86/kernel/cpu/bugs.c | 133 +++++++++++++++--- 3 files changed, 156 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 99a08722124d..f0c19f61db80 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3994,9 +3994,13 @@ spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. + The default operation protects the kernel from + user space attacks. - on - unconditionally enable - off - unconditionally disable + on - unconditionally enable, implies + spectre_v2_user=on + off - unconditionally disable, implies + spectre_v2_user=off auto - kernel detects whether your CPU model is vulnerable @@ -4006,6 +4010,12 @@ CONFIG_RETPOLINE configuration option, and the compiler with which the kernel was built. + Selecting 'on' will also enable the mitigation + against user space to user space task attacks. + + Selecting 'off' will disable both the kernel and + the user space protections. + Specific mitigations can also be selected manually: retpoline - replace indirect branches @@ -4015,6 +4025,24 @@ Not specifying this option is equivalent to spectre_v2=auto. + spectre_v2_user= + [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability between + user space tasks + + on - Unconditionally enable mitigations. Is + enforced by spectre_v2=on + + off - Unconditionally disable mitigations. Is + enforced by spectre_v2=off + + auto - Kernel selects the mitigation depending on + the available CPU features and vulnerability. + Default is off. + + Not specifying this option is equivalent to + spectre_v2_user=auto. + spec_store_bypass_disable= [HW] Control Speculative Store Bypass (SSB) Disable mitigation (Speculative Store Bypass vulnerability) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 20d016b4e986..6057086fde1c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -3,6 +3,8 @@ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ +#include + #include #include #include @@ -226,6 +228,12 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS_ENHANCED, }; +/* The indirect branch speculation control variants */ +enum spectre_v2_user_mitigation { + SPECTRE_V2_USER_NONE, + SPECTRE_V2_USER_STRICT, +}; + /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, @@ -303,6 +311,8 @@ do { \ preempt_enable(); \ } while (0) +DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e3b444dd0039..bf5c3c4b9c19 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -53,6 +53,9 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 __ro_after_init x86_amd_ls_cfg_base; u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; +/* Control conditional STIPB in switch_to() */ +DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); + void __init check_bugs(void) { identify_boot_cpu(); @@ -198,6 +201,9 @@ static void x86_amd_ssb_disable(void) static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = + SPECTRE_V2_USER_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -236,6 +242,104 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; +enum spectre_v2_user_cmd { + SPECTRE_V2_USER_CMD_NONE, + SPECTRE_V2_USER_CMD_AUTO, + SPECTRE_V2_USER_CMD_FORCE, +}; + +static const char * const spectre_v2_user_strings[] = { + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", +}; + +static const struct { + const char *option; + enum spectre_v2_user_cmd cmd; + bool secure; +} v2_user_options[] __initdata = { + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, +}; + +static void __init spec_v2_user_print_cond(const char *reason, bool secure) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); +} + +static enum spectre_v2_user_cmd __init +spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +{ + char arg[20]; + int ret, i; + + switch (v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: + return SPECTRE_V2_USER_CMD_FORCE; + default: + break; + } + + ret = cmdline_find_option(boot_command_line, "spectre_v2_user", + arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_USER_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { + if (match_option(arg, ret, v2_user_options[i].option)) { + spec_v2_user_print_cond(v2_user_options[i].option, + v2_user_options[i].secure); + return v2_user_options[i].cmd; + } + } + + pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_USER_CMD_AUTO; +} + +static void __init +spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +{ + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); + + if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) + return; + + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + + switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; + case SPECTRE_V2_USER_CMD_FORCE: + mode = SPECTRE_V2_USER_STRICT; + break; + } + + /* Initialize Indirect Branch Prediction Barrier */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + } + + /* If enhanced IBRS is enabled no STIPB required */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + +set_mode: + spectre_v2_user = mode; + /* Only print the STIBP mode when SMT possible */ + if (smt_possible) + pr_info("%s\n", spectre_v2_user_strings[mode]); +} + static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", @@ -382,12 +486,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } - /* * Retpoline means the kernel is safe because it has no indirect * branches. Enhanced IBRS protects firmware too, so, enable restricted @@ -404,23 +502,21 @@ specv2_set_mode: pr_info("Enabling Restricted Speculation for firmware calls\n"); } + /* Set up IBPB and STIBP depending on the general spectre V2 command */ + spectre_v2_user_select_mitigation(cmd); + /* Enable STIBP if appropriate */ arch_smt_update(); } static bool stibp_needed(void) { - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - /* Enhanced IBRS makes using STIBP unnecessary. */ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return false; - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; + /* Check for strict user mitigation mode */ + return spectre_v2_user == SPECTRE_V2_USER_STRICT; } static void update_stibp_msr(void *info) @@ -841,10 +937,13 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) - return ", STIBP"; - else - return ""; + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", STIBP: forced"; + } + return ""; } static char *ibpb_state(void) From 1fe4e69a5ce7bf36fbd303e64cb63adb14708c0b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:46 +0100 Subject: [PATCH 0797/2608] x86/speculation: Prepare for per task indirect branch speculation control commit 5bfbe3ad5840d941b89bcac54b821ba14f50a0ba upstream To avoid the overhead of STIBP always on, it's necessary to allow per task control of STIBP. Add a new task flag TIF_SPEC_IB and evaluate it during context switch if SMT is active and flag evaluation is enabled by the speculation control code. Add the conditional evaluation to x86_virt_spec_ctrl() as well so the guest/host switch works properly. This has no effect because TIF_SPEC_IB cannot be set yet and the static key which controls evaluation is off. Preparatory patch for adding the control code. [ tglx: Simplify the context switch logic and make the TIF evaluation depend on SMP=y and on the static key controlling the conditional update. Rename it to TIF_SPEC_IB because it controls both STIBP and IBPB ] Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.176917199@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 5 +++-- arch/x86/include/asm/spec-ctrl.h | 12 ++++++++++++ arch/x86/include/asm/thread_info.h | 5 ++++- arch/x86/kernel/cpu/bugs.c | 4 ++++ arch/x86/kernel/process.c | 20 ++++++++++++++++++-- 5 files changed, 41 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ef7eec669a1b..62c62d3eb0ff 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,9 +41,10 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ -#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 8e2f8411c7a7..27b0bce3933b 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2976312b826a..78ebda74b225 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -85,6 +85,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -112,6 +113,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -148,7 +150,8 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ + _TIF_SSBD|_TIF_SPEC_IB) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bf5c3c4b9c19..5bba757fead1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -147,6 +147,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + /* Conditional STIBP enabled? */ + if (static_branch_unlikely(&switch_to_cond_stibp)) + hostval |= stibp_tif_to_spec_ctrl(ti->flags); + if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 65c9edd08c27..2baa10f05052 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -407,11 +407,17 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) static __always_inline void __speculation_ctrl_update(unsigned long tifp, unsigned long tifn) { + unsigned long tif_diff = tifp ^ tifn; u64 msr = x86_spec_ctrl_base; bool updmsr = false; - /* If TIF_SSBD is different, select the proper mitigation method */ - if ((tifp ^ tifn) & _TIF_SSBD) { + /* + * If TIF_SSBD is different, select the proper mitigation + * method. Note that if SSBD mitigation is disabled or permanentely + * enabled this branch can't be taken because nothing can set + * TIF_SSBD. + */ + if (tif_diff & _TIF_SSBD) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { amd_set_ssb_virt_state(tifn); } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { @@ -423,6 +429,16 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } } + /* + * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, + * otherwise avoid the MSR write. + */ + if (IS_ENABLED(CONFIG_SMP) && + static_branch_unlikely(&switch_to_cond_stibp)) { + updmsr |= !!(tif_diff & _TIF_SPEC_IB); + msr |= stibp_tif_to_spec_ctrl(tifn); + } + if (updmsr) wrmsrl(MSR_IA32_SPEC_CTRL, msr); } From 7fe6a4baff26dc90fa8ebcbb8f844cc8d3fb256f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:47 +0100 Subject: [PATCH 0798/2608] x86/process: Consolidate and simplify switch_to_xtra() code commit ff16701a29cba3aafa0bd1656d766813b2d0a811 upstream Move the conditional invocation of __switch_to_xtra() into an inline function so the logic can be shared between 32 and 64 bit. Remove the handthrough of the TSS pointer and retrieve the pointer directly in the bitmap handling function. Use this_cpu_ptr() instead of the per_cpu() indirection. This is a preparatory change so integration of conditional indirect branch speculation optimization happens only in one place. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.280855518@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/switch_to.h | 3 --- arch/x86/kernel/process.c | 12 +++++++----- arch/x86/kernel/process.h | 24 ++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 10 +++------- arch/x86/kernel/process_64.c | 10 +++------- 5 files changed, 37 insertions(+), 22 deletions(-) create mode 100644 arch/x86/kernel/process.h diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 9b6df68d8fd1..12ef2b49d11b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ static inline void prepare_switch_to(struct task_struct *prev, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2baa10f05052..dd1157b4bd15 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -41,6 +41,8 @@ #include #include +#include "process.h" + /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. The TSS size is kept cacheline-aligned @@ -255,11 +257,12 @@ void arch_setup_new_exec(void) enable_cpuid(); } -static inline void switch_to_bitmap(struct tss_struct *tss, - struct thread_struct *prev, +static inline void switch_to_bitmap(struct thread_struct *prev, struct thread_struct *next, unsigned long tifp, unsigned long tifn) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. @@ -451,8 +454,7 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; unsigned long tifp, tifn; @@ -462,7 +464,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(tss, prev, next, tifp, tifn); + switch_to_bitmap(prev, next, tifp, tifn); propagate_user_return_notify(prev_p, next_p); diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h new file mode 100644 index 000000000000..020fbfac3a27 --- /dev/null +++ b/arch/x86/kernel/process.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Code shared between 32 and 64 bit + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); + +/* + * This needs to be inline to optimize for the common case where no extra + * work needs to be done. + */ +static inline void switch_to_extra(struct task_struct *prev, + struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long prev_tif = task_thread_info(prev)->flags; + + /* + * __switch_to_xtra() handles debug registers, i/o bitmaps, + * speculation mitigations etc. + */ + if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || + prev_tif & _TIF_WORK_CTXSW_PREV)) + __switch_to_xtra(prev, next); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5224c6099184..c2df91eab573 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,6 +59,8 @@ #include #include +#include "process.h" + void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -234,7 +236,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -266,12 +267,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index cbeecfcc66d6..ec63d6be5e02 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -59,6 +59,8 @@ #include #endif +#include "process.h" + __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); /* Prints also some state that isn't saved in the pt_regs */ @@ -400,7 +402,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); @@ -467,12 +468,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload sp0. */ update_sp0(next_p); - /* - * Now maybe reload the debug registers and handle I/O bitmaps - */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || - task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) - __switch_to_xtra(prev_p, next_p, tss); + __switch_to_xtra(prev_p, next_p); #ifdef CONFIG_XEN_PV /* From ba523588b1bc0b9601e7ef102b56dc246a91d026 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:48 +0100 Subject: [PATCH 0799/2608] x86/speculation: Avoid __switch_to_xtra() calls commit 5635d99953f04b550738f6f4c1c532667c3fd872 upstream The TIF_SPEC_IB bit does not need to be evaluated in the decision to invoke __switch_to_xtra() when: - CONFIG_SMP is disabled - The conditional STIPB mode is disabled The TIF_SPEC_IB bit still controls IBPB in both cases so the TIF work mask checks might invoke __switch_to_xtra() for nothing if TIF_SPEC_IB is the only set bit in the work masks. Optimize it out by masking the bit at compile time for CONFIG_SMP=n and at run time when the static key controlling the conditional STIBP mode is disabled. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.374062201@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/thread_info.h | 13 +++++++++++-- arch/x86/kernel/process.h | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 78ebda74b225..7b30338ca55b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -149,9 +149,18 @@ struct thread_info { _TIF_FSCHECK) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ +#define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD|_TIF_SPEC_IB) + _TIF_SSBD) + +/* + * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. + */ +#ifdef CONFIG_SMP +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) +#else +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) +#endif #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 020fbfac3a27..898e97cf6629 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -2,6 +2,8 @@ // // Code shared between 32 and 64 bit +#include + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); /* @@ -14,6 +16,19 @@ static inline void switch_to_extra(struct task_struct *prev, unsigned long next_tif = task_thread_info(next)->flags; unsigned long prev_tif = task_thread_info(prev)->flags; + if (IS_ENABLED(CONFIG_SMP)) { + /* + * Avoid __switch_to_xtra() invocation when conditional + * STIPB is disabled and the only different bit is + * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not + * in the TIF_WORK_CTXSW masks. + */ + if (!static_branch_likely(&switch_to_cond_stibp)) { + prev_tif &= ~_TIF_SPEC_IB; + next_tif &= ~_TIF_SPEC_IB; + } + } + /* * __switch_to_xtra() handles debug registers, i/o bitmaps, * speculation mitigations etc. From cbca99b96f06f540669f07f768e228014e38b3b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:49 +0100 Subject: [PATCH 0800/2608] x86/speculation: Prepare for conditional IBPB in switch_mm() commit 4c71a2b6fd7e42814aa68a6dec88abf3b42ea573 upstream The IBPB speculation barrier is issued from switch_mm() when the kernel switches to a user space task with a different mm than the user space task which ran last on the same CPU. An additional optimization is to avoid IBPB when the incoming task can be ptraced by the outgoing task. This optimization only works when switching directly between two user space tasks. When switching from a kernel task to a user space task the optimization fails because the previous task cannot be accessed anymore. So for quite some scenarios the optimization is just adding overhead. The upcoming conditional IBPB support will issue IBPB only for user space tasks which have the TIF_SPEC_IB bit set. This requires to handle the following cases: 1) Switch from a user space task (potential attacker) which has TIF_SPEC_IB set to a user space task (potential victim) which has TIF_SPEC_IB not set. 2) Switch from a user space task (potential attacker) which has TIF_SPEC_IB not set to a user space task (potential victim) which has TIF_SPEC_IB set. This needs to be optimized for the case where the IBPB can be avoided when only kernel threads ran in between user space tasks which belong to the same process. The current check whether two tasks belong to the same context is using the tasks context id. While correct, it's simpler to use the mm pointer because it allows to mangle the TIF_SPEC_IB bit into it. The context id based mechanism requires extra storage, which creates worse code. When a task is scheduled out its TIF_SPEC_IB bit is mangled as bit 0 into the per CPU storage which is used to track the last user space mm which was running on a CPU. This bit can be used together with the TIF_SPEC_IB bit of the incoming task to make the decision whether IBPB needs to be issued or not to cover the two cases above. As conditional IBPB is going to be the default, remove the dubious ptrace check for the IBPB always case and simply issue IBPB always when the process changes. Move the storage to a different place in the struct as the original one created a hole. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.466447057@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 + arch/x86/include/asm/tlbflush.h | 8 +- arch/x86/kernel/cpu/bugs.c | 29 +++++-- arch/x86/mm/tlb.c | 114 ++++++++++++++++++++------- 4 files changed, 118 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 6057086fde1c..2de273c47521 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -312,6 +312,8 @@ do { \ } while (0) DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 2501be609b82..e31040333f0c 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -185,10 +185,14 @@ struct tlb_state { #define LOADED_MM_SWITCHING ((struct mm_struct *)1) + /* Last user mm for optimizing IBPB */ + union { + struct mm_struct *last_user_mm; + unsigned long last_user_mm_ibpb; + }; + u16 loaded_mm_asid; u16 next_asid; - /* last user mm's ctx id */ - u64 last_ctx_id; /* * We can be in one of several states: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5bba757fead1..f4bcdae79907 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -55,6 +55,10 @@ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; /* Control conditional STIPB in switch_to() */ DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); +/* Control conditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +/* Control unconditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); void __init check_bugs(void) { @@ -330,7 +334,17 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + + switch (mode) { + case SPECTRE_V2_USER_STRICT: + static_branch_enable(&switch_mm_always_ibpb); + break; + default: + break; + } + + pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", + mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ @@ -952,10 +966,15 @@ static char *stibp_state(void) static char *ibpb_state(void) { - if (boot_cpu_has(X86_FEATURE_USE_IBPB)) - return ", IBPB"; - else - return ""; + if (boot_cpu_has(X86_FEATURE_IBPB)) { + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", IBPB: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", IBPB: always-on"; + } + } + return ""; } static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 9e943de9d628..5400a24e1a8c 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -30,6 +29,12 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is + * stored in cpu_tlb_state.last_user_mm_ibpb. + */ +#define LAST_USER_MM_IBPB 0x1UL + /* * We get here when we do something requiring a TLB invalidation * but could not go invalidate all of the contexts. We do the @@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) } } -static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) { + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; + + return (unsigned long)next->mm | ibpb; +} + +static void cond_ibpb(struct task_struct *next) +{ + if (!next || !next->mm) + return; + /* - * Check if the current (previous) task has access to the memory - * of the @tsk (next) task. If access is denied, make sure to - * issue a IBPB to stop user->user Spectre-v2 attacks. - * - * Note: __ptrace_may_access() returns 0 or -ERRNO. + * Both, the conditional and the always IBPB mode use the mm + * pointer to avoid the IBPB when switching between tasks of the + * same process. Using the mm pointer instead of mm->context.ctx_id + * opens a hypothetical hole vs. mm_struct reuse, which is more or + * less impossible to control by an attacker. Aside of that it + * would only affect the first schedule so the theoretically + * exposed data is not really interesting. */ - return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && - ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); + if (static_branch_likely(&switch_mm_cond_ibpb)) { + unsigned long prev_mm, next_mm; + + /* + * This is a bit more complex than the always mode because + * it has to handle two cases: + * + * 1) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB set to a user space task + * (potential victim) which has TIF_SPEC_IB not set. + * + * 2) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB not set to a user space task + * (potential victim) which has TIF_SPEC_IB set. + * + * This could be done by unconditionally issuing IBPB when + * a task which has TIF_SPEC_IB set is either scheduled in + * or out. Though that results in two flushes when: + * + * - the same user space task is scheduled out and later + * scheduled in again and only a kernel thread ran in + * between. + * + * - a user space task belonging to the same process is + * scheduled in after a kernel thread ran in between + * + * - a user space task belonging to the same process is + * scheduled in immediately. + * + * Optimize this with reasonably small overhead for the + * above cases. Mangle the TIF_SPEC_IB bit into the mm + * pointer of the incoming task which is stored in + * cpu_tlbstate.last_user_mm_ibpb for comparison. + */ + next_mm = mm_mangle_tif_spec_ib(next); + prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); + + /* + * Issue IBPB only if the mm's are different and one or + * both have the IBPB bit set. + */ + if (next_mm != prev_mm && + (next_mm | prev_mm) & LAST_USER_MM_IBPB) + indirect_branch_prediction_barrier(); + + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); + } + + if (static_branch_unlikely(&switch_mm_always_ibpb)) { + /* + * Only flush when switching to a user space task with a + * different context than the user space task which ran + * last on this CPU. + */ + if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { + indirect_branch_prediction_barrier(); + this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); + } + } } void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, @@ -262,22 +337,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } else { u16 new_asid; bool need_flush; - u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); /* * Avoid user/user BTB poisoning by flushing the branch * predictor when switching between processes. This stops * one process from doing Spectre-v2 attacks on another. - * - * As an optimization, flush indirect branches only when - * switching into a processes that can't be ptrace by the - * current one (as in such case, attacker has much more - * convenient way how to tamper with the next process than - * branch buffer poisoning). */ - if (static_cpu_has(X86_FEATURE_USE_IBPB) && - ibpb_needed(tsk, last_ctx_id)) - indirect_branch_prediction_barrier(); + cond_ibpb(tsk); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -327,14 +393,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } - /* - * Record last user mm's context id, so we can avoid - * flushing branch buffer with IBPB if we switch back - * to the same user. - */ - if (next != &init_mm) - this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); - /* Make sure we write CR3 before loaded_mm. */ barrier(); @@ -415,7 +473,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ - this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); From dae4d59096700703266190f8dff92f9edb82b1b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:50 +0100 Subject: [PATCH 0801/2608] ptrace: Remove unused ptrace_may_access_sched() and MODE_IBRS commit 46f7ecb1e7359f183f5bbd1e08b90e10e52164f9 upstream The IBPB control code in x86 removed the usage. Remove the functionality which was introduced for this. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.559149393@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/ptrace.h | 17 ----------------- kernel/ptrace.c | 10 ---------- 2 files changed, 27 deletions(-) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index b1d43b80abfa..38342e88b3f3 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -64,15 +64,12 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_NOAUDIT 0x04 #define PTRACE_MODE_FSCREDS 0x08 #define PTRACE_MODE_REALCREDS 0x10 -#define PTRACE_MODE_SCHED 0x20 -#define PTRACE_MODE_IBPB 0x40 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) -#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB) /** * ptrace_may_access - check whether the caller is permitted to access @@ -90,20 +87,6 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); -/** - * ptrace_may_access - check whether the caller is permitted to access - * a target task. - * @task: target task - * @mode: selects type of access and caller credentials - * - * Returns true on success, false on denial. - * - * Similar to ptrace_may_access(). Only to be called from context switch - * code. Does not call into audit and the regular LSM hooks due to locking - * constraints. - */ -extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode); - static inline int ptrace_reparented(struct task_struct *child) { return !same_thread_group(child->real_parent, child->parent); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d7be2159ac09..84b1367935e4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -261,9 +261,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { - if (mode & PTRACE_MODE_SCHED) - return false; - if (mode & PTRACE_MODE_NOAUDIT) return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); else @@ -331,16 +328,9 @@ ok: !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; - if (mode & PTRACE_MODE_SCHED) - return 0; return security_ptrace_access_check(task, mode); } -bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode) -{ - return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED); -} - bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; From dcb4ac34f9373b1f4365b24ddfac2bcdefd59f6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:51 +0100 Subject: [PATCH 0802/2608] x86/speculation: Split out TIF update commit e6da8bb6f9abb2628381904b24163c770e630bac upstream The update of the TIF_SSBD flag and the conditional speculation control MSR update is done in the ssb_prctl_set() function directly. The upcoming prctl support for controlling indirect branch speculation via STIBP needs the same mechanism. Split the code out and make it reusable. Reword the comment about updates for other tasks. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.652305076@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f4bcdae79907..68da43cacead 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -699,10 +699,29 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) { bool update; + if (on) + update = !test_and_set_tsk_thread_flag(tsk, tifbit); + else + update = test_and_clear_tsk_thread_flag(tsk, tifbit); + + /* + * Immediately update the speculation control MSRs for the current + * task, but for a non-current task delay setting the CPU + * mitigation until it is scheduled next. + * + * This can only happen for SECCOMP mitigation. For PRCTL it's + * always the current task. + */ + if (tsk == current && update) + speculation_ctrl_update_current(); +} + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; @@ -713,28 +732,20 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, false); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; default: return -ERANGE; } - - /* - * If being set on non-current task, delay setting the CPU - * mitigation until it is next scheduled. - */ - if (task == current && update) - speculation_ctrl_update_current(); - return 0; } From e3f822b628a0d715a46d233e81881f6c2885247b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Nov 2018 10:56:57 +0100 Subject: [PATCH 0803/2608] x86/speculation: Prevent stale SPEC_CTRL msr content commit 6d991ba509ebcfcc908e009d1db51972a4f7a064 upstream The seccomp speculation control operates on all tasks of a process, but only the current task of a process can update the MSR immediately. For the other threads the update is deferred to the next context switch. This creates the following situation with Process A and B: Process A task 2 and Process B task 1 are pinned on CPU1. Process A task 2 does not have the speculation control TIF bit set. Process B task 1 has the speculation control TIF bit set. CPU0 CPU1 MSR bit is set ProcB.T1 schedules out ProcA.T2 schedules in MSR bit is cleared ProcA.T1 seccomp_update() set TIF bit on ProcA.T2 ProcB.T1 schedules in MSR is not updated <-- FAIL This happens because the context switch code tries to avoid the MSR update if the speculation control TIF bits of the incoming and the outgoing task are the same. In the worst case ProcB.T1 and ProcA.T2 are the only tasks scheduling back and forth on CPU1, which keeps the MSR stale forever. In theory this could be remedied by IPIs, but chasing the remote task which could be migrated is complex and full of races. The straight forward solution is to avoid the asychronous update of the TIF bit and defer it to the next context switch. The speculation control state is stored in task_struct::atomic_flags by the prctl and seccomp updates already. Add a new TIF_SPEC_FORCE_UPDATE bit and set this after updating the atomic_flags. Check the bit on context switch and force a synchronous update of the speculation control if set. Use the same mechanism for updating the current task. Reported-by: Tim Chen Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811272247140.1875@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 +----- arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/cpu/bugs.c | 18 +++++++----------- arch/x86/kernel/process.c | 30 +++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 27b0bce3933b..5393babc0598 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -83,10 +83,6 @@ static inline void speculative_store_bypass_ht_init(void) { } #endif extern void speculation_ctrl_update(unsigned long tif); - -static inline void speculation_ctrl_update_current(void) -{ - speculation_ctrl_update(current_thread_info()->flags); -} +extern void speculation_ctrl_update_current(void); #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 7b30338ca55b..bf9175d87844 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -86,6 +86,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -114,6 +115,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -151,7 +153,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD) + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 68da43cacead..9f9b03094628 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -699,14 +699,10 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) +static void task_update_spec_tif(struct task_struct *tsk) { - bool update; - - if (on) - update = !test_and_set_tsk_thread_flag(tsk, tifbit); - else - update = test_and_clear_tsk_thread_flag(tsk, tifbit); + /* Force the update of the real TIF bits */ + set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); /* * Immediately update the speculation control MSRs for the current @@ -716,7 +712,7 @@ static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) * This can only happen for SECCOMP mitigation. For PRCTL it's * always the current task. */ - if (tsk == current && update) + if (tsk == current) speculation_ctrl_update_current(); } @@ -732,16 +728,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, false); + task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; default: return -ERANGE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index dd1157b4bd15..0b7be5f31fb4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -446,6 +446,18 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, wrmsrl(MSR_IA32_SPEC_CTRL, msr); } +static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) +{ + if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { + if (task_spec_ssb_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SSBD); + else + clear_tsk_thread_flag(tsk, TIF_SSBD); + } + /* Return the updated threadinfo flags*/ + return task_thread_info(tsk)->flags; +} + void speculation_ctrl_update(unsigned long tif) { /* Forced update. Make sure all relevant TIF flags are different */ @@ -454,6 +466,14 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } +/* Called from seccomp/prctl update */ +void speculation_ctrl_update_current(void) +{ + preempt_disable(); + speculation_ctrl_update(speculation_ctrl_update_tif(current)); + preempt_enable(); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; @@ -485,7 +505,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - __speculation_ctrl_update(tifp, tifn); + if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { + __speculation_ctrl_update(tifp, tifn); + } else { + speculation_ctrl_update_tif(prev_p); + tifn = speculation_ctrl_update_tif(next_p); + + /* Enforce MSR update to ensure consistent state */ + __speculation_ctrl_update(~tifn, tifn); + } } /* From 99f1cb80daab80107c4b7f7f9a9ddf041a8b2137 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:52 +0100 Subject: [PATCH 0804/2608] x86/speculation: Prepare arch_smt_update() for PRCTL mode commit 6893a959d7fdebbab5f5aa112c277d5a44435ba1 upstream The upcoming fine grained per task STIBP control needs to be updated on CPU hotplug as well. Split out the code which controls the strict mode so the prctl control code can be added later. Mark the SMP function call argument __unused while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.759457117@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 50 ++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9f9b03094628..2e3da643d3fd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -527,40 +527,44 @@ specv2_set_mode: arch_smt_update(); } -static bool stibp_needed(void) -{ - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - /* Check for strict user mitigation mode */ - return spectre_v2_user == SPECTRE_V2_USER_STRICT; -} - -static void update_stibp_msr(void *info) +static void update_stibp_msr(void * __unused) { wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } +/* Update x86_spec_ctrl_base in case SMT state changed. */ +static void update_stibp_strict(void) +{ + u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask == x86_spec_ctrl_base) + return; + + pr_info("Update user space SMT mitigation: STIBP %s\n", + mask & SPEC_CTRL_STIBP ? "always-on" : "off"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); +} + void arch_smt_update(void) { - u64 mask; - - if (!stibp_needed()) + /* Enhanced IBRS implies STIBP. No update required. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + update_stibp_strict(); + break; } + mutex_unlock(&spec_ctrl_mutex); } From 6a847a6057721011b9534e709e5286c51ea31b9f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:53 +0100 Subject: [PATCH 0805/2608] x86/speculation: Add prctl() control for indirect branch speculation commit 9137bb27e60e554dab694eafa4cca241fa3a694f upstream Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of indirect branch speculation via STIBP and IBPB. Invocations: Check indirect branch speculation status with - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); Enable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); Disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); Force disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); See Documentation/userspace-api/spec_ctrl.rst. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de Signed-off-by: Greg Kroah-Hartman --- Documentation/userspace-api/spec_ctrl.rst | 9 +++ arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 67 +++++++++++++++++++++++ arch/x86/kernel/process.c | 5 ++ include/linux/sched.h | 9 +++ include/uapi/linux/prctl.h | 1 + 6 files changed, 92 insertions(+) diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst index 32f3d55c54b7..c4dbe6f7cdae 100644 --- a/Documentation/userspace-api/spec_ctrl.rst +++ b/Documentation/userspace-api/spec_ctrl.rst @@ -92,3 +92,12 @@ Speculation misfeature controls * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); + +- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes + (Mitigate Spectre V2 style attacks against user processes) + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2de273c47521..35ef92c5cc66 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -232,6 +232,7 @@ enum spectre_v2_mitigation { enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_PRCTL, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2e3da643d3fd..bcd39b928fea 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -563,6 +563,8 @@ void arch_smt_update(void) case SPECTRE_V2_USER_STRICT: update_stibp_strict(); break; + case SPECTRE_V2_USER_PRCTL: + break; } mutex_unlock(&spec_ctrl_mutex); @@ -749,12 +751,50 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return 0; + /* + * Indirect branch speculation is always disabled in strict + * mode. + */ + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return -EPERM; + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + /* + * Indirect branch speculation is always allowed when + * mitigation is force disabled. + */ + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return -EPERM; + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return 0; + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); + break; + default: + return -ERANGE; + } + return 0; +} + int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_set(task, ctrl); default: return -ENODEV; } @@ -787,11 +827,34 @@ static int ssb_prctl_get(struct task_struct *task) } } +static int ib_prctl_get(struct task_struct *task) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return PR_SPEC_NOT_AFFECTED; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return PR_SPEC_ENABLE; + case SPECTRE_V2_USER_PRCTL: + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case SPECTRE_V2_USER_STRICT: + return PR_SPEC_DISABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_get(task); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_get(task); default: return -ENODEV; } @@ -971,6 +1034,8 @@ static char *stibp_state(void) return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; + case SPECTRE_V2_USER_PRCTL: + return ""; } return ""; } @@ -983,6 +1048,8 @@ static char *ibpb_state(void) return ", IBPB: disabled"; case SPECTRE_V2_USER_STRICT: return ", IBPB: always-on"; + case SPECTRE_V2_USER_PRCTL: + return ""; } } return ""; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0b7be5f31fb4..a98d1cdd6299 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -453,6 +453,11 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_SSBD); else clear_tsk_thread_flag(tsk, TIF_SSBD); + + if (task_spec_ib_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SPEC_IB); + else + clear_tsk_thread_flag(tsk, TIF_SPEC_IB); } /* Return the updated threadinfo flags*/ return task_thread_info(tsk)->flags; diff --git a/include/linux/sched.h b/include/linux/sched.h index e04919aa8201..866439c361a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1405,6 +1405,8 @@ static inline bool is_percpu_thread(void) #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ +#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ +#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1436,6 +1438,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) + +TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) +TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) + static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 3027f943f4b3..214102fab940 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -203,6 +203,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) From 605b2828ff55471d8b2ab0b405991895a2216e06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:54 +0100 Subject: [PATCH 0806/2608] x86/speculation: Enable prctl mode for spectre_v2_user commit 7cc765a67d8e04ef7d772425ca5a2a1e2b894c15 upstream Now that all prerequisites are in place: - Add the prctl command line option - Default the 'auto' mode to 'prctl' - When SMT state changes, update the static key which controls the conditional STIBP evaluation on context switch. - At init update the static key which controls the conditional IBPB evaluation on context switch. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.958421388@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 7 +++- arch/x86/kernel/cpu/bugs.c | 41 +++++++++++++++---- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f0c19f61db80..dd9783e43443 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4036,9 +4036,14 @@ off - Unconditionally disable mitigations. Is enforced by spectre_v2=off + prctl - Indirect branch speculation is enabled, + but mitigation can be enabled via prctl + per thread. The mitigation control state + is inherited on fork. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. - Default is off. + Default is prctl. Not specifying this option is equivalent to spectre_v2_user=auto. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bcd39b928fea..d87a877b1b38 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -254,11 +254,13 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_NONE, SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, + SPECTRE_V2_USER_CMD_PRCTL, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", }; static const struct { @@ -269,6 +271,7 @@ static const struct { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -323,12 +326,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) smt_possible = false; switch (spectre_v2_parse_user_cmdline(v2_cmd)) { - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_PRCTL: + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -339,6 +345,9 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_STRICT: static_branch_enable(&switch_mm_always_ibpb); break; + case SPECTRE_V2_USER_PRCTL: + static_branch_enable(&switch_mm_cond_ibpb); + break; default: break; } @@ -351,6 +360,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; + /* + * If SMT is not possible or STIBP is not available clear the STIPB + * mode. + */ + if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + mode = SPECTRE_V2_USER_NONE; set_mode: spectre_v2_user = mode; /* Only print the STIBP mode when SMT possible */ @@ -549,6 +564,15 @@ static void update_stibp_strict(void) on_each_cpu(update_stibp_msr, NULL, 1); } +/* Update the static key controlling the evaluation of TIF_SPEC_IB */ +static void update_indir_branch_cond(void) +{ + if (sched_smt_active()) + static_branch_enable(&switch_to_cond_stibp); + else + static_branch_disable(&switch_to_cond_stibp); +} + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -564,6 +588,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + update_indir_branch_cond(); break; } @@ -1035,7 +1060,8 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: - return ""; + if (static_key_enabled(&switch_to_cond_stibp)) + return ", STIBP: conditional"; } return ""; } @@ -1043,14 +1069,11 @@ static char *stibp_state(void) static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: - return ", IBPB: disabled"; - case SPECTRE_V2_USER_STRICT: + if (static_key_enabled(&switch_mm_always_ibpb)) return ", IBPB: always-on"; - case SPECTRE_V2_USER_PRCTL: - return ""; - } + if (static_key_enabled(&switch_mm_cond_ibpb)) + return ", IBPB: conditional"; + return ", IBPB: disabled"; } return ""; } From ca97dd0009e688a67f6e22ce3fce0b4523741243 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:55 +0100 Subject: [PATCH 0807/2608] x86/speculation: Add seccomp Spectre v2 user space protection mode commit 6b3e64c237c072797a9ec918654a60e3a46488e2 upstream If 'prctl' mode of user space protection from spectre v2 is selected on the kernel command-line, STIBP and IBPB are applied on tasks which restrict their indirect branch speculation via prctl. SECCOMP enables the SSBD mitigation for sandboxed tasks already, so it makes sense to prevent spectre v2 user space to user space attacks as well. The Intel mitigation guide documents how STIPB works: Setting bit 1 (STIBP) of the IA32_SPEC_CTRL MSR on a logical processor prevents the predicted targets of indirect branches on any logical processor of that core from being controlled by software that executes (or executed previously) on another logical processor of the same core. Ergo setting STIBP protects the task itself from being attacked from a task running on a different hyper-thread and protects the tasks running on different hyper-threads from being attacked. While the document suggests that the branch predictors are shielded between the logical processors, the observed performance regressions suggest that STIBP simply disables the branch predictor more or less completely. Of course the document wording is vague, but the fact that there is also no requirement for issuing IBPB when STIBP is used points clearly in that direction. The kernel still issues IBPB even when STIBP is used until Intel clarifies the whole mechanism. IBPB is issued when the task switches out, so malicious sandbox code cannot mistrain the branch predictor for the next user space task on the same logical processor. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185006.051663132@linutronix.de Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 9 ++++++++- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dd9783e43443..10a28f7b1ec6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4041,9 +4041,16 @@ per thread. The mitigation control state is inherited on fork. + seccomp + - Same as "prctl" above, but all seccomp + threads will enable the mitigation unless + they explicitly opt out. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. - Default is prctl. + + Default mitigation: + If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl" Not specifying this option is equivalent to spectre_v2_user=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 35ef92c5cc66..a633767419f2 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -233,6 +233,7 @@ enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, SPECTRE_V2_USER_PRCTL, + SPECTRE_V2_USER_SECCOMP, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d87a877b1b38..5a03082ee189 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -255,12 +255,14 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_SECCOMP, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", }; static const struct { @@ -272,6 +274,7 @@ static const struct { { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -331,10 +334,16 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_PRCTL: mode = SPECTRE_V2_USER_PRCTL; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPECTRE_V2_USER_SECCOMP; + else + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -346,6 +355,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) static_branch_enable(&switch_mm_always_ibpb); break; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -588,6 +598,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: update_indir_branch_cond(); break; } @@ -830,6 +841,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -861,6 +874,7 @@ static int ib_prctl_get(struct task_struct *task) case SPECTRE_V2_USER_NONE: return PR_SPEC_ENABLE; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) @@ -1060,6 +1074,7 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) return ", STIBP: conditional"; } From 78085d7e3816606d6aa211245ab2cde3463d7795 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:56 +0100 Subject: [PATCH 0808/2608] x86/speculation: Provide IBPB always command line options commit 55a974021ec952ee460dc31ca08722158639de72 upstream Provide the possibility to enable IBPB always in combination with 'prctl' and 'seccomp'. Add the extra command line options and rework the IBPB selection to evaluate the command instead of the mode selected by the STIPB switch case. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185006.144047038@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 12 +++++++ arch/x86/kernel/cpu/bugs.c | 34 +++++++++++++------ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 10a28f7b1ec6..5f3d58142600 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4041,11 +4041,23 @@ per thread. The mitigation control state is inherited on fork. + prctl,ibpb + - Like "prctl" above, but only STIBP is + controlled per thread. IBPB is issued + always when switching between different user + space processes. + seccomp - Same as "prctl" above, but all seccomp threads will enable the mitigation unless they explicitly opt out. + seccomp,ibpb + - Like "seccomp" above, but only STIBP is + controlled per thread. IBPB is issued + always when switching between different + user space processes. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5a03082ee189..f7a6d6203e13 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -255,7 +255,9 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_PRCTL_IBPB, SPECTRE_V2_USER_CMD_SECCOMP, + SPECTRE_V2_USER_CMD_SECCOMP_IBPB, }; static const char * const spectre_v2_user_strings[] = { @@ -270,11 +272,13 @@ static const struct { enum spectre_v2_user_cmd cmd; bool secure; } v2_user_options[] __initdata = { - { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, - { "off", SPECTRE_V2_USER_CMD_NONE, false }, - { "on", SPECTRE_V2_USER_CMD_FORCE, true }, - { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, - { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -320,6 +324,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); + enum spectre_v2_user_cmd cmd; if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) return; @@ -328,17 +333,20 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + cmd = spectre_v2_parse_user_cmdline(v2_cmd); + switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: mode = SPECTRE_V2_USER_PRCTL; break; case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_SECCOMP: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: if (IS_ENABLED(CONFIG_SECCOMP)) mode = SPECTRE_V2_USER_SECCOMP; else @@ -350,12 +358,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - switch (mode) { - case SPECTRE_V2_USER_STRICT: + switch (cmd) { + case SPECTRE_V2_USER_CMD_FORCE: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); break; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -363,7 +374,8 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", - mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); + static_key_enabled(&switch_mm_always_ibpb) ? + "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ From a06361526cc71cb0ebef6ab7aa698ec63465b562 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 31 Oct 2018 14:53:57 -0700 Subject: [PATCH 0809/2608] kvm: mmu: Fix race in emulated page table writes commit 0e0fee5c539b61fdd098332e0e2cc375d9073706 upstream. When a guest page table is updated via an emulated write, kvm_mmu_pte_write() is called to update the shadow PTE using the just written guest PTE value. But if two emulated guest PTE writes happened concurrently, it is possible that the guest PTE and the shadow PTE end up being out of sync. Emulated writes do not mark the shadow page as unsync-ed, so this inconsistency will not be resolved even by a guest TLB flush (unless the page was marked as unsync-ed at some other point). This is fixed by re-reading the current value of the guest PTE after the MMU lock has been acquired instead of just using the value that was written prior to calling kvm_mmu_pte_write(). Signed-off-by: Junaid Shahid Reviewed-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d755e0d44ac1..364d9895dd56 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4734,9 +4734,9 @@ static bool need_remote_flush(u64 old, u64 new) } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, - const u8 *new, int *bytes) + int *bytes) { - u64 gentry; + u64 gentry = 0; int r; /* @@ -4748,22 +4748,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ *gpa &= ~(gpa_t)7; *bytes = 8; - r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8); - if (r) - gentry = 0; - new = (const u8 *)&gentry; } - switch (*bytes) { - case 4: - gentry = *(const u32 *)new; - break; - case 8: - gentry = *(const u64 *)new; - break; - default: - gentry = 0; - break; + if (*bytes == 4 || *bytes == 8) { + r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); + if (r) + gentry = 0; } return gentry; @@ -4876,8 +4866,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); - /* * No need to care whether allocation memory is successful * or not since pte prefetch is skiped if it does not have @@ -4886,6 +4874,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); + + gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); + ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); From 57e972ecad4fdee027a690b64c7d619dae489015 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 22 May 2018 09:54:20 -0700 Subject: [PATCH 0810/2608] kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb commit fd65d3142f734bc4376053c8d75670041903134d upstream. Previously, we only called indirect_branch_prediction_barrier on the logical CPU that freed a vmcb. This function should be called on all logical CPUs that last loaded the vmcb in question. Fixes: 15d45071523d ("KVM/x86: Add IBPB support") Reported-by: Neel Natu Signed-off-by: Jim Mattson Reviewed-by: Konrad Rzeszutek Wilk Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e9dbd62bb46e..17f08db34547 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1733,21 +1733,31 @@ out: return ERR_PTR(err); } +static void svm_clear_current_vmcb(struct vmcb *vmcb) +{ + int i; + + for_each_online_cpu(i) + cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); +} + static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So, ensure that no logical CPU has this + * vmcb page recorded as its current vmcb. + */ + svm_clear_current_vmcb(svm->vmcb); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); - /* - * The vmcb page can be recycled, causing a false negative in - * svm_vcpu_load(). So do a full IBPB now. - */ - indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) From 08b9a96720a1e67dfed4b22a5192892d556fc5fc Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 8 Nov 2018 00:43:06 +0200 Subject: [PATCH 0811/2608] KVM: x86: Fix kernel info-leak in KVM_HC_CLOCK_PAIRING hypercall commit bcbfbd8ec21096027f1ee13ce6c185e8175166f6 upstream. kvm_pv_clock_pairing() allocates local var "struct kvm_clock_pairing clock_pairing" on stack and initializes all it's fields besides padding (clock_pairing.pad[]). Because clock_pairing var is written completely (including padding) to guest memory, failure to init struct padding results in kernel info-leak. Fix the issue by making sure to also init the padding with zeroes. Fixes: 55dd00a73a51 ("KVM: x86: add KVM_HC_CLOCK_PAIRING hypercall") Reported-by: syzbot+a8ef68d71211ba264f56@syzkaller.appspotmail.com Reviewed-by: Mark Kanda Signed-off-by: Liran Alon Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8d688b213504..7c4d02dba110 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6378,6 +6378,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, clock_pairing.nsec = ts.tv_nsec; clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); clock_pairing.flags = 0; + memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad)); ret = 0; if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, From 83f00ab9a7c03e9f1410727d985b7fe9473002e1 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 20 Nov 2018 16:34:18 +0800 Subject: [PATCH 0812/2608] KVM: X86: Fix scan ioapic use-before-initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e97f852fd4561e77721bb9a4e0ea9d98305b1e93 upstream. Reported by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 00000000000001c8 PGD 80000003ec4da067 P4D 80000003ec4da067 PUD 3f7bfa067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 5059 Comm: debug Tainted: G OE 4.19.0-rc5 #16 RIP: 0010:__lock_acquire+0x1a6/0x1990 Call Trace: lock_acquire+0xdb/0x210 _raw_spin_lock+0x38/0x70 kvm_ioapic_scan_entry+0x3e/0x110 [kvm] vcpu_enter_guest+0x167e/0x1910 [kvm] kvm_arch_vcpu_ioctl_run+0x35c/0x610 [kvm] kvm_vcpu_ioctl+0x3e9/0x6d0 [kvm] do_vfs_ioctl+0xa5/0x690 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x83/0x6e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The reason is that the testcase writes hyperv synic HV_X64_MSR_SINT6 msr and triggers scan ioapic logic to load synic vectors into EOI exit bitmap. However, irqchip is not initialized by this simple testcase, ioapic/apic objects should not be accessed. This can be triggered by the following program: #define _GNU_SOURCE #include #include #include #include #include #include #include #include uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff}; int main(void) { syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0); long res = 0; memcpy((void*)0x20000040, "/dev/kvm", 9); res = syscall(__NR_openat, 0xffffffffffffff9c, 0x20000040, 0, 0); if (res != -1) r[0] = res; res = syscall(__NR_ioctl, r[0], 0xae01, 0); if (res != -1) r[1] = res; res = syscall(__NR_ioctl, r[1], 0xae41, 0); if (res != -1) r[2] = res; memcpy( (void*)0x20000080, "\x01\x00\x00\x00\x00\x5b\x61\xbb\x96\x00\x00\x40\x00\x00\x00\x00\x01\x00" "\x08\x00\x00\x00\x00\x00\x0b\x77\xd1\x78\x4d\xd8\x3a\xed\xb1\x5c\x2e\x43" "\xaa\x43\x39\xd6\xff\xf5\xf0\xa8\x98\xf2\x3e\x37\x29\x89\xde\x88\xc6\x33" "\xfc\x2a\xdb\xb7\xe1\x4c\xac\x28\x61\x7b\x9c\xa9\xbc\x0d\xa0\x63\xfe\xfe" "\xe8\x75\xde\xdd\x19\x38\xdc\x34\xf5\xec\x05\xfd\xeb\x5d\xed\x2e\xaf\x22" "\xfa\xab\xb7\xe4\x42\x67\xd0\xaf\x06\x1c\x6a\x35\x67\x10\x55\xcb", 106); syscall(__NR_ioctl, r[2], 0x4008ae89, 0x20000080); syscall(__NR_ioctl, r[2], 0xae80, 0); return 0; } This patch fixes it by bailing out scan ioapic if ioapic is not initialized in kernel. Reported-by: Wei Wu Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wei Wu Signed-off-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7c4d02dba110..f24329659bea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6885,7 +6885,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) else { if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, vcpu_to_synic(vcpu)->vec_bitmap, 256); From d039837ab8cb7fb7095ec9746fb04a854987179e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 13:29:41 -0800 Subject: [PATCH 0813/2608] xtensa: enable coprocessors that are being flushed commit 2958b66694e018c552be0b60521fec27e8d12988 upstream. coprocessor_flush_all may be called from a context of a thread that is different from the thread being flushed. In that case contents of the cpenable special register may not match ti->cpenable of the target thread, resulting in unhandled coprocessor exception in the kernel context. Set cpenable special register to the ti->cpenable of the target register for the duration of the flush and restore it afterwards. This fixes the following crash caused by coprocessor register inspection in native gdb: (gdb) p/x $w0 Illegal instruction in kernel: sig: 9 [#1] PREEMPT Call Trace: ___might_sleep+0x184/0x1a4 __might_sleep+0x41/0xac exit_signals+0x14/0x218 do_exit+0xc9/0x8b8 die+0x99/0xa0 do_illegal_instruction+0x18/0x6c common_exception+0x77/0x77 coprocessor_flush+0x16/0x3c arch_ptrace+0x46c/0x674 sys_ptrace+0x2ce/0x3b4 system_call+0x54/0x80 common_exception+0x77/0x77 note: gdb[100] exited with preempt_count 1 Killed Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/process.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index ff4f0ecb03dd..f1c46bc5d465 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -88,18 +88,21 @@ void coprocessor_release_all(struct thread_info *ti) void coprocessor_flush_all(struct thread_info *ti) { - unsigned long cpenable; + unsigned long cpenable, old_cpenable; int i; preempt_disable(); + RSR_CPENABLE(old_cpenable); cpenable = ti->cpenable; + WSR_CPENABLE(cpenable); for (i = 0; i < XCHAL_CP_MAX; i++) { if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti) coprocessor_flush(ti, i); cpenable >>= 1; } + WSR_CPENABLE(old_cpenable); preempt_enable(); } From b35182226166c03713a10ffebcf2838569c95d57 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 15:18:26 -0800 Subject: [PATCH 0814/2608] xtensa: fix coprocessor context offset definitions commit 03bc996af0cc71c7f30c384d8ce7260172423b34 upstream. Coprocessor context offsets are used by the assembly code that moves coprocessor context between the individual fields of the thread_info::xtregs_cp structure and coprocessor registers. This fixes coprocessor context clobbering on flushing and reloading during normal user code execution and user process debugging in the presence of more than one coprocessor in the core configuration. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/asm-offsets.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index bcb5beb81177..7df02fc934a9 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -91,14 +91,14 @@ int main(void) DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable)); #if XTENSA_HAVE_COPROCESSORS - DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp)); + DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0)); + DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1)); + DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2)); + DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3)); + DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4)); + DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5)); + DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6)); + DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7)); #endif DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user)); DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t)); From d36e4ee3b3ce4cf93e8fa145841287bbdb140fec Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 18:06:01 -0800 Subject: [PATCH 0815/2608] xtensa: fix coprocessor part of ptrace_{get,set}xregs commit 38a35a78c5e270cbe53c4fef6b0d3c2da90dd849 upstream. Layout of coprocessor registers in the elf_xtregs_t and xtregs_coprocessor_t may be different due to alignment. Thus it is not always possible to copy data between the xtregs_coprocessor_t structure and the elf_xtregs_t and get correct values for all registers. Use a table of offsets and sizes of individual coprocessor register groups to do coprocessor context copying in the ptrace_getxregs and ptrace_setxregs. This fixes incorrect coprocessor register values reading from the user process by the native gdb on an xtensa core with multiple coprocessors and registers with high alignment requirements. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/ptrace.c | 42 +++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index e2461968efb2..7c3ed7d78075 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs) } +#if XTENSA_HAVE_COPROCESSORS +#define CP_OFFSETS(cp) \ + { \ + .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \ + .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \ + .sz = sizeof(xtregs_ ## cp ## _t), \ + } + +static const struct { + size_t elf_xtregs_offset; + size_t ti_offset; + size_t sz; +} cp_offsets[] = { + CP_OFFSETS(cp0), + CP_OFFSETS(cp1), + CP_OFFSETS(cp2), + CP_OFFSETS(cp3), + CP_OFFSETS(cp4), + CP_OFFSETS(cp5), + CP_OFFSETS(cp6), + CP_OFFSETS(cp7), +}; +#endif + static int ptrace_getxregs(struct task_struct *child, void __user *uregs) { struct pt_regs *regs = task_pt_regs(child); struct thread_info *ti = task_thread_info(child); elf_xtregs_t __user *xtregs = uregs; int ret = 0; + int i __maybe_unused; if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t))) return -EIO; @@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs) #if XTENSA_HAVE_COPROCESSORS /* Flush all coprocessor registers to memory. */ coprocessor_flush_all(ti); - ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp, - sizeof(xtregs_coprocessor_t)); + + for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) + ret |= __copy_to_user((char __user *)xtregs + + cp_offsets[i].elf_xtregs_offset, + (const char *)ti + + cp_offsets[i].ti_offset, + cp_offsets[i].sz); #endif ret |= __copy_to_user(&xtregs->opt, ®s->xtregs_opt, sizeof(xtregs->opt)); @@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) struct pt_regs *regs = task_pt_regs(child); elf_xtregs_t *xtregs = uregs; int ret = 0; + int i __maybe_unused; if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t))) return -EFAULT; @@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) coprocessor_flush_all(ti); coprocessor_release_all(ti); - ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0, - sizeof(xtregs_coprocessor_t)); + for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) + ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset, + (const char __user *)xtregs + + cp_offsets[i].elf_xtregs_offset, + cp_offsets[i].sz); #endif ret |= __copy_from_user(®s->xtregs_opt, &xtregs->opt, sizeof(xtregs->opt)); From 52fa8eaac8149594d57bd4fe0cfeaa329037d400 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 14 Nov 2018 11:35:24 +0000 Subject: [PATCH 0816/2608] Btrfs: ensure path name is null terminated at btrfs_control_ioctl commit f505754fd6599230371cb01b9332754ddc104be1 upstream. We were using the path name received from user space without checking that it is null terminated. While btrfs-progs is well behaved and does proper validation and null termination, someone could call the ioctl and pass a non-null terminated patch, leading to buffer overrun problems in the kernel. The ioctl is protected by CAP_SYS_ADMIN. So just set the last byte of the path to a null character, similar to what we do in other ioctls (add/remove/resize device, snapshot creation, etc). CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fe960d5e8913..49a02bf091ae 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2176,6 +2176,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, vol = memdup_user((void __user *)arg, sizeof(*vol)); if (IS_ERR(vol)) return PTR_ERR(vol); + vol->name[BTRFS_PATH_NAME_MAX] = '\0'; switch (cmd) { case BTRFS_IOC_SCAN_DEV: From e380f318e63d851607cfc8b288281668aad11d53 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 23 Nov 2018 18:10:15 +0800 Subject: [PATCH 0817/2608] btrfs: relocation: set trans to be NULL after ending transaction commit 42a657f57628402c73237547f0134e083e2f6764 upstream. The function relocate_block_group calls btrfs_end_transaction to release trans when update_backref_cache returns 1, and then continues the loop body. If btrfs_block_rsv_refill fails this time, it will jump out the loop and the freed trans will be accessed. This may result in a use-after-free bug. The patch assigns NULL to trans after trans is released so that it will not be accessed. Fixes: 0647bf564f1 ("Btrfs: improve forever loop when doing balance relocation") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Signed-off-by: Pan Bian Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index eeae2c3ab17e..5feb8b03ffe8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4048,6 +4048,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) restart: if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans); + trans = NULL; continue; } From ee48a9df170c233b406d49aa8840625a51ee9c0c Mon Sep 17 00:00:00 2001 From: Hou Zhiqiang Date: Wed, 7 Nov 2018 05:16:49 +0000 Subject: [PATCH 0818/2608] PCI: layerscape: Fix wrong invocation of outbound window disable accessor commit c6fd6fe9dea44732cdcd970f1130b8cc50ad685a upstream. The order of parameters is not correct when invoking the outbound window disable routine. Fix it. Fixes: 4a2745d760fa ("PCI: layerscape: Disable outbound windows configured by bootloader") Signed-off-by: Hou Zhiqiang [lorenzo.pieralisi@arm.com: commit log] Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pci-layerscape.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c index 87fa486bee2c..1ede4b60aac3 100644 --- a/drivers/pci/dwc/pci-layerscape.c +++ b/drivers/pci/dwc/pci-layerscape.c @@ -89,7 +89,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie) int i; for (i = 0; i < PCIE_IATU_NUM; i++) - dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i); + dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND); } static int ls1021_pcie_link_up(struct dw_pcie *pci) From ad953dfdfa9543d224abd18fbf12bcac6454c376 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Tue, 13 Nov 2018 11:25:35 +0100 Subject: [PATCH 0819/2608] arm64: dts: rockchip: Fix PCIe reset polarity for rk3399-puma-haikou. commit c1d91f86a1b4c9c05854d59c6a0abd5d0f75b849 upstream. This patch fixes the wrong polarity setting for the PCIe host driver's pre-reset pin for rk3399-puma-haikou. Without this patch link training will most likely fail. Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Cc: stable@vger.kernel.org Signed-off-by: Christoph Muellner Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 9a7486058455..eea7f8f070cf 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -130,7 +130,7 @@ }; &pcie0 { - ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>; + ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; From 855eefd9124a5a963625c046faca862c9369b37c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 27 Nov 2018 14:41:37 +0100 Subject: [PATCH 0820/2608] x86/MCE/AMD: Fix the thresholding machinery initialization order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 60c8144afc287ef09ce8c1230c6aa972659ba1bb upstream. Currently, the code sets up the thresholding interrupt vector and only then goes about initializing the thresholding banks. Which is wrong, because an early thresholding interrupt would cause a NULL pointer dereference when accessing those banks and prevent the machine from booting. Therefore, set the thresholding interrupt vector only *after* having initialized the banks successfully. Fixes: 18807ddb7f88 ("x86/mce/AMD: Reset Threshold Limit after logging error") Reported-by: Rafał Miłecki Reported-by: John Clemens Signed-off-by: Borislav Petkov Tested-by: Rafał Miłecki Tested-by: John Clemens Cc: Aravind Gopalakrishnan Cc: linux-edac@vger.kernel.org Cc: stable@vger.kernel.org Cc: Tony Luck Cc: x86@kernel.org Cc: Yazen Ghannam Link: https://lkml.kernel.org/r/20181127101700.2964-1-zajec5@gmail.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=201291 Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index dbcb01006749..beec0daecbc5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -56,7 +56,7 @@ /* Threshold LVT offset is at MSR0xC0000410[15:12] */ #define SMCA_THR_LVT_OFF 0xF000 -static bool thresholding_en; +static bool thresholding_irq_en; static const char * const th_names[] = { "load_store", @@ -533,9 +533,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, set_offset: offset = setup_APIC_mce_threshold(offset, new); - - if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) - mce_threshold_vector = amd_threshold_interrupt; + if (offset == new) + thresholding_irq_en = true; done: mce_threshold_block_init(&b, offset); @@ -1356,9 +1355,6 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - if (!thresholding_en) - return 0; - for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1376,9 +1372,6 @@ int mce_threshold_create_device(unsigned int cpu) struct threshold_bank **bp; int err = 0; - if (!thresholding_en) - return 0; - bp = per_cpu(threshold_banks, cpu); if (bp) return 0; @@ -1407,9 +1400,6 @@ static __init int threshold_init_device(void) { unsigned lcpu = 0; - if (mce_threshold_vector == amd_threshold_interrupt) - thresholding_en = true; - /* to hit CPUs online before the notifier is up */ for_each_online_cpu(lcpu) { int err = mce_threshold_create_device(lcpu); @@ -1418,6 +1408,9 @@ static __init int threshold_init_device(void) return err; } + if (thresholding_irq_en) + mce_threshold_vector = amd_threshold_interrupt; + return 0; } /* From e8499ab5b93aca271c343786084c000c6f7839b5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Nov 2018 11:26:35 +0100 Subject: [PATCH 0821/2608] x86/fpu: Disable bottom halves while loading FPU registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 68239654acafe6aad5a3c1dc7237e60accfebc03 upstream. The sequence fpu->initialized = 1; /* step A */ preempt_disable(); /* step B */ fpu__restore(fpu); preempt_enable(); in __fpu__restore_sig() is racy in regard to a context switch. For 32bit frames, __fpu__restore_sig() prepares the FPU state within fpu->state. To ensure that a context switch (switch_fpu_prepare() in particular) does not modify fpu->state it uses fpu__drop() which sets fpu->initialized to 0. After fpu->initialized is cleared, the CPU's FPU state is not saved to fpu->state during a context switch. The new state is loaded via fpu__restore(). It gets loaded into fpu->state from userland and ensured it is sane. fpu->initialized is then set to 1 in order to avoid fpu__initialize() doing anything (overwrite the new state) which is part of fpu__restore(). A context switch between step A and B above would save CPU's current FPU registers to fpu->state and overwrite the newly prepared state. This looks like a tiny race window but the Kernel Test Robot reported this back in 2016 while we had lazy FPU support. Borislav Petkov made the link between that report and another patch that has been posted. Since the removal of the lazy FPU support, this race goes unnoticed because the warning has been removed. Disable bottom halves around the restore sequence to avoid the race. BH need to be disabled because BH is allowed to run (even with preemption disabled) and might invoke kernel_fpu_begin() by doing IPsec. [ bp: massage commit message a bit. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: stable@vger.kernel.org Cc: x86-ml Link: http://lkml.kernel.org/r/20181120102635.ddv3fvavxajjlfqk@linutronix.de Link: https://lkml.kernel.org/r/20160226074940.GA28911@pd.tnic Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 61a949d84dfa..d99a8ee9e185 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } + local_bh_disable(); fpu->initialized = 1; - preempt_disable(); fpu__restore(fpu); - preempt_enable(); + local_bh_enable(); return err; } else { From fae1bec5c2b29e809bd81b2ce8b5ba979ad23a92 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:10 +0100 Subject: [PATCH 0822/2608] perf/x86/intel: Move branch tracing setup to the Intel-specific source file commit ed6101bbf6266ee83e620b19faa7c6ad56bb41ab upstream. Moving branch tracing setup to Intel core object into separate intel_pmu_bts_config function, because it's Intel specific. Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-1-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 20 ------------------ arch/x86/events/intel/core.c | 41 +++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e5097dc85a06..7d12b0d1f359 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event) if (config == -1LL) return -EINVAL; - /* - * Branch tracing: - */ - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { - /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts_active) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (!attr->exclude_kernel) - return -EOPNOTSUPP; - - /* disallow bts if conflicting events are present */ - if (x86_add_exclusive(x86_lbr_exclusive_lbr)) - return -EBUSY; - - event->destroy = hw_perf_lbr_event_destroy; - } - hwc->config |= config; return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 228732654cfe..20501336fd94 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2973,10 +2973,49 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) return flags; } +static int intel_pmu_bts_config(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + + if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && + !attr->freq && hwc->sample_period == 1) { + /* BTS is not supported by this architecture. */ + if (!x86_pmu.bts_active) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (!attr->exclude_kernel) + return -EOPNOTSUPP; + + /* disallow bts if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } + + return 0; +} + +static int core_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + return intel_pmu_bts_config(event); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + ret = intel_pmu_bts_config(event); if (ret) return ret; @@ -3462,7 +3501,7 @@ static __initconst const struct x86_pmu core_pmu = { .enable_all = core_pmu_enable_all, .enable = core_pmu_enable_event, .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, + .hw_config = core_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, From ecef7c1ad46bfdc64c99574488252f4ba029afbc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:11 +0100 Subject: [PATCH 0823/2608] perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts() commit 67266c1080ad56c31af72b9c18355fde8ccc124a upstream. Currently we check the branch tracing only by checking for the PERF_COUNT_HW_BRANCH_INSTRUCTIONS event of PERF_TYPE_HARDWARE type. But we can define the same event with the PERF_TYPE_RAW type. Changing the intel_pmu_has_bts() code to check on event's final hw config value, so both HW types are covered. Adding unlikely to intel_pmu_has_bts() condition calls, because it was used in the original code in intel_bts_constraints. Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-2-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 17 +++-------------- arch/x86/events/perf_event.h | 13 +++++++++---- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 20501336fd94..7bb80151bfff 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2345,16 +2345,7 @@ done: static struct event_constraint * intel_bts_constraints(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - unsigned int hw_event, bts_event; - - if (event->attr.freq) - return NULL; - - hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; - bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - - if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) + if (unlikely(intel_pmu_has_bts(event))) return &bts_constraint; return NULL; @@ -2976,10 +2967,8 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) static int intel_pmu_bts_config(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { + if (unlikely(intel_pmu_has_bts(event))) { /* BTS is not supported by this architecture. */ if (!x86_pmu.bts_active) return -EOPNOTSUPP; @@ -3038,7 +3027,7 @@ static int intel_pmu_hw_config(struct perf_event *event) /* * BTS is set up earlier in this path, so don't account twice */ - if (!intel_pmu_has_bts(event)) { + if (!unlikely(intel_pmu_has_bts(event))) { /* disallow lbr if conflicting events are present */ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) return -EBUSY; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index c6698c63c047..3c51fcaf1e34 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -850,11 +850,16 @@ static inline int amd_pmu_init(void) static inline bool intel_pmu_has_bts(struct perf_event *event) { - if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !event->attr.freq && event->hw.sample_period == 1) - return true; + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; - return false; + if (event->attr.freq) + return false; + + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + + return hw_event == bts_event && hwc->sample_period == 1; } int intel_pmu_save_and_restart(struct perf_event *event); From b7c769ebd9db6e67a6e01c962f7c92b706fa6acf Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Fri, 30 Nov 2018 08:35:14 -0700 Subject: [PATCH 0824/2608] fs: fix lost error code in dio_complete commit 41e817bca3acd3980efe5dd7d28af0e6f4ab9247 upstream. commit e259221763a40403d5bb232209998e8c45804ab8 ("fs: simplify the generic_write_sync prototype") reworked callers of generic_write_sync(), and ended up dropping the error return for the directio path. Prior to that commit, in dio_complete(), an error would be bubbled up the stack, but after that commit, errors passed on to dio_complete were eaten up. This was reported on the list earlier, and a fix was proposed in https://lore.kernel.org/lkml/20160921141539.GA17898@infradead.org/, but never followed up with. We recently hit this bug in our testing where fencing io errors, which were previously erroring out with EIO, were being returned as success operations after this commit. The fix proposed on the list earlier was a little short -- it would have still called generic_write_sync() in case `ret` already contained an error. This fix ensures generic_write_sync() is only called when there's no pending error in the write. Additionally, transferred is replaced with ret to bring this code in line with other callers. Fixes: e259221763a4 ("fs: simplify the generic_write_sync prototype") Reported-by: Ravi Nankani Signed-off-by: Maximilian Heyne Reviewed-by: Christoph Hellwig CC: Torsten Mehlan CC: Uwe Dannowski CC: Amit Shah CC: David Woodhouse CC: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/direct-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 625a84aa6484..40567501015f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -304,8 +304,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) */ dio->iocb->ki_pos += transferred; - if (dio->op == REQ_OP_WRITE) - ret = generic_write_sync(dio->iocb, transferred); + if (ret > 0 && dio->op == REQ_OP_WRITE) + ret = generic_write_sync(dio->iocb, ret); dio->iocb->ki_complete(dio->iocb, ret, 0); } From 0d542d58b6b5fe01095484c2628e46afe88312a9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 18:16:33 +0100 Subject: [PATCH 0825/2608] ALSA: wss: Fix invalid snd_free_pages() at error path commit 7b69154171b407844c273ab4c10b5f0ddcd6aa29 upstream. Some spurious calls of snd_free_pages() have been overlooked and remain in the error paths of wss driver code. Since runtime->dma_area is managed by the PCM core helper, we shouldn't release manually. Drop the superfluous calls. Reviewed-by: Takashi Sakamoto Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/wss/wss_lib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c index 8a852042a066..91cd305cabd7 100644 --- a/sound/isa/wss/wss_lib.c +++ b/sound/isa/wss/wss_lib.c @@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream) if (err < 0) { if (chip->release_dma) chip->release_dma(chip, chip->dma_private_data, chip->dma1); - snd_free_pages(runtime->dma_area, runtime->dma_bytes); return err; } chip->playback_substream = substream; @@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream) if (err < 0) { if (chip->release_dma) chip->release_dma(chip, chip->dma_private_data, chip->dma2); - snd_free_pages(runtime->dma_area, runtime->dma_bytes); return err; } chip->capture_substream = substream; From ef8944bf5a97557bf0cb7b0da78bc60eb78bff33 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 15:44:00 +0100 Subject: [PATCH 0826/2608] ALSA: ac97: Fix incorrect bit shift at AC97-SPSA control write commit 7194eda1ba0872d917faf3b322540b4f57f11ba5 upstream. The function snd_ac97_put_spsa() gets the bit shift value from the associated private_value, but it extracts too much; the current code extracts 8 bit values in bits 8-15, but this is a combination of two nibbles (bits 8-11 and bits 12-15) for left and right shifts. Due to the incorrect bits extraction, the actual shift may go beyond the 32bit value, as spotted recently by UBSAN check: UBSAN: Undefined behaviour in sound/pci/ac97/ac97_codec.c:836:7 shift exponent 68 is too large for 32-bit type 'int' This patch fixes the shift value extraction by masking the properly with 0x0f instead of 0xff. Reported-and-tested-by: Meelis Roos Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ac97/ac97_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 1ef7cdf1d3e8..38f355ae1863 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_ { struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol); int reg = kcontrol->private_value & 0xff; - int shift = (kcontrol->private_value >> 8) & 0xff; + int shift = (kcontrol->private_value >> 8) & 0x0f; int mask = (kcontrol->private_value >> 16) & 0xff; // int invert = (kcontrol->private_value >> 24) & 0xff; unsigned short value, old, new; From 73ce314c172d4f7340b217c08861863665888972 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Nov 2018 14:36:17 +0100 Subject: [PATCH 0827/2608] ALSA: control: Fix race between adding and removing a user element commit e1a7bfe3807974e66f971f2589d4e0197ec0fced upstream. The procedure for adding a user control element has some window opened for race against the concurrent removal of a user element. This was caught by syzkaller, hitting a KASAN use-after-free error. This patch addresses the bug by wrapping the whole procedure to add a user control element with the card->controls_rwsem, instead of only around the increment of card->user_ctl_count. This required a slight code refactoring, too. The function snd_ctl_add() is split to two parts: a core function to add the control element and a part calling it. The former is called from the function for adding a user control element inside the controls_rwsem. One change to be noted is that snd_ctl_notify() for adding a control element gets called inside the controls_rwsem as well while it was called outside the rwsem. But this should be OK, as snd_ctl_notify() takes another (finer) rwlock instead of rwsem, and the call of snd_ctl_notify() inside rwsem is already done in another code path. Reported-by: syzbot+dc09047bce3820621ba2@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 80 +++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index af7e6165e21e..36571cd49be3 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -347,6 +347,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) return 0; } +/* add a new kcontrol object; call with card->controls_rwsem locked */ +static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) +{ + struct snd_ctl_elem_id id; + unsigned int idx; + unsigned int count; + + id = kcontrol->id; + if (id.index > UINT_MAX - kcontrol->count) + return -EINVAL; + + if (snd_ctl_find_id(card, &id)) { + dev_err(card->dev, + "control %i:%i:%i:%s:%i is already present\n", + id.iface, id.device, id.subdevice, id.name, id.index); + return -EBUSY; + } + + if (snd_ctl_find_hole(card, kcontrol->count) < 0) + return -ENOMEM; + + list_add_tail(&kcontrol->list, &card->controls); + card->controls_count += kcontrol->count; + kcontrol->id.numid = card->last_numid + 1; + card->last_numid += kcontrol->count; + + id = kcontrol->id; + count = kcontrol->count; + for (idx = 0; idx < count; idx++, id.index++, id.numid++) + snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); + + return 0; +} + /** * snd_ctl_add - add the control instance to the card * @card: the card instance @@ -363,45 +397,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) */ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) { - struct snd_ctl_elem_id id; - unsigned int idx; - unsigned int count; int err = -EINVAL; if (! kcontrol) return err; if (snd_BUG_ON(!card || !kcontrol->info)) goto error; - id = kcontrol->id; - if (id.index > UINT_MAX - kcontrol->count) - goto error; down_write(&card->controls_rwsem); - if (snd_ctl_find_id(card, &id)) { - up_write(&card->controls_rwsem); - dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n", - id.iface, - id.device, - id.subdevice, - id.name, - id.index); - err = -EBUSY; - goto error; - } - if (snd_ctl_find_hole(card, kcontrol->count) < 0) { - up_write(&card->controls_rwsem); - err = -ENOMEM; - goto error; - } - list_add_tail(&kcontrol->list, &card->controls); - card->controls_count += kcontrol->count; - kcontrol->id.numid = card->last_numid + 1; - card->last_numid += kcontrol->count; - id = kcontrol->id; - count = kcontrol->count; + err = __snd_ctl_add(card, kcontrol); up_write(&card->controls_rwsem); - for (idx = 0; idx < count; idx++, id.index++, id.numid++) - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); + if (err < 0) + goto error; return 0; error: @@ -1360,9 +1367,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, kctl->tlv.c = snd_ctl_elem_user_tlv; /* This function manage to free the instance on failure. */ - err = snd_ctl_add(card, kctl); - if (err < 0) - return err; + down_write(&card->controls_rwsem); + err = __snd_ctl_add(card, kctl); + if (err < 0) { + snd_ctl_free_one(kctl); + goto unlock; + } offset = snd_ctl_get_ioff(kctl, &info->id); snd_ctl_build_ioff(&info->id, kctl, offset); /* @@ -1373,10 +1383,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, * which locks the element. */ - down_write(&card->controls_rwsem); card->user_ctl_count++; - up_write(&card->controls_rwsem); + unlock: + up_write(&card->controls_rwsem); return 0; } From 46663071adbc33e7b2a299b8e15bf36166101bcf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 18:18:30 +0100 Subject: [PATCH 0828/2608] ALSA: sparc: Fix invalid snd_free_pages() at error path commit 9a20332ab373b1f8f947e0a9c923652b32dab031 upstream. Some spurious calls of snd_free_pages() have been overlooked and remain in the error paths of sparc cs4231 driver code. Since runtime->dma_area is managed by the PCM core helper, we shouldn't release manually. Drop the superfluous calls. Reviewed-by: Takashi Sakamoto Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/sparc/cs4231.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c index e73c962590eb..079063d8038d 100644 --- a/sound/sparc/cs4231.c +++ b/sound/sparc/cs4231.c @@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream) runtime->hw = snd_cs4231_playback; err = snd_cs4231_open(chip, CS4231_MODE_PLAY); - if (err < 0) { - snd_free_pages(runtime->dma_area, runtime->dma_bytes); + if (err < 0) return err; - } chip->playback_substream = substream; chip->p_periods_sent = 0; snd_pcm_set_sync(substream); @@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream) runtime->hw = snd_cs4231_capture; err = snd_cs4231_open(chip, CS4231_MODE_RECORD); - if (err < 0) { - snd_free_pages(runtime->dma_area, runtime->dma_bytes); + if (err < 0) return err; - } chip->capture_substream = substream; chip->c_periods_sent = 0; snd_pcm_set_sync(substream); From e50476e9da270e1dfc742187aa1520ac4edb3a74 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 8 Nov 2018 16:36:15 +0800 Subject: [PATCH 0829/2608] ALSA: hda/realtek - Support ALC300 commit 1078bef0cd9291355a20369b21cd823026ab8eaa upstream. This patch will enable ALC300. [ It's almost equivalent with other ALC269-compatible ones, and apparently has no loopback mixer -- tiwai ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index eb8807de3ebc..31c71ba3248e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -343,6 +343,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0285: case 0x10ec0298: case 0x10ec0289: + case 0x10ec0300: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; case 0x10ec0275: @@ -2758,6 +2759,7 @@ enum { ALC269_TYPE_ALC215, ALC269_TYPE_ALC225, ALC269_TYPE_ALC294, + ALC269_TYPE_ALC300, ALC269_TYPE_ALC700, }; @@ -2792,6 +2794,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC215: case ALC269_TYPE_ALC225: case ALC269_TYPE_ALC294: + case ALC269_TYPE_ALC300: case ALC269_TYPE_ALC700: ssids = alc269_ssids; break; @@ -7089,6 +7092,10 @@ static int patch_alc269(struct hda_codec *codec) spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */ break; + case 0x10ec0300: + spec->codec_variant = ALC269_TYPE_ALC300; + spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ + break; case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: @@ -8160,6 +8167,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269), HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269), HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269), HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861), HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd), HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861), From e2ec0cb4d09277730decd5e1a9c75fb18ccb7d7a Mon Sep 17 00:00:00 2001 From: Anisse Astier Date: Fri, 23 Nov 2018 17:59:11 +0100 Subject: [PATCH 0830/2608] ALSA: hda/realtek - fix headset mic detection for MSI MS-B171 commit 8cd65271f8e545ddeed10ecc2e417936bdff168e upstream. MSI Cubi N 8GL (MS-B171) needs the same fixup as its older model, the MS-B120, in order for the headset mic to be properly detected. They both use a single 3-way jack for both mic and headset with an ALC283 codec, with the same pins used. Cc: stable@vger.kernel.org Signed-off-by: Anisse Astier Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 31c71ba3248e..66b0a124beae 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6411,6 +6411,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), From 086d1f60f874829124ebeaf7d2223a414221ca3b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 25 Nov 2018 08:58:02 +0800 Subject: [PATCH 0831/2608] ext2: fix potential use after free commit ecebf55d27a11538ea84aee0be643dd953f830d5 upstream. The function ext2_xattr_set calls brelse(bh) to drop the reference count of bh. After that, bh may be freed. However, following brelse(bh), it reads bh->b_data via macro HDR(bh). This may result in a use-after-free bug. This patch moves brelse(bh) after reading field. CC: stable@vger.kernel.org Signed-off-by: Pan Bian Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 62d9a659a8ff..dd8f10db82e9 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -612,9 +612,9 @@ skip_replace: } cleanup: - brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); + brelse(bh); up_write(&EXT2_I(inode)->xattr_sem); return error; From 2a9443a9358031c6192c876f3e51e1fa00c4d497 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sun, 18 Nov 2018 20:03:02 +0100 Subject: [PATCH 0832/2608] ARM: dts: rockchip: Remove @0 from the veyron memory node commit 672e60b72bbe7aace88721db55b380b6a51fb8f9 upstream. The Coreboot version on veyron ChromeOS devices seems to ignore memory@0 nodes when updating the available memory and instead inserts another memory node without the address. This leads to 4GB systems only ever be using 2GB as the memory@0 node takes precedence. So remove the @0 for veyron devices. Fixes: 0b639b815f15 ("ARM: dts: rockchip: Add missing unit name to memory nodes in rk3288 boards") Cc: stable@vger.kernel.org Reported-by: Heikki Lindholm Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/rk3288-veyron.dtsi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 6e5bd8974f22..679b839bb2eb 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -47,7 +47,11 @@ #include "rk3288.dtsi" / { - memory@0 { + /* + * The default coreboot on veyron devices ignores memory@0 nodes + * and would instead create another memory node. + */ + memory { device_type = "memory"; reg = <0x0 0x0 0x0 0x80000000>; }; From 7e572222d3c9d67581f5409d7dbaf1e0e7b8de19 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 27 Nov 2018 17:06:34 +0100 Subject: [PATCH 0833/2608] dmaengine: at_hdmac: fix memory leak in at_dma_xlate() commit 98f5f932254b88ce828bc8e4d1642d14e5854caa upstream. The leak was found when opening/closing a serial port a great number of time, increasing kmalloc-32 in slabinfo. Each time the port was opened, dma_request_slave_channel() was called. Then, in at_dma_xlate(), atslave was allocated with devm_kzalloc() and never freed. (Well, it was free at module unload, but that's not what we want). So, here, kzalloc is more suited for the job since it has to be freed in atc_free_chan_resources(). Cc: stable@vger.kernel.org Fixes: bbe89c8e3d59 ("at_hdmac: move to generic DMA binding") Reported-by: Mario Forner Suggested-by: Alexandre Belloni Acked-by: Alexandre Belloni Acked-by: Ludovic Desroches Signed-off-by: Richard Genoud Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index a861b5b4d443..b87892a14ba9 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan) atchan->descs_allocated = 0; atchan->status = 0; + /* + * Free atslave allocated in at_dma_xlate() + */ + kfree(chan->private); + chan->private = NULL; + dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); } @@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL); + atslave = kzalloc(sizeof(*atslave), GFP_KERNEL); if (!atslave) return NULL; From 39c49a757d7c3e27d3bfabd1e102e9544372e103 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 27 Nov 2018 17:06:35 +0100 Subject: [PATCH 0834/2608] dmaengine: at_hdmac: fix module unloading commit 77e75fda94d2ebb86aa9d35fb1860f6395bf95de upstream. of_dma_controller_free() was not called on module onloading. This lead to a soft lockup: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! Modules linked in: at_hdmac [last unloaded: at_hdmac] when of_dma_request_slave_channel() tried to call ofdma->of_dma_xlate(). Cc: stable@vger.kernel.org Fixes: bbe89c8e3d59 ("at_hdmac: move to generic DMA binding") Acked-by: Ludovic Desroches Signed-off-by: Richard Genoud Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index b87892a14ba9..21ed0e20c5d9 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -2006,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev) struct resource *io; at_dma_off(atdma); + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&atdma->dma_common); dma_pool_destroy(atdma->memset_pool); From 85df1f9f8fb0c9c4801f5cc8ab1b25e77c1f06cd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 3 Dec 2018 13:06:57 +0200 Subject: [PATCH 0835/2608] btrfs: release metadata before running delayed refs We want to release the unused reservation we have since it refills the delayed refs reserve, which will make everything go smoother when running the delayed refs if we're short on our reservation. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/transaction.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f74005ca8f08..73c1fbca0c35 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1955,6 +1955,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; } + btrfs_trans_release_metadata(trans, fs_info); + trans->block_rsv = NULL; + /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ @@ -1964,9 +1967,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; } - btrfs_trans_release_metadata(trans, fs_info); - trans->block_rsv = NULL; - cur_trans = trans->transaction; /* From ca0908ddcd303d5e91982c6e5a91ad3d99cc99b6 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Sat, 3 Nov 2018 19:32:20 -0400 Subject: [PATCH 0836/2608] staging: vchiq_arm: fix compat VCHIQ_IOC_AWAIT_COMPLETION commit 5a96b2d38dc054c0bbcbcd585b116566cbd877fe upstream. The compatibility ioctl wrapper for VCHIQ_IOC_AWAIT_COMPLETION assumes that the native ioctl always uses a message buffer and decrements msgbufcount. Certain message types do not use a message buffer and in this case msgbufcount is not decremented, and completion->header for the message is NULL. Because the wrapper unconditionally decrements msgbufcount, the calling process may assume that a message buffer has been used even when it has not. This results in a memory leak in the userspace code that interfaces with this driver. When msgbufcount is decremented, the userspace code assumes that the buffer can be freed though the reference in completion->header, which cannot happen when the reference is NULL. This patch causes the wrapper to only decrement msgbufcount when the native ioctl decrements it. Note that we cannot simply copy the native ioctl's value of msgbufcount, because the wrapper only retrieves messages from the native ioctl one at a time, while userspace may request multiple messages. See https://github.com/raspberrypi/linux/pull/2703 for more discussion of this patch. Fixes: 5569a1260933 ("staging: vchiq_arm: Add compatibility wrappers for ioctls") Signed-off-by: Ben Wolsieffer Acked-by: Stefan Wahren Cc: stable Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 314ffac50bb8..f05e9af4fe81 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1461,6 +1461,7 @@ vchiq_compat_ioctl_await_completion(struct file *file, struct vchiq_await_completion32 args32; struct vchiq_completion_data32 completion32; unsigned int *msgbufcount32; + unsigned int msgbufcount_native; compat_uptr_t msgbuf32; void *msgbuf; void **msgbufptr; @@ -1572,7 +1573,11 @@ vchiq_compat_ioctl_await_completion(struct file *file, sizeof(completion32))) return -EFAULT; - args32.msgbufcount--; + if (get_user(msgbufcount_native, &args->msgbufcount)) + return -EFAULT; + + if (!msgbufcount_native) + args32.msgbufcount--; msgbufcount32 = &((struct vchiq_await_completion32 __user *)arg)->msgbufcount; From 74abe400e12b45f871c98c99f0f65521cc0c8ca6 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 8 Nov 2018 23:30:09 -0600 Subject: [PATCH 0837/2608] staging: rtl8723bs: Add missing return for cfg80211_rtw_get_station commit 8561fb31a1f9594e2807681f5c0721894e367f19 upstream. With Androidx86 8.1, wificond returns "failed to get nl80211_sta_info_tx_failed" and wificondControl returns "Invalid signal poll result from wificond". The fix is to OR sinfo->filled with BIT_ULL(NL80211_STA_INFO_TX_FAILED). This missing bit is apparently not needed with NetworkManager, but it does no harm in that case. Reported-and-Tested-by: youling257 Cc: linux-wireless@vger.kernel.org Cc: youling257 Signed-off-by: Larry Finger Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index bd4352fe2de3..83852f323c5e 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -1293,7 +1293,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy, sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); sinfo->tx_packets = psta->sta_stats.tx_pkts; - + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } /* for Ad-Hoc/AP mode */ From 403a2001bd795c43f59a5dd6695273fdfe0a3dcf Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 23 Nov 2018 08:42:19 +0000 Subject: [PATCH 0838/2608] USB: usb-storage: Add new IDs to ums-realtek commit a84a1bcc992f0545a51d2e120b8ca2ef20e2ea97 upstream. There are two new Realtek card readers require ums-realtek to work correctly. Add the new IDs to support them. Signed-off-by: Kai-Heng Feng Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_realtek.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h index 8fe624ad302a..7ca779493671 100644 --- a/drivers/usb/storage/unusual_realtek.h +++ b/drivers/usb/storage/unusual_realtek.h @@ -39,4 +39,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999, "USB Card Reader", USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), +UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999, + "Realtek", + "USB Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), + +UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999, + "Realtek", + "USB Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), + #endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */ From 2b7456f46ff957a3f04b3658a8d71b645d8cf172 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Sun, 25 Nov 2018 17:57:33 +0100 Subject: [PATCH 0839/2608] usb: core: quirks: add RESET_RESUME quirk for Cherry G230 Stream series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit effd14f66cc1ef6701a19c5a56e39c35f4d395a5 upstream. Cherry G230 Stream 2.0 (G85-231) and 3.0 (G85-232) need this quirk to function correctly. This fixes a but where double pressing numlock locks up the device completely with need to replug the keyboard. Signed-off-by: Michael Niewöhner Tested-by: Michael Niewöhner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 1e8f68960014..808437c5ec49 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -64,6 +64,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */ + { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, From f1a4876f0435dc43fc55f42bd30763de5e5ef9ec Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 19 Nov 2018 08:34:04 +0200 Subject: [PATCH 0840/2608] Revert "usb: dwc3: gadget: skip Set/Clear Halt when invalid" commit 38317f5c0f2faae5110854f36edad810f841d62f upstream. This reverts commit ffb80fc672c3a7b6afd0cefcb1524fb99917b2f3. Turns out that commit is wrong. Host controllers are allowed to use Clear Feature HALT as means to sync data toggle between host and periperal. Cc: Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ac8d619ff887..b8704c0678f9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1511,9 +1511,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) unsigned transfer_in_flight; unsigned started; - if (dep->flags & DWC3_EP_STALL) - return 0; - if (dep->number > 1) trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue); else @@ -1535,8 +1532,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) else dep->flags |= DWC3_EP_STALL; } else { - if (!(dep->flags & DWC3_EP_STALL)) - return 0; ret = dwc3_send_clear_stall_ep_cmd(dep); if (ret) From d08c62878fe2040e2cbc70554f86c54895a9f708 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Sun, 28 Oct 2018 20:18:53 -0700 Subject: [PATCH 0841/2608] iio:st_magn: Fix enable device after trigger commit fe5192ac81ad0d4dfe1395d11f393f0513c15f7f upstream. Currently, we enable the device before we enable the device trigger. At high frequencies, this can cause interrupts that don't yet have a poll function associated with them and are thus treated as spurious. At high frequencies with level interrupts, this can even cause an interrupt storm of repeated spurious interrupts (~100,000 on my Beagleboard with the LSM9DS1 magnetometer). If these repeat too much, the interrupt will get disabled and the device will stop functioning. To prevent these problems, enable the device prior to enabling the device trigger, and disable the divec prior to disabling the trigger. This means there's no window of time during which the device creates interrupts but we have no trigger to answer them. Fixes: 90efe055629 ("iio: st_sensors: harden interrupt handling") Signed-off-by: Martin Kelly Tested-by: Denis Ciocca Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/st_magn_buffer.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c index 0a9e8fadfa9d..37ab30566464 100644 --- a/drivers/iio/magnetometer/st_magn_buffer.c +++ b/drivers/iio/magnetometer/st_magn_buffer.c @@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state) return st_sensors_set_dataready_irq(indio_dev, state); } -static int st_magn_buffer_preenable(struct iio_dev *indio_dev) -{ - return st_sensors_set_enable(indio_dev, true); -} - static int st_magn_buffer_postenable(struct iio_dev *indio_dev) { int err; @@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev) if (err < 0) goto st_magn_buffer_postenable_error; - return err; + return st_sensors_set_enable(indio_dev, true); st_magn_buffer_postenable_error: kfree(mdata->buffer_data); @@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev) int err; struct st_sensor_data *mdata = iio_priv(indio_dev); - err = iio_triggered_buffer_predisable(indio_dev); + err = st_sensors_set_enable(indio_dev, false); if (err < 0) goto st_magn_buffer_predisable_error; - err = st_sensors_set_enable(indio_dev, false); + err = iio_triggered_buffer_predisable(indio_dev); st_magn_buffer_predisable_error: kfree(mdata->buffer_data); @@ -75,7 +70,6 @@ st_magn_buffer_predisable_error: } static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = { - .preenable = &st_magn_buffer_preenable, .postenable = &st_magn_buffer_postenable, .predisable = &st_magn_buffer_predisable, }; From 397dbde6149057b4b84acb37012b57801fbc9221 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 30 Nov 2018 14:09:21 -0800 Subject: [PATCH 0842/2608] lib/test_kmod.c: fix rmmod double free commit 5618cf031fecda63847cafd1091e7b8bd626cdb1 upstream. We free the misc device string twice on rmmod; fix this. Without this we cannot remove the module without crashing. Link: http://lkml.kernel.org/r/20181124050500.5257-1-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Reported-by: Randy Dunlap Reviewed-by: Andrew Morton Cc: [4.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/test_kmod.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 96c304fd656a..7abb59ce6613 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1221,7 +1221,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev) dev_info(test_dev->dev, "removing interface\n"); misc_deregister(&test_dev->misc_dev); - kfree(&test_dev->misc_dev.name); mutex_unlock(&test_dev->config_mutex); mutex_unlock(&test_dev->trigger_mutex); From 7c1ba1a1bb0d1a5dcebc737343f0c45840cf181d Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Nov 2018 14:09:03 -0800 Subject: [PATCH 0843/2608] mm: use swp_offset as key in shmem_replace_page() commit c1cb20d43728aa9b5393bd8d489bc85c142949b2 upstream. We changed the key of swap cache tree from swp_entry_t.val to swp_offset. We need to do so in shmem_replace_page() as well. Hugh said: "shmem_replace_page() has been wrong since the day I wrote it: good enough to work on swap "type" 0, which is all most people ever use (especially those few who need shmem_replace_page() at all), but broken once there are any non-0 swp_type bits set in the higher order bits" Link: http://lkml.kernel.org/r/20181121215442.138545-1-yuzhao@google.com Fixes: f6ab1f7f6b2d ("mm, swap: use offset of swap entry as key of swap cache") Signed-off-by: Yu Zhao Reviewed-by: Matthew Wilcox Acked-by: Hugh Dickins Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 177fef62cbbd..ab7ff0aeae2d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1532,11 +1532,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, { struct page *oldpage, *newpage; struct address_space *swap_mapping; + swp_entry_t entry; pgoff_t swap_index; int error; oldpage = *pagep; - swap_index = page_private(oldpage); + entry.val = page_private(oldpage); + swap_index = swp_offset(entry); swap_mapping = page_mapping(oldpage); /* @@ -1555,7 +1557,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, __SetPageLocked(newpage); __SetPageSwapBacked(newpage); SetPageUptodate(newpage); - set_page_private(newpage, swap_index); + set_page_private(newpage, entry.val); SetPageSwapCache(newpage); /* From 3c44b197448dfb8bef4fa9cab92b7a73153a6f76 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 26 Nov 2018 02:29:56 +0000 Subject: [PATCH 0844/2608] Drivers: hv: vmbus: check the creation_status in vmbus_establish_gpadl() commit eceb05965489784f24bbf4d61ba60e475a983016 upstream. This is a longstanding issue: if the vmbus upper-layer drivers try to consume too many GPADLs, the host may return with an error 0xC0000044 (STATUS_QUOTA_EXCEEDED), but currently we forget to check the creation_status, and hence we can pass an invalid GPADL handle into the OPEN_CHANNEL message, and get an error code 0xc0000225 in open_info->response.open_result.status, and finally we hang in vmbus_open() -> "goto error_free_info" -> vmbus_teardown_gpadl(). With this patch, we can exit gracefully on STATUS_QUOTA_EXCEEDED. Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: stable@vger.kernel.org Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index d96b09fea835..e05de5032f0c 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -454,6 +454,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } wait_for_completion(&msginfo->waitevent); + if (msginfo->response.gpadl_created.creation_status != 0) { + pr_err("Failed to establish GPADL: err = 0x%x\n", + msginfo->response.gpadl_created.creation_status); + + ret = -EDQUOT; + goto cleanup; + } + if (channel->rescind) { ret = -ENODEV; goto cleanup; From f7d33988bd923310934c8481a2e68b53f716df13 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 14 Nov 2018 01:57:03 +0000 Subject: [PATCH 0845/2608] misc: mic/scif: fix copy-paste error in scif_create_remote_lookup commit 6484a677294aa5d08c0210f2f387ebb9be646115 upstream. gcc '-Wunused-but-set-variable' warning: drivers/misc/mic/scif/scif_rma.c: In function 'scif_create_remote_lookup': drivers/misc/mic/scif/scif_rma.c:373:25: warning: variable 'vmalloc_num_pages' set but not used [-Wunused-but-set-variable] 'vmalloc_num_pages' should be used to determine if the address is within the vmalloc range. Fixes: ba612aa8b487 ("misc: mic: SCIF memory registration and unregistration") Signed-off-by: YueHaibing Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/scif/scif_rma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 329727e00e97..95745dc4e0ec 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -417,7 +417,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev, if (err) goto error_window; err = scif_map_page(&window->num_pages_lookup.lookup[j], - vmalloc_dma_phys ? + vmalloc_num_pages ? vmalloc_to_page(&window->num_pages[i]) : virt_to_page(&window->num_pages[i]), remote_dev); From fd6cc33d0775b0d902906d88dd05cc1a2a059f8d Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 6 Nov 2018 15:55:32 -0800 Subject: [PATCH 0846/2608] binder: fix race that allows malicious free of live buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7bada55ab50697861eee6bb7d60b41e68a961a9c upstream. Malicious code can attempt to free buffers using the BC_FREE_BUFFER ioctl to binder. There are protections against a user freeing a buffer while in use by the kernel, however there was a window where BC_FREE_BUFFER could be used to free a recently allocated buffer that was not completely initialized. This resulted in a use-after-free detected by KASAN with a malicious test program. This window is closed by setting the buffer's allow_user_free attribute to 0 when the buffer is allocated or when the user has previously freed it instead of waiting for the caller to set it. The problem was that when the struct buffer was recycled, allow_user_free was stale and set to 1 allowing a free to go through. Signed-off-by: Todd Kjos Acked-by: Arve Hjønnevåg Cc: stable # 4.14 Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 21 ++++++++++++--------- drivers/android/binder_alloc.c | 14 ++++++-------- drivers/android/binder_alloc.h | 3 +-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a86c27948fca..96a0f940e54d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2918,7 +2918,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer = NULL; goto err_binder_alloc_buf_failed; } - t->buffer->allow_user_free = 0; t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; t->buffer->target_node = target_node; @@ -3407,14 +3406,18 @@ static int binder_thread_write(struct binder_proc *proc, buffer = binder_alloc_prepare_to_free(&proc->alloc, data_ptr); - if (buffer == NULL) { - binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n", - proc->pid, thread->pid, (u64)data_ptr); - break; - } - if (!buffer->allow_user_free) { - binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n", - proc->pid, thread->pid, (u64)data_ptr); + if (IS_ERR_OR_NULL(buffer)) { + if (PTR_ERR(buffer) == -EPERM) { + binder_user_error( + "%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n", + proc->pid, thread->pid, + (u64)data_ptr); + } else { + binder_user_error( + "%d:%d BC_FREE_BUFFER u%016llx no match\n", + proc->pid, thread->pid, + (u64)data_ptr); + } break; } binder_debug(BINDER_DEBUG_FREE_BUFFER, diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 58e4658f9dd6..b9281f2725a6 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -149,14 +149,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked( else { /* * Guard against user threads attempting to - * free the buffer twice + * free the buffer when in use by kernel or + * after it's already been freed. */ - if (buffer->free_in_progress) { - pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n", - alloc->pid, current->pid, (u64)user_ptr); - return NULL; - } - buffer->free_in_progress = 1; + if (!buffer->allow_user_free) + return ERR_PTR(-EPERM); + buffer->allow_user_free = 0; return buffer; } } @@ -486,7 +484,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, rb_erase(best_fit, &alloc->free_buffers); buffer->free = 0; - buffer->free_in_progress = 0; + buffer->allow_user_free = 0; binder_insert_allocated_buffer_locked(alloc, buffer); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_alloc_buf size %zd got %pK\n", diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 2dd33b6df104..a3ad7683b6f2 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -50,8 +50,7 @@ struct binder_buffer { unsigned free:1; unsigned allow_user_free:1; unsigned async_transaction:1; - unsigned free_in_progress:1; - unsigned debug_id:28; + unsigned debug_id:29; struct binder_transaction *transaction; From 14118df4e7b4738815f222f5b20fceb3957ab206 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:45:36 +0200 Subject: [PATCH 0847/2608] libceph: weaken sizeof check in ceph_x_verify_authorizer_reply() commit f1d10e04637924f2b00a0fecdd2ca4565f5cfc3f upstream. Allow for extending ceph_x_authorize_reply in the future. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 10eb759bbcb4..2bf9d9f7ddf3 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -737,8 +737,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); if (ret < 0) return ret; - if (ret != sizeof(*reply)) - return -EPERM; + if (ret < sizeof(*reply)) { + pr_err("bad size %d for ceph_x_authorize_reply\n", ret); + return -EINVAL; + } if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) ret = -EPERM; From 0c5f2e899241aceab46e6dcba01c6fb2223bdeb5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:40:30 +0200 Subject: [PATCH 0848/2608] libceph: check authorizer reply/challenge length before reading commit 130f52f2b203aa0aec179341916ffb2e905f3afd upstream. Avoid scribbling over memory if the received reply/challenge is larger than the buffer supplied with the authorizer. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad33baa2008d..f864807284d4 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1754,6 +1754,13 @@ static int read_partial_connect(struct ceph_connection *con) if (con->auth) { size = le32_to_cpu(con->in_reply.authorizer_len); + if (size > con->auth->authorizer_reply_buf_len) { + pr_err("authorizer reply too big: %d > %zu\n", size, + con->auth->authorizer_reply_buf_len); + ret = -EINVAL; + goto out; + } + end += size; ret = read_partial(con, end, size, con->auth->authorizer_reply_buf); From 862e7aaa90f63b1e3e3ab498dc3a3ac9a011b8d0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Sep 2018 22:15:31 -0700 Subject: [PATCH 0849/2608] f2fs: fix missing up_read commit 89d13c38501df730cbb2e02c4499da1b5187119d upstream. This patch fixes missing up_read call. Fixes: c9b60788fc76 ("f2fs: fix to do sanity check with block address in main area") Cc: # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6adb6c60f017..65de72d65562 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1399,8 +1399,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, } if (__is_valid_data_blkaddr(ni.blk_addr) && - !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { + up_read(&sbi->node_write); goto redirty_out; + } if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; From 7152401aeedd9685fcf1cb58138c868890d9164f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Dec 2018 19:41:27 +0100 Subject: [PATCH 0850/2608] Linux 4.14.86 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 58a248264090..572bd98d2344 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 85 +SUBLEVEL = 86 EXTRAVERSION = NAME = Petit Gorille From f5ec9987bfbd1004b643e2c914306dc79ce40912 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 11 Jan 2018 17:22:29 +0800 Subject: [PATCH 0851/2608] Kbuild: suppress packed-not-aligned warning for default setting only commit 321cb0308a9e76841394b4bbab6a1107cfedbae0 upstream. gcc-8 reports many -Wpacked-not-aligned warnings. The below are some examples. ./include/linux/ceph/msgr.h:67:1: warning: alignment 1 of 'struct ceph_entity_addr' is less than 8 [-Wpacked-not-aligned] } __attribute__ ((packed)); ./include/linux/ceph/msgr.h:67:1: warning: alignment 1 of 'struct ceph_entity_addr' is less than 8 [-Wpacked-not-aligned] } __attribute__ ((packed)); ./include/linux/ceph/msgr.h:67:1: warning: alignment 1 of 'struct ceph_entity_addr' is less than 8 [-Wpacked-not-aligned] } __attribute__ ((packed)); This patch suppresses this kind of warnings for default setting. Signed-off-by: Xiongfeng Wang Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/Makefile.extrawarn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index c6ebf4239e64..8d5357053f86 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -11,6 +11,8 @@ # are not supported by all versions of the compiler # ========================================================================== +KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned) + ifeq ("$(origin W)", "command line") export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W) endif @@ -26,6 +28,7 @@ warning-1 += -Wold-style-definition warning-1 += $(call cc-option, -Wmissing-include-dirs) warning-1 += $(call cc-option, -Wunused-but-set-variable) warning-1 += $(call cc-option, -Wunused-const-variable) +warning-1 += $(call cc-option, -Wpacked-not-aligned) warning-1 += $(call cc-disable-warning, missing-field-initializers) warning-1 += $(call cc-disable-warning, sign-compare) From 9f8d10971a2395359751487c517bf33e5d8f9e43 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 31 Aug 2018 07:47:28 +1000 Subject: [PATCH 0852/2608] disable stringop truncation warnings for now commit 217c3e0196758662aa0429863b09d1c13da1c5d6 upstream. They are too noisy Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 572bd98d2344..77ffc1dc4e79 100644 --- a/Makefile +++ b/Makefile @@ -803,6 +803,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) # disable pointer signed / unsigned warnings in gcc 4.0 KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) +# disable stringop warnings in gcc 8+ +KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) From 337f2837c5b29c1587dd7efc4befc9f6f84a2746 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Nov 2018 12:13:15 -0800 Subject: [PATCH 0853/2608] test_hexdump: use memcpy instead of strncpy commit b1286ed7158e9b62787508066283ab0b8850b518 upstream. New versions of gcc reasonably warn about the odd pattern of strncpy(p, q, strlen(q)); which really doesn't make sense: the strncpy() ends up being just a slow and odd way to write memcpy() in this case. Apparently there was a patch for this floating around earlier, but it got lost. Acked-again-by: Andy Shevchenko Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/test_hexdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 3f415d8101f3..1c3c513add77 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -81,7 +81,7 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize, const char *q = *result++; size_t amount = strlen(q); - strncpy(p, q, amount); + memcpy(p, q, amount); p += amount; *p++ = ' '; From af882cb0bcb5573014e2203dbfd67e2737bb10d3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:57:16 -0700 Subject: [PATCH 0854/2608] kobject: Replace strncpy with memcpy commit 77d2a24b6107bd9b3bf2403a65c1428a9da83dd0 upstream. gcc 8.1.0 complains: lib/kobject.c:128:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Wstringop-truncation] lib/kobject.c: In function 'kobject_get_path': lib/kobject.c:125:13: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kobject.c b/lib/kobject.c index 34f847252c02..bbbb067de8ec 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -127,7 +127,7 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) int cur = strlen(kobject_name(parent)); /* back up enough to print this name with '/' */ length -= cur; - strncpy(path + length, kobject_name(parent), cur); + memcpy(path + length, kobject_name(parent), cur); *(path + --length) = '/'; } From 23df63002205f17d307af118fe0025f6e49d389a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 Jun 2018 14:59:00 +0200 Subject: [PATCH 0855/2608] ALSA: intel_hdmi: Use strlcpy() instead of strncpy() commit c288248f5b26cd5563112fcdc077bf44964a942d upstream. hdmi_lpe_audio_probe() copies the pcm name string via strncpy(), but as a gcc8 warning suggests, it misses a NUL terminator, and unlikely the expected result. Use the proper one, strlcpy() instead. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/x86/intel_hdmi_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 697872d8308e..8b7abbd69116 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1839,7 +1839,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) /* setup private data which can be retrieved when required */ pcm->private_data = ctx; pcm->info_flags = 0; - strncpy(pcm->name, card->shortname, strlen(card->shortname)); + strlcpy(pcm->name, card->shortname, strlen(card->shortname)); /* setup the ops for playabck */ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); From f5028606c29248d39c8fba26730517c7c624cdaa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Nov 2018 14:45:01 -0800 Subject: [PATCH 0856/2608] unifdef: use memcpy instead of strncpy commit 38c7b224ce22c25fed04007839edf974bd13439d upstream. New versions of gcc reasonably warn about the odd pattern of strncpy(p, q, strlen(q)); which really doesn't make sense: the strncpy() ends up being just a slow and odd way to write memcpy() in this case. There was a comment about _why_ the code used strncpy - to avoid the terminating NUL byte, but memcpy does the same and avoids the warning. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/unifdef.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/unifdef.c b/scripts/unifdef.c index 7493c0ee51cc..db00e3e30a59 100644 --- a/scripts/unifdef.c +++ b/scripts/unifdef.c @@ -395,7 +395,7 @@ usage(void) * When we have processed a group that starts off with a known-false * #if/#elif sequence (which has therefore been deleted) followed by a * #elif that we don't understand and therefore must keep, we edit the - * latter into a #if to keep the nesting correct. We use strncpy() to + * latter into a #if to keep the nesting correct. We use memcpy() to * overwrite the 4 byte token "elif" with "if " without a '\0' byte. * * When we find a true #elif in a group, the following block will @@ -450,7 +450,7 @@ static void Idrop (void) { Fdrop(); ignoreon(); } static void Itrue (void) { Ftrue(); ignoreon(); } static void Ifalse(void) { Ffalse(); ignoreon(); } /* modify this line */ -static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } +static void Mpass (void) { memcpy(keyword, "if ", 4); Pelif(); } static void Mtrue (void) { keywordedit("else"); state(IS_TRUE_MIDDLE); } static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); } static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); } From e26457da0cdb5498724fd46279057d49c95cbba6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:57:13 -0700 Subject: [PATCH 0857/2608] kernfs: Replace strncpy with memcpy commit 166126c1e54d927c2e8efa2702d420e0ce301fd9 upstream. gcc 8.1.0 complains: fs/kernfs/symlink.c:91:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length fs/kernfs/symlink.c: In function 'kernfs_iop_get_link': fs/kernfs/symlink.c:88:14: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 08ccabd7047f..5145ae2f0572 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -88,7 +88,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, int slen = strlen(kn->name); len -= slen; - strncpy(s + len, kn->name, slen); + memcpy(s + len, kn->name, slen); if (len) s[--len] = '/'; From bb1d0ac3ef02d0e3e893b75933e556e86c915cfe Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Wed, 6 Jun 2018 15:56:54 -0700 Subject: [PATCH 0858/2608] ip_tunnel: Fix name string concatenate in __ip_tunnel_create() commit 000ade8016400d93b4d7c89970d96b8c14773d45 upstream. By passing a limit of 2 bytes to strncat, strncat is limited to writing fewer bytes than what it's supposed to append to the name here. Since the bounds are checked on the line above this, just remove the string bounds checks entirely since they're unneeded. Signed-off-by: Sultan Alsawaf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 72eee34092ae..fabc299cb875 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -261,8 +261,8 @@ static struct net_device *__ip_tunnel_create(struct net *net, } else { if (strlen(ops->kind) > (IFNAMSIZ - 3)) goto failed; - strlcpy(name, ops->kind, IFNAMSIZ); - strncat(name, "%d", 2); + strcpy(name, ops->kind); + strcat(name, "%d"); } ASSERT_RTNL(); From 4e4b875eaf1e615cdaa789c22222565405349145 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 09:47:26 +0200 Subject: [PATCH 0859/2608] drm: gma500: fix logic error commit 67a3b63a54cbe18944191f43d644686731cf30c7 upstream. gcc-8 points out a condition that almost certainly doesn't do what the author had in mind: drivers/gpu/drm/gma500/mdfld_intel_display.c: In function 'mdfldWaitForPipeEnable': drivers/gpu/drm/gma500/mdfld_intel_display.c:102:37: error: bitwise comparison always evaluates to false [-Werror=tautological-compare] This changes it to a simple bit mask operation to check whether the bit is set. Fixes: 026abc333205 ("gma500: initial medfield merge") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170905074741.435324-1-arnd@arndb.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/mdfld_intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/gma500/mdfld_intel_display.c b/drivers/gpu/drm/gma500/mdfld_intel_display.c index 531e4450c000..5c066448be5b 100644 --- a/drivers/gpu/drm/gma500/mdfld_intel_display.c +++ b/drivers/gpu/drm/gma500/mdfld_intel_display.c @@ -99,7 +99,7 @@ void mdfldWaitForPipeEnable(struct drm_device *dev, int pipe) /* Wait for for the pipe enable to take effect. */ for (count = 0; count < COUNT_MAX; count++) { temp = REG_READ(map->conf); - if ((temp & PIPEACONF_PIPE_STATE) == 1) + if (temp & PIPEACONF_PIPE_STATE) break; } } From 94d6d9e5f3c2532c1f13521a58ac9f120f30c4ba Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Dec 2017 15:47:00 +0100 Subject: [PATCH 0860/2608] scsi: bfa: convert to strlcpy/strlcat commit 8c5a50e8e7ad812a62f7ccf28d9a5e74fddf3000 upstream. The bfa driver has a number of real issues with string termination that gcc-8 now points out: drivers/scsi/bfa/bfad_bsg.c: In function 'bfad_iocmd_port_get_attr': drivers/scsi/bfa/bfad_bsg.c:320:9: error: argument to 'sizeof' in 'strncpy' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c: In function 'bfa_fcs_fabric_psymb_init': drivers/scsi/bfa/bfa_fcs.c:775:9: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:781:9: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:788:9: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:801:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:808:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c: In function 'bfa_fcs_fabric_nsymb_init': drivers/scsi/bfa/bfa_fcs.c:837:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:844:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:852:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c: In function 'bfa_fcs_fabric_psymb_init': drivers/scsi/bfa/bfa_fcs.c:778:2: error: 'strncat' output may be truncated copying 10 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c:784:2: error: 'strncat' output may be truncated copying 30 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c:803:3: error: 'strncat' output may be truncated copying 44 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c:811:3: error: 'strncat' output may be truncated copying 16 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c: In function 'bfa_fcs_fabric_nsymb_init': drivers/scsi/bfa/bfa_fcs.c:840:2: error: 'strncat' output may be truncated copying 10 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c:847:2: error: 'strncat' output may be truncated copying 30 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_fdmi_get_hbaattr': drivers/scsi/bfa/bfa_fcs_lport.c:2657:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs_lport.c:2659:11: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_lport_ms_gmal_response': drivers/scsi/bfa/bfa_fcs_lport.c:3232:5: error: 'strncpy' output may be truncated copying 16 bytes from a string of length 247 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_lport_ns_send_rspn_id': drivers/scsi/bfa/bfa_fcs_lport.c:4670:3: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c:4682:3: error: 'strncat' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_lport_ns_util_send_rspn_id': drivers/scsi/bfa/bfa_fcs_lport.c:5206:3: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c:5215:3: error: 'strncat' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_fdmi_get_portattr': drivers/scsi/bfa/bfa_fcs_lport.c:2751:2: error: 'strncpy' specified bound 128 equals destination size [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcbuild.c: In function 'fc_rspnid_build': drivers/scsi/bfa/bfa_fcbuild.c:1254:2: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcbuild.c:1253:25: note: length computed here drivers/scsi/bfa/bfa_fcbuild.c: In function 'fc_rsnn_nn_build': drivers/scsi/bfa/bfa_fcbuild.c:1275:2: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] In most cases, this can be addressed by correctly calling strlcpy and strlcat instead of strncpy/strncat, with the size of the destination buffer as the last argument. For consistency, I'm changing the other callers of strncpy() in this driver the same way. Signed-off-by: Arnd Bergmann Reviewed-by: Johannes Thumshirn Acked-by: Sudarsana Kalluru Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bfa/bfa_fcbuild.c | 8 ++-- drivers/scsi/bfa/bfa_fcs.c | 78 ++++++++++++++++---------------- drivers/scsi/bfa/bfa_fcs_lport.c | 60 ++++++++++-------------- drivers/scsi/bfa/bfa_ioc.c | 2 +- drivers/scsi/bfa/bfa_svc.c | 4 +- drivers/scsi/bfa/bfad.c | 20 ++++---- drivers/scsi/bfa/bfad_attr.c | 2 +- drivers/scsi/bfa/bfad_bsg.c | 6 +-- 8 files changed, 83 insertions(+), 97 deletions(-) diff --git a/drivers/scsi/bfa/bfa_fcbuild.c b/drivers/scsi/bfa/bfa_fcbuild.c index b8dadc9cc993..d3b00a475aeb 100644 --- a/drivers/scsi/bfa/bfa_fcbuild.c +++ b/drivers/scsi/bfa/bfa_fcbuild.c @@ -1250,8 +1250,8 @@ fc_rspnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, memset(rspnid, 0, sizeof(struct fcgs_rspnid_req_s)); rspnid->dap = s_id; - rspnid->spn_len = (u8) strlen((char *)name); - strncpy((char *)rspnid->spn, (char *)name, rspnid->spn_len); + strlcpy(rspnid->spn, name, sizeof(rspnid->spn)); + rspnid->spn_len = (u8) strlen(rspnid->spn); return sizeof(struct fcgs_rspnid_req_s) + sizeof(struct ct_hdr_s); } @@ -1271,8 +1271,8 @@ fc_rsnn_nn_build(struct fchs_s *fchs, void *pyld, u32 s_id, memset(rsnn_nn, 0, sizeof(struct fcgs_rsnn_nn_req_s)); rsnn_nn->node_name = node_name; - rsnn_nn->snn_len = (u8) strlen((char *)name); - strncpy((char *)rsnn_nn->snn, (char *)name, rsnn_nn->snn_len); + strlcpy(rsnn_nn->snn, name, sizeof(rsnn_nn->snn)); + rsnn_nn->snn_len = (u8) strlen(rsnn_nn->snn); return sizeof(struct fcgs_rsnn_nn_req_s) + sizeof(struct ct_hdr_s); } diff --git a/drivers/scsi/bfa/bfa_fcs.c b/drivers/scsi/bfa/bfa_fcs.c index 4aa61e20e82d..932feb0ed4da 100644 --- a/drivers/scsi/bfa/bfa_fcs.c +++ b/drivers/scsi/bfa/bfa_fcs.c @@ -769,23 +769,23 @@ bfa_fcs_fabric_psymb_init(struct bfa_fcs_fabric_s *fabric) bfa_ioc_get_adapter_model(&fabric->fcs->bfa->ioc, model); /* Model name/number */ - strncpy((char *)&port_cfg->sym_name, model, - BFA_FCS_PORT_SYMBNAME_MODEL_SZ); - strncat((char *)&port_cfg->sym_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + strlcpy(port_cfg->sym_name.symname, model, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, + BFA_SYMNAME_MAXLEN); /* Driver Version */ - strncat((char *)&port_cfg->sym_name, (char *)driver_info->version, - BFA_FCS_PORT_SYMBNAME_VERSION_SZ); - strncat((char *)&port_cfg->sym_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + strlcat(port_cfg->sym_name.symname, driver_info->version, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, + BFA_SYMNAME_MAXLEN); /* Host machine name */ - strncat((char *)&port_cfg->sym_name, - (char *)driver_info->host_machine_name, - BFA_FCS_PORT_SYMBNAME_MACHINENAME_SZ); - strncat((char *)&port_cfg->sym_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + strlcat(port_cfg->sym_name.symname, + driver_info->host_machine_name, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, + BFA_SYMNAME_MAXLEN); /* * Host OS Info : @@ -793,24 +793,24 @@ bfa_fcs_fabric_psymb_init(struct bfa_fcs_fabric_s *fabric) * OS name string and instead copy the entire OS info string (64 bytes). */ if (driver_info->host_os_patch[0] == '\0') { - strncat((char *)&port_cfg->sym_name, - (char *)driver_info->host_os_name, - BFA_FCS_OS_STR_LEN); - strncat((char *)&port_cfg->sym_name, + strlcat(port_cfg->sym_name.symname, + driver_info->host_os_name, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); } else { - strncat((char *)&port_cfg->sym_name, - (char *)driver_info->host_os_name, - BFA_FCS_PORT_SYMBNAME_OSINFO_SZ); - strncat((char *)&port_cfg->sym_name, + strlcat(port_cfg->sym_name.symname, + driver_info->host_os_name, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); /* Append host OS Patch Info */ - strncat((char *)&port_cfg->sym_name, - (char *)driver_info->host_os_patch, - BFA_FCS_PORT_SYMBNAME_OSPATCH_SZ); + strlcat(port_cfg->sym_name.symname, + driver_info->host_os_patch, + BFA_SYMNAME_MAXLEN); } /* null terminate */ @@ -830,26 +830,26 @@ bfa_fcs_fabric_nsymb_init(struct bfa_fcs_fabric_s *fabric) bfa_ioc_get_adapter_model(&fabric->fcs->bfa->ioc, model); /* Model name/number */ - strncpy((char *)&port_cfg->node_sym_name, model, - BFA_FCS_PORT_SYMBNAME_MODEL_SZ); - strncat((char *)&port_cfg->node_sym_name, + strlcpy(port_cfg->node_sym_name.symname, model, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->node_sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); /* Driver Version */ - strncat((char *)&port_cfg->node_sym_name, (char *)driver_info->version, - BFA_FCS_PORT_SYMBNAME_VERSION_SZ); - strncat((char *)&port_cfg->node_sym_name, + strlcat(port_cfg->node_sym_name.symname, (char *)driver_info->version, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->node_sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); /* Host machine name */ - strncat((char *)&port_cfg->node_sym_name, - (char *)driver_info->host_machine_name, - BFA_FCS_PORT_SYMBNAME_MACHINENAME_SZ); - strncat((char *)&port_cfg->node_sym_name, + strlcat(port_cfg->node_sym_name.symname, + driver_info->host_machine_name, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->node_sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); /* null terminate */ port_cfg->node_sym_name.symname[BFA_SYMNAME_MAXLEN - 1] = 0; diff --git a/drivers/scsi/bfa/bfa_fcs_lport.c b/drivers/scsi/bfa/bfa_fcs_lport.c index 638c0a2857f7..b4f2c1d8742e 100644 --- a/drivers/scsi/bfa/bfa_fcs_lport.c +++ b/drivers/scsi/bfa/bfa_fcs_lport.c @@ -2642,10 +2642,10 @@ bfa_fcs_fdmi_get_hbaattr(struct bfa_fcs_lport_fdmi_s *fdmi, bfa_ioc_get_adapter_fw_ver(&port->fcs->bfa->ioc, hba_attr->fw_version); - strncpy(hba_attr->driver_version, (char *)driver_info->version, + strlcpy(hba_attr->driver_version, (char *)driver_info->version, sizeof(hba_attr->driver_version)); - strncpy(hba_attr->os_name, driver_info->host_os_name, + strlcpy(hba_attr->os_name, driver_info->host_os_name, sizeof(hba_attr->os_name)); /* @@ -2653,23 +2653,23 @@ bfa_fcs_fdmi_get_hbaattr(struct bfa_fcs_lport_fdmi_s *fdmi, * to the os name along with a separator */ if (driver_info->host_os_patch[0] != '\0') { - strncat(hba_attr->os_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); - strncat(hba_attr->os_name, driver_info->host_os_patch, - sizeof(driver_info->host_os_patch)); + strlcat(hba_attr->os_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, + sizeof(hba_attr->os_name)); + strlcat(hba_attr->os_name, driver_info->host_os_patch, + sizeof(hba_attr->os_name)); } /* Retrieve the max frame size from the port attr */ bfa_fcs_fdmi_get_portattr(fdmi, &fcs_port_attr); hba_attr->max_ct_pyld = fcs_port_attr.max_frm_size; - strncpy(hba_attr->node_sym_name.symname, + strlcpy(hba_attr->node_sym_name.symname, port->port_cfg.node_sym_name.symname, BFA_SYMNAME_MAXLEN); strcpy(hba_attr->vendor_info, "QLogic"); hba_attr->num_ports = cpu_to_be32(bfa_ioc_get_nports(&port->fcs->bfa->ioc)); hba_attr->fabric_name = port->fabric->lps->pr_nwwn; - strncpy(hba_attr->bios_ver, hba_attr->option_rom_ver, BFA_VERSION_LEN); + strlcpy(hba_attr->bios_ver, hba_attr->option_rom_ver, BFA_VERSION_LEN); } @@ -2736,20 +2736,20 @@ bfa_fcs_fdmi_get_portattr(struct bfa_fcs_lport_fdmi_s *fdmi, /* * OS device Name */ - strncpy(port_attr->os_device_name, (char *)driver_info->os_device_name, + strlcpy(port_attr->os_device_name, driver_info->os_device_name, sizeof(port_attr->os_device_name)); /* * Host name */ - strncpy(port_attr->host_name, (char *)driver_info->host_machine_name, + strlcpy(port_attr->host_name, driver_info->host_machine_name, sizeof(port_attr->host_name)); port_attr->node_name = bfa_fcs_lport_get_nwwn(port); port_attr->port_name = bfa_fcs_lport_get_pwwn(port); - strncpy(port_attr->port_sym_name.symname, - (char *)&bfa_fcs_lport_get_psym_name(port), BFA_SYMNAME_MAXLEN); + strlcpy(port_attr->port_sym_name.symname, + bfa_fcs_lport_get_psym_name(port).symname, BFA_SYMNAME_MAXLEN); bfa_fcs_lport_get_attr(port, &lport_attr); port_attr->port_type = cpu_to_be32(lport_attr.port_type); port_attr->scos = pport_attr.cos_supported; @@ -3229,7 +3229,7 @@ bfa_fcs_lport_ms_gmal_response(void *fcsarg, struct bfa_fcxp_s *fcxp, rsp_str[gmal_entry->len-1] = 0; /* copy IP Address to fabric */ - strncpy(bfa_fcs_lport_get_fabric_ipaddr(port), + strlcpy(bfa_fcs_lport_get_fabric_ipaddr(port), gmal_entry->ip_addr, BFA_FCS_FABRIC_IPADDR_SZ); break; @@ -4667,21 +4667,13 @@ bfa_fcs_lport_ns_send_rspn_id(void *ns_cbarg, struct bfa_fcxp_s *fcxp_alloced) * to that of the base port. */ - strncpy((char *)psymbl, - (char *) & - (bfa_fcs_lport_get_psym_name + strlcpy(symbl, + (char *)&(bfa_fcs_lport_get_psym_name (bfa_fcs_get_base_port(port->fcs))), - strlen((char *) & - bfa_fcs_lport_get_psym_name(bfa_fcs_get_base_port - (port->fcs)))); + sizeof(symbl)); - /* Ensure we have a null terminating string. */ - ((char *)psymbl)[strlen((char *) & - bfa_fcs_lport_get_psym_name(bfa_fcs_get_base_port - (port->fcs)))] = 0; - strncat((char *)psymbl, - (char *) &(bfa_fcs_lport_get_psym_name(port)), - strlen((char *) &bfa_fcs_lport_get_psym_name(port))); + strlcat(symbl, (char *)&(bfa_fcs_lport_get_psym_name(port)), + sizeof(symbl)); } else { psymbl = (u8 *) &(bfa_fcs_lport_get_psym_name(port)); } @@ -5173,7 +5165,6 @@ bfa_fcs_lport_ns_util_send_rspn_id(void *cbarg, struct bfa_fcxp_s *fcxp_alloced) struct fchs_s fchs; struct bfa_fcxp_s *fcxp; u8 symbl[256]; - u8 *psymbl = &symbl[0]; int len; /* Avoid sending RSPN in the following states. */ @@ -5203,22 +5194,17 @@ bfa_fcs_lport_ns_util_send_rspn_id(void *cbarg, struct bfa_fcxp_s *fcxp_alloced) * For Vports, we append the vport's port symbolic name * to that of the base port. */ - strncpy((char *)psymbl, (char *)&(bfa_fcs_lport_get_psym_name + strlcpy(symbl, (char *)&(bfa_fcs_lport_get_psym_name (bfa_fcs_get_base_port(port->fcs))), - strlen((char *)&bfa_fcs_lport_get_psym_name( - bfa_fcs_get_base_port(port->fcs)))); + sizeof(symbl)); - /* Ensure we have a null terminating string. */ - ((char *)psymbl)[strlen((char *)&bfa_fcs_lport_get_psym_name( - bfa_fcs_get_base_port(port->fcs)))] = 0; - - strncat((char *)psymbl, + strlcat(symbl, (char *)&(bfa_fcs_lport_get_psym_name(port)), - strlen((char *)&bfa_fcs_lport_get_psym_name(port))); + sizeof(symbl)); } len = fc_rspnid_build(&fchs, bfa_fcxp_get_reqbuf(fcxp), - bfa_fcs_lport_get_fcid(port), 0, psymbl); + bfa_fcs_lport_get_fcid(port), 0, symbl); bfa_fcxp_send(fcxp, NULL, port->fabric->vf_id, port->lp_tag, BFA_FALSE, FC_CLASS_3, len, &fchs, NULL, NULL, FC_MAX_PDUSZ, 0); diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c index 256f4afaccf9..a1a183ece093 100644 --- a/drivers/scsi/bfa/bfa_ioc.c +++ b/drivers/scsi/bfa/bfa_ioc.c @@ -2803,7 +2803,7 @@ void bfa_ioc_get_adapter_manufacturer(struct bfa_ioc_s *ioc, char *manufacturer) { memset((void *)manufacturer, 0, BFA_ADAPTER_MFG_NAME_LEN); - strncpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); + strlcpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); } void diff --git a/drivers/scsi/bfa/bfa_svc.c b/drivers/scsi/bfa/bfa_svc.c index e640223bab3c..7356fdec79f5 100644 --- a/drivers/scsi/bfa/bfa_svc.c +++ b/drivers/scsi/bfa/bfa_svc.c @@ -350,8 +350,8 @@ bfa_plog_str(struct bfa_plog_s *plog, enum bfa_plog_mid mid, lp.eid = event; lp.log_type = BFA_PL_LOG_TYPE_STRING; lp.misc = misc; - strncpy(lp.log_entry.string_log, log_str, - BFA_PL_STRING_LOG_SZ - 1); + strlcpy(lp.log_entry.string_log, log_str, + BFA_PL_STRING_LOG_SZ); lp.log_entry.string_log[BFA_PL_STRING_LOG_SZ - 1] = '\0'; bfa_plog_add(plog, &lp); } diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 5caf5f3ff642..ae37010af50f 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -983,20 +983,20 @@ bfad_start_ops(struct bfad_s *bfad) { /* Fill the driver_info info to fcs*/ memset(&driver_info, 0, sizeof(driver_info)); - strncpy(driver_info.version, BFAD_DRIVER_VERSION, - sizeof(driver_info.version) - 1); + strlcpy(driver_info.version, BFAD_DRIVER_VERSION, + sizeof(driver_info.version)); if (host_name) - strncpy(driver_info.host_machine_name, host_name, - sizeof(driver_info.host_machine_name) - 1); + strlcpy(driver_info.host_machine_name, host_name, + sizeof(driver_info.host_machine_name)); if (os_name) - strncpy(driver_info.host_os_name, os_name, - sizeof(driver_info.host_os_name) - 1); + strlcpy(driver_info.host_os_name, os_name, + sizeof(driver_info.host_os_name)); if (os_patch) - strncpy(driver_info.host_os_patch, os_patch, - sizeof(driver_info.host_os_patch) - 1); + strlcpy(driver_info.host_os_patch, os_patch, + sizeof(driver_info.host_os_patch)); - strncpy(driver_info.os_device_name, bfad->pci_name, - sizeof(driver_info.os_device_name) - 1); + strlcpy(driver_info.os_device_name, bfad->pci_name, + sizeof(driver_info.os_device_name)); /* FCS driver info init */ spin_lock_irqsave(&bfad->bfad_lock, flags); diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c index 13db3b7bc873..d0a504af5b4f 100644 --- a/drivers/scsi/bfa/bfad_attr.c +++ b/drivers/scsi/bfa/bfad_attr.c @@ -843,7 +843,7 @@ bfad_im_symbolic_name_show(struct device *dev, struct device_attribute *attr, char symname[BFA_SYMNAME_MAXLEN]; bfa_fcs_lport_get_attr(&bfad->bfa_fcs.fabric.bport, &port_attr); - strncpy(symname, port_attr.port_cfg.sym_name.symname, + strlcpy(symname, port_attr.port_cfg.sym_name.symname, BFA_SYMNAME_MAXLEN); return snprintf(buf, PAGE_SIZE, "%s\n", symname); } diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index 1aa46d0763a0..9081a5f93aae 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -127,7 +127,7 @@ bfad_iocmd_ioc_get_attr(struct bfad_s *bfad, void *cmd) /* fill in driver attr info */ strcpy(iocmd->ioc_attr.driver_attr.driver, BFAD_DRIVER_NAME); - strncpy(iocmd->ioc_attr.driver_attr.driver_ver, + strlcpy(iocmd->ioc_attr.driver_attr.driver_ver, BFAD_DRIVER_VERSION, BFA_VERSION_LEN); strcpy(iocmd->ioc_attr.driver_attr.fw_ver, iocmd->ioc_attr.adapter_attr.fw_ver); @@ -315,9 +315,9 @@ bfad_iocmd_port_get_attr(struct bfad_s *bfad, void *cmd) iocmd->attr.port_type = port_attr.port_type; iocmd->attr.loopback = port_attr.loopback; iocmd->attr.authfail = port_attr.authfail; - strncpy(iocmd->attr.port_symname.symname, + strlcpy(iocmd->attr.port_symname.symname, port_attr.port_cfg.sym_name.symname, - sizeof(port_attr.port_cfg.sym_name.symname)); + sizeof(iocmd->attr.port_symname.symname)); iocmd->status = BFA_STATUS_OK; return 0; From 46762a64c0b03fc9cab3406bf2b3bbea043cf6e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 09:33:32 +0200 Subject: [PATCH 0861/2608] staging: rts5208: fix gcc-8 logic error warning commit 58930cced012adb01bc78b3687049b17ef44d0a3 upstream. As gcc-8 points out, the bit mask check makes no sense here: drivers/staging/rts5208/sd.c: In function 'ext_sd_send_cmd_get_rsp': drivers/staging/rts5208/sd.c:4130:25: error: bitwise comparison always evaluates to true [-Werror=tautological-compare] However, the code is even more bogus, as we have already checked for the SD_RSP_TYPE_R0 case earlier in the function and returned success. As seen in the mmc/sd driver core, SD_RSP_TYPE_R0 means "no response" anyway, so checking for a particular response would not help either. This just removes the nonsensical code to get rid of the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rts5208/sd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/staging/rts5208/sd.c b/drivers/staging/rts5208/sd.c index d98d5fe25a17..0421dd9277a8 100644 --- a/drivers/staging/rts5208/sd.c +++ b/drivers/staging/rts5208/sd.c @@ -4125,12 +4125,6 @@ RTY_SEND_CMD: rtsx_trace(chip); return STATUS_FAIL; } - - } else if (rsp_type == SD_RSP_TYPE_R0) { - if ((ptr[3] & 0x1E) != 0x03) { - rtsx_trace(chip); - return STATUS_FAIL; - } } } } From 184adf40d184ff2761923e7f2f64a12702843ad8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 15:59:40 +0100 Subject: [PATCH 0862/2608] kdb: use memmove instead of overlapping memcpy commit 2cf2f0d5b91fd1b06a6ae260462fc7945ea84add upstream. gcc discovered that the memcpy() arguments in kdbnearsym() overlap, so we should really use memmove(), which is defined to handle that correctly: In function 'memcpy', inlined from 'kdbnearsym' at /git/arm-soc/kernel/debug/kdb/kdb_support.c:132:4: /git/arm-soc/include/linux/string.h:353:9: error: '__builtin_memcpy' accessing 792 bytes at offsets 0 and 8 overlaps 784 bytes at offset 8 [-Werror=restrict] return __builtin_memcpy(p, q, size); Signed-off-by: Arnd Bergmann Signed-off-by: Jason Wessel Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_support.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 014f6fbb3832..b14b0925c184 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -129,13 +129,13 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) } if (i >= ARRAY_SIZE(kdb_name_table)) { debug_kfree(kdb_name_table[0]); - memcpy(kdb_name_table, kdb_name_table+1, + memmove(kdb_name_table, kdb_name_table+1, sizeof(kdb_name_table[0]) * (ARRAY_SIZE(kdb_name_table)-1)); } else { debug_kfree(knt1); knt1 = kdb_name_table[i]; - memcpy(kdb_name_table+i, kdb_name_table+i+1, + memmove(kdb_name_table+i, kdb_name_table+i+1, sizeof(kdb_name_table[0]) * (ARRAY_SIZE(kdb_name_table)-i-1)); } From 48c11537ee6d6731d1e24343150e41057f593a8b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 14 Nov 2018 10:17:01 -0800 Subject: [PATCH 0863/2608] iser: set sector for ambiguous mr status errors commit 24c3456c8d5ee6fc1933ca40f7b4406130682668 upstream. If for some reason we failed to query the mr status, we need to make sure to provide sufficient information for an ambiguous error (guard error on sector 0). Fixes: 0a7a08ad6f5f ("IB/iser: Implement check_protection") Cc: Reported-by: Dan Carpenter Signed-off-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/iser/iser_verbs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 55a73b0ed4c6..e28a5d713d1a 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1108,7 +1108,9 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, IB_MR_CHECK_SIG_STATUS, &mr_status); if (ret) { pr_err("ib_check_mr_status failed, ret %d\n", ret); - goto err; + /* Not a lot we can do, return ambiguous guard error */ + *sector = 0; + return 0x1; } if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { @@ -1136,9 +1138,6 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, } return 0; -err: - /* Not alot we can do here, return ambiguous guard error */ - return 0x1; } void iser_err_comp(struct ib_wc *wc, const char *type) From 8d3ab1ccc063e618ef1a2711e9da9ce9e7d27396 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 22 Nov 2018 17:10:31 +0100 Subject: [PATCH 0864/2608] uprobes: Fix handle_swbp() vs. unregister() + register() race once more commit 09d3f015d1e1b4fee7e9bbdcf54201d239393391 upstream. Commit: 142b18ddc8143 ("uprobes: Fix handle_swbp() vs unregister() + register() race") added the UPROBE_COPY_INSN flag, and corresponding smp_wmb() and smp_rmb() memory barriers, to ensure that handle_swbp() uses fully-initialized uprobes only. However, the smp_rmb() is mis-placed: this barrier should be placed after handle_swbp() has tested for the flag, thus guaranteeing that (program-order) subsequent loads from the uprobe can see the initial stores performed by prepare_uprobe(). Move the smp_rmb() accordingly. Also amend the comments associated to the two memory barriers to indicate their actual locations. Signed-off-by: Andrea Parri Acked-by: Oleg Nesterov Cc: Alexander Shishkin Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@kernel.org Fixes: 142b18ddc8143 ("uprobes: Fix handle_swbp() vs unregister() + register() race") Link: http://lkml.kernel.org/r/20181122161031.15179-1-andrea.parri@amarulasolutions.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/uprobes.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 267f6ef91d97..01941cffa9c2 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -616,7 +616,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, BUG_ON((uprobe->offset & ~PAGE_MASK) + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); - smp_wmb(); /* pairs with rmb() in find_active_uprobe() */ + smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */ set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: @@ -1910,10 +1910,18 @@ static void handle_swbp(struct pt_regs *regs) * After we hit the bp, _unregister + _register can install the * new and not-yet-analyzed uprobe at the same address, restart. */ - smp_rmb(); /* pairs with wmb() in install_breakpoint() */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; + /* + * Pairs with the smp_wmb() in prepare_uprobe(). + * + * Guarantees that if we see the UPROBE_COPY_INSN bit set, then + * we must also see the stores to &uprobe->arch performed by the + * prepare_uprobe() call. + */ + smp_rmb(); + /* Tracing handlers use ->utask to communicate with fetch methods */ if (!get_utask()) goto out; From 9aab50aec56772ff6e1a2dead6097cf2921fe944 Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Mon, 26 Nov 2018 11:25:40 +0100 Subject: [PATCH 0865/2608] MIPS: ralink: Fix mt7620 nd_sd pinmux commit 7d35baa4e9ec4b717bc0e58a39cdb6a1c50f5465 upstream. In case the nd_sd group is set to the sd-card function, Pins 45 + 46 are configured as GPIOs. If they are blocked by the sd function, they can't be used as GPIOs. Reported-by: Kristian Evensen Signed-off-by: Mathias Kresin Signed-off-by: Paul Burton Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data") Patchwork: https://patchwork.linux-mips.org/patch/21220/ Cc: John Crispin Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/mt7620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 41b71c4352c2..c1ce6f43642b 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -84,7 +84,7 @@ static struct rt2880_pmx_func pcie_rst_grp[] = { }; static struct rt2880_pmx_func nd_sd_grp[] = { FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15), - FUNC("sd", MT7620_GPIO_MODE_SD, 45, 15) + FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13) }; static struct rt2880_pmx_group mt7620a_pinmux_data[] = { From d3c0999433352dacda6da37c237388bfa9bae1c2 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 21 Nov 2018 22:14:39 +0300 Subject: [PATCH 0866/2608] mips: fix mips_get_syscall_arg o32 check commit c50cbd85cd7027d32ac5945bb60217936b4f7eaf upstream. When checking for TIF_32BIT_REGS flag, mips_get_syscall_arg() should use the task specified as its argument instead of the current task. This potentially affects all syscall_get_arguments() users who specify tasks different from the current. Fixes: c0ff3c53d4f99 ("MIPS: Enable HAVE_ARCH_TRACEHOOK.") Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21185/ Cc: Elvira Khabirova Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 7c713025b23f..53ee82b1efa7 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -51,7 +51,7 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg, #ifdef CONFIG_64BIT case 4: case 5: case 6: case 7: #ifdef CONFIG_MIPS32_O32 - if (test_thread_flag(TIF_32BIT_REGS)) + if (test_tsk_thread_flag(task, TIF_32BIT_REGS)) return get_user(*arg, (int *)usp + n); else #endif From 53f6341a937be3dad213f5f4b2e188012c024ed2 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 21 Nov 2018 15:03:54 +0200 Subject: [PATCH 0867/2608] IB/mlx5: Avoid load failure due to unknown link width commit db7a691a1551a748cb92d9c89c6b190ea87e28d5 upstream. If the firmware reports a connection width that is not 1x, 4x, 8x or 12x it causes the driver to fail during initialization. To prevent this failure every time a new width is introduced to the RDMA stack, we will set a default 4x width for these widths which ar unknown to the driver. This is needed to allow to run old kernels with new firmware. Cc: # 4.1 Fixes: 1b5daf11b015 ("IB/mlx5: Avoid using the MAD_IFC command under ISSI > 0 mode") Signed-off-by: Michael Guralnik Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c3a4f5d92391..13a92062e9ca 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -869,31 +869,26 @@ enum mlx5_ib_width { MLX5_IB_WIDTH_12X = 1 << 4 }; -static int translate_active_width(struct ib_device *ibdev, u8 active_width, +static void translate_active_width(struct ib_device *ibdev, u8 active_width, u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - int err = 0; - if (active_width & MLX5_IB_WIDTH_1X) { + if (active_width & MLX5_IB_WIDTH_1X) *ib_width = IB_WIDTH_1X; - } else if (active_width & MLX5_IB_WIDTH_2X) { - mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n", - (int)active_width); - err = -EINVAL; - } else if (active_width & MLX5_IB_WIDTH_4X) { + else if (active_width & MLX5_IB_WIDTH_4X) *ib_width = IB_WIDTH_4X; - } else if (active_width & MLX5_IB_WIDTH_8X) { + else if (active_width & MLX5_IB_WIDTH_8X) *ib_width = IB_WIDTH_8X; - } else if (active_width & MLX5_IB_WIDTH_12X) { + else if (active_width & MLX5_IB_WIDTH_12X) *ib_width = IB_WIDTH_12X; - } else { - mlx5_ib_dbg(dev, "Invalid active_width %d\n", + else { + mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n", (int)active_width); - err = -EINVAL; + *ib_width = IB_WIDTH_4X; } - return err; + return; } static int mlx5_mtu_to_ib_mtu(int mtu) @@ -1001,10 +996,8 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (err) goto out; - err = translate_active_width(ibdev, ib_link_width_oper, - &props->active_width); - if (err) - goto out; + translate_active_width(ibdev, ib_link_width_oper, &props->active_width); + err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); if (err) goto out; From 37b927309286a4e4f9bf13aa2f7dbdd001637346 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Mon, 5 Nov 2018 16:57:47 +1100 Subject: [PATCH 0868/2608] drm/ast: Fix incorrect free on ioregs commit dc25ab067645eabd037f1a23d49a666f9e0b8c68 upstream. If the platform has no IO space, ioregs is placed next to the already allocated regs. In this case, it should not be separately freed. This prevents a kernel warning from __vunmap "Trying to vfree() nonexistent vm area" when unloading the driver. Fixes: 0dd68309b9c5 ("drm/ast: Try to use MMIO registers when PIO isn't supported") Signed-off-by: Sam Bobroff Cc: Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index dac355812adc..373700c05a00 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -583,7 +583,8 @@ void ast_driver_unload(struct drm_device *dev) drm_mode_config_cleanup(dev); ast_mm_fini(ast); - pci_iounmap(dev->pdev, ast->ioregs); + if (ast->ioregs != ast->regs + AST_IO_MM_OFFSET) + pci_iounmap(dev->pdev, ast->ioregs); pci_iounmap(dev->pdev, ast->regs); kfree(ast); } From 4e97ad08e1e3f4e9dbd5884b1dd7c4fdbe038eb1 Mon Sep 17 00:00:00 2001 From: Sergio Correia Date: Thu, 22 Nov 2018 02:33:29 -0300 Subject: [PATCH 0869/2608] drm: set is_master to 0 upon drm_new_set_master() failure commit 23a336b34258aba3b50ea6863cca4e81b5ef6384 upstream. When drm_new_set_master() fails, set is_master to 0, to prevent a possible NULL pointer deref. Here is a problematic flow: we check is_master in drm_is_current_master(), then proceed to call drm_lease_owner() passing master. If we do not restore is_master status when drm_new_set_master() fails, we may have a situation in which is_master will be 1 and master itself, NULL, leading to the deref of a NULL pointer in drm_lease_owner(). This fixes the following OOPS, observed on an ArchLinux running a 4.19.2 kernel: [ 97.804282] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 [ 97.807224] PGD 0 P4D 0 [ 97.807224] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 97.807224] CPU: 0 PID: 1348 Comm: xfwm4 Tainted: P OE 4.19.2-arch1-1-ARCH #1 [ 97.807224] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./AB350 Pro4, BIOS P5.10 10/16/2018 [ 97.807224] RIP: 0010:drm_lease_owner+0xd/0x20 [drm] [ 97.807224] Code: 83 c4 18 5b 5d c3 b8 ea ff ff ff eb e2 b8 ed ff ff ff eb db e8 b4 ca 68 fb 0f 1f 40 00 0f 1f 44 00 00 48 89 f8 eb 03 48 89 d0 <48> 8b 90 80 00 00 00 48 85 d2 75 f1 c3 66 0f 1f 44 00 00 0f 1f 44 [ 97.807224] RSP: 0018:ffffb8cf08e07bb0 EFLAGS: 00010202 [ 97.807224] RAX: 0000000000000000 RBX: ffff9cf0f2586c00 RCX: ffff9cf0f2586c88 [ 97.807224] RDX: ffff9cf0ddbd8000 RSI: 0000000000000000 RDI: 0000000000000000 [ 97.807224] RBP: ffff9cf1040e9800 R08: 0000000000000000 R09: 0000000000000000 [ 97.807224] R10: ffffdeb30fd5d680 R11: ffffdeb30f5d6808 R12: ffff9cf1040e9888 [ 97.807224] R13: 0000000000000000 R14: dead000000000200 R15: ffff9cf0f2586cc8 [ 97.807224] FS: 00007f4145513180(0000) GS:ffff9cf10ea00000(0000) knlGS:0000000000000000 [ 97.807224] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 97.807224] CR2: 0000000000000080 CR3: 00000003d7548000 CR4: 00000000003406f0 [ 97.807224] Call Trace: [ 97.807224] drm_is_current_master+0x1a/0x30 [drm] [ 97.807224] drm_master_release+0x3e/0x130 [drm] [ 97.807224] drm_file_free.part.0+0x2be/0x2d0 [drm] [ 97.807224] drm_open+0x1ba/0x1e0 [drm] [ 97.807224] drm_stub_open+0xaf/0xe0 [drm] [ 97.807224] chrdev_open+0xa3/0x1b0 [ 97.807224] ? cdev_put.part.0+0x20/0x20 [ 97.807224] do_dentry_open+0x132/0x340 [ 97.807224] path_openat+0x2d1/0x14e0 [ 97.807224] ? mem_cgroup_commit_charge+0x7a/0x520 [ 97.807224] do_filp_open+0x93/0x100 [ 97.807224] ? __check_object_size+0x102/0x189 [ 97.807224] ? _raw_spin_unlock+0x16/0x30 [ 97.807224] do_sys_open+0x186/0x210 [ 97.807224] do_syscall_64+0x5b/0x170 [ 97.807224] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 97.807224] RIP: 0033:0x7f4147b07976 [ 97.807224] Code: 89 54 24 08 e8 7b f4 ff ff 8b 74 24 0c 48 8b 3c 24 41 89 c0 44 8b 54 24 08 b8 01 01 00 00 89 f2 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 77 30 44 89 c7 89 44 24 08 e8 a6 f4 ff ff 8b 44 [ 97.807224] RSP: 002b:00007ffcced96ca0 EFLAGS: 00000293 ORIG_RAX: 0000000000000101 [ 97.807224] RAX: ffffffffffffffda RBX: 00005619d5037f80 RCX: 00007f4147b07976 [ 97.807224] RDX: 0000000000000002 RSI: 00005619d46b969c RDI: 00000000ffffff9c [ 98.040039] RBP: 0000000000000024 R08: 0000000000000000 R09: 0000000000000000 [ 98.040039] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000024 [ 98.040039] R13: 0000000000000012 R14: 00005619d5035950 R15: 0000000000000012 [ 98.040039] Modules linked in: nct6775 hwmon_vid algif_skcipher af_alg nls_iso8859_1 nls_cp437 vfat fat uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common arc4 videodev media snd_usb_audio snd_hda_codec_hdmi snd_usbmidi_lib snd_rawmidi snd_seq_device mousedev input_leds iwlmvm mac80211 snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec edac_mce_amd kvm_amd snd_hda_core kvm iwlwifi snd_hwdep r8169 wmi_bmof cfg80211 snd_pcm irqbypass snd_timer snd libphy soundcore pinctrl_amd rfkill pcspkr sp5100_tco evdev gpio_amdpt k10temp mac_hid i2c_piix4 wmi pcc_cpufreq acpi_cpufreq vboxnetflt(OE) vboxnetadp(OE) vboxpci(OE) vboxdrv(OE) msr sg crypto_user ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 fscrypto uas usb_storage dm_crypt hid_generic usbhid hid [ 98.040039] dm_mod raid1 md_mod sd_mod crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc ahci libahci aesni_intel aes_x86_64 libata crypto_simd cryptd glue_helper ccp xhci_pci rng_core scsi_mod xhci_hcd nvidia_drm(POE) drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm agpgart nvidia_uvm(POE) nvidia_modeset(POE) nvidia(POE) ipmi_devintf ipmi_msghandler [ 98.040039] CR2: 0000000000000080 [ 98.040039] ---[ end trace 3b65093b6fe62b2f ]--- [ 98.040039] RIP: 0010:drm_lease_owner+0xd/0x20 [drm] [ 98.040039] Code: 83 c4 18 5b 5d c3 b8 ea ff ff ff eb e2 b8 ed ff ff ff eb db e8 b4 ca 68 fb 0f 1f 40 00 0f 1f 44 00 00 48 89 f8 eb 03 48 89 d0 <48> 8b 90 80 00 00 00 48 85 d2 75 f1 c3 66 0f 1f 44 00 00 0f 1f 44 [ 98.040039] RSP: 0018:ffffb8cf08e07bb0 EFLAGS: 00010202 [ 98.040039] RAX: 0000000000000000 RBX: ffff9cf0f2586c00 RCX: ffff9cf0f2586c88 [ 98.040039] RDX: ffff9cf0ddbd8000 RSI: 0000000000000000 RDI: 0000000000000000 [ 98.040039] RBP: ffff9cf1040e9800 R08: 0000000000000000 R09: 0000000000000000 [ 98.040039] R10: ffffdeb30fd5d680 R11: ffffdeb30f5d6808 R12: ffff9cf1040e9888 [ 98.040039] R13: 0000000000000000 R14: dead000000000200 R15: ffff9cf0f2586cc8 [ 98.040039] FS: 00007f4145513180(0000) GS:ffff9cf10ea00000(0000) knlGS:0000000000000000 [ 98.040039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 98.040039] CR2: 0000000000000080 CR3: 00000003d7548000 CR4: 00000000003406f0 Signed-off-by: Sergio Correia Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181122053329.2692-1-sergio@correia.cc Signed-off-by: Sean Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_auth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 7ff697389d74..fe85d041d0ba 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -133,6 +133,7 @@ static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv) lockdep_assert_held_once(&dev->master_mutex); + WARN_ON(fpriv->is_master); old_master = fpriv->master; fpriv->master = drm_master_create(dev); if (!fpriv->master) { @@ -161,6 +162,7 @@ out_err: /* drop references and restore old master on failure */ drm_master_put(&fpriv->master); fpriv->master = old_master; + fpriv->is_master = 0; return ret; } From 9576f5215818f54c299eb7880e396ad75fc777fc Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Sat, 24 Nov 2018 14:12:38 -0500 Subject: [PATCH 0870/2608] drm/meson: Enable fast_io in meson_dw_hdmi_regmap_config commit 995b278e4723b26f8ebf0e7c119286d16c712747 upstream. Seeing as we use this registermap in the context of our IRQ handlers, we need to be using spinlocks for reading/writing registers so that we can still read them from IRQ handlers without having to grab any mutexes and accidentally sleep. We don't currently do this, as pointed out by lockdep: [ 18.403770] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:908 [ 18.406744] in_atomic(): 1, irqs_disabled(): 128, pid: 68, name: kworker/u17:0 [ 18.413864] INFO: lockdep is turned off. [ 18.417675] irq event stamp: 12 [ 18.420778] hardirqs last enabled at (11): [] _raw_spin_unlock_irq+0x2c/0x60 [ 18.429510] hardirqs last disabled at (12): [] __schedule+0xc4/0xa60 [ 18.437345] softirqs last enabled at (0): [] copy_process.isra.4.part.5+0x4d8/0x1c50 [ 18.446684] softirqs last disabled at (0): [<0000000000000000>] (null) [ 18.453979] CPU: 0 PID: 68 Comm: kworker/u17:0 Tainted: G W O 4.20.0-rc3Lyude-Test+ #9 [ 18.469839] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018 [ 18.480037] Workqueue: hci0 hci_power_on [bluetooth] [ 18.487138] Call trace: [ 18.494192] dump_backtrace+0x0/0x1b8 [ 18.501280] show_stack+0x14/0x20 [ 18.508361] dump_stack+0xbc/0xf4 [ 18.515427] ___might_sleep+0x140/0x1d8 [ 18.522515] __might_sleep+0x50/0x88 [ 18.529582] __mutex_lock+0x60/0x870 [ 18.536621] mutex_lock_nested+0x1c/0x28 [ 18.543660] regmap_lock_mutex+0x10/0x18 [ 18.550696] regmap_read+0x38/0x70 [ 18.557727] dw_hdmi_hardirq+0x58/0x138 [dw_hdmi] [ 18.564804] __handle_irq_event_percpu+0xac/0x410 [ 18.571891] handle_irq_event_percpu+0x34/0x88 [ 18.578982] handle_irq_event+0x48/0x78 [ 18.586051] handle_fasteoi_irq+0xac/0x160 [ 18.593061] generic_handle_irq+0x24/0x38 [ 18.599989] __handle_domain_irq+0x60/0xb8 [ 18.606857] gic_handle_irq+0x50/0xa0 [ 18.613659] el1_irq+0xb4/0x130 [ 18.620394] debug_lockdep_rcu_enabled+0x2c/0x30 [ 18.627111] schedule+0x38/0xa0 [ 18.633781] schedule_timeout+0x3a8/0x510 [ 18.640389] wait_for_common+0x15c/0x180 [ 18.646905] wait_for_completion+0x14/0x20 [ 18.653319] mmc_wait_for_req_done+0x28/0x168 [ 18.659693] mmc_wait_for_req+0xa8/0xe8 [ 18.665978] mmc_wait_for_cmd+0x64/0x98 [ 18.672180] mmc_io_rw_direct_host+0x94/0x130 [ 18.678385] mmc_io_rw_direct+0x10/0x18 [ 18.684516] sdio_enable_func+0xe8/0x1d0 [ 18.690627] btsdio_open+0x24/0xc0 [btsdio] [ 18.696821] hci_dev_do_open+0x64/0x598 [bluetooth] [ 18.703025] hci_power_on+0x50/0x270 [bluetooth] [ 18.709163] process_one_work+0x2a0/0x6e0 [ 18.715252] worker_thread+0x40/0x448 [ 18.721310] kthread+0x12c/0x130 [ 18.727326] ret_from_fork+0x10/0x1c [ 18.735555] ------------[ cut here ]------------ [ 18.741430] do not call blocking ops when !TASK_RUNNING; state=2 set at [<000000006265ec59>] wait_for_common+0x140/0x180 [ 18.752417] WARNING: CPU: 0 PID: 68 at kernel/sched/core.c:6096 __might_sleep+0x7c/0x88 [ 18.760553] Modules linked in: dm_mirror dm_region_hash dm_log dm_mod btsdio bluetooth snd_soc_hdmi_codec dw_hdmi_i2s_audio ecdh_generic brcmfmac brcmutil cfg80211 rfkill ir_nec_decoder meson_dw_hdmi(O) dw_hdmi rc_geekbox meson_rng meson_ir ao_cec rng_core rc_core cec leds_pwm efivars nfsd ip_tables x_tables crc32_generic f2fs uas meson_gxbb_wdt pwm_meson efivarfs ipv6 [ 18.799469] CPU: 0 PID: 68 Comm: kworker/u17:0 Tainted: G W O 4.20.0-rc3Lyude-Test+ #9 [ 18.808858] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018 [ 18.818045] Workqueue: hci0 hci_power_on [bluetooth] [ 18.824088] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 18.829891] pc : __might_sleep+0x7c/0x88 [ 18.835722] lr : __might_sleep+0x7c/0x88 [ 18.841256] sp : ffff000008003cb0 [ 18.846751] x29: ffff000008003cb0 x28: 0000000000000000 [ 18.852269] x27: ffff00000938e000 x26: ffff800010283000 [ 18.857726] x25: ffff800010353280 x24: ffff00000868ef50 [ 18.863166] x23: 0000000000000000 x22: 0000000000000000 [ 18.868551] x21: 0000000000000000 x20: 000000000000038c [ 18.873850] x19: ffff000008cd08c0 x18: 0000000000000010 [ 18.879081] x17: ffff000008a68cb0 x16: 0000000000000000 [ 18.884197] x15: 0000000000aaaaaa x14: 0e200e200e200e20 [ 18.889239] x13: 0000000000000001 x12: 00000000ffffffff [ 18.894261] x11: ffff000008adfa48 x10: 0000000000000001 [ 18.899517] x9 : ffff0000092a0158 x8 : 0000000000000000 [ 18.904674] x7 : ffff00000812136c x6 : 0000000000000000 [ 18.909895] x5 : 0000000000000000 x4 : 0000000000000001 [ 18.915080] x3 : 0000000000000007 x2 : 0000000000000007 [ 18.920269] x1 : 99ab8e9ebb6c8500 x0 : 0000000000000000 [ 18.925443] Call trace: [ 18.929904] __might_sleep+0x7c/0x88 [ 18.934311] __mutex_lock+0x60/0x870 [ 18.938687] mutex_lock_nested+0x1c/0x28 [ 18.943076] regmap_lock_mutex+0x10/0x18 [ 18.947453] regmap_read+0x38/0x70 [ 18.951842] dw_hdmi_hardirq+0x58/0x138 [dw_hdmi] [ 18.956269] __handle_irq_event_percpu+0xac/0x410 [ 18.960712] handle_irq_event_percpu+0x34/0x88 [ 18.965176] handle_irq_event+0x48/0x78 [ 18.969612] handle_fasteoi_irq+0xac/0x160 [ 18.974058] generic_handle_irq+0x24/0x38 [ 18.978501] __handle_domain_irq+0x60/0xb8 [ 18.982938] gic_handle_irq+0x50/0xa0 [ 18.987351] el1_irq+0xb4/0x130 [ 18.991734] debug_lockdep_rcu_enabled+0x2c/0x30 [ 18.996180] schedule+0x38/0xa0 [ 19.000609] schedule_timeout+0x3a8/0x510 [ 19.005064] wait_for_common+0x15c/0x180 [ 19.009513] wait_for_completion+0x14/0x20 [ 19.013951] mmc_wait_for_req_done+0x28/0x168 [ 19.018402] mmc_wait_for_req+0xa8/0xe8 [ 19.022809] mmc_wait_for_cmd+0x64/0x98 [ 19.027177] mmc_io_rw_direct_host+0x94/0x130 [ 19.031563] mmc_io_rw_direct+0x10/0x18 [ 19.035922] sdio_enable_func+0xe8/0x1d0 [ 19.040294] btsdio_open+0x24/0xc0 [btsdio] [ 19.044742] hci_dev_do_open+0x64/0x598 [bluetooth] [ 19.049228] hci_power_on+0x50/0x270 [bluetooth] [ 19.053687] process_one_work+0x2a0/0x6e0 [ 19.058143] worker_thread+0x40/0x448 [ 19.062608] kthread+0x12c/0x130 [ 19.067064] ret_from_fork+0x10/0x1c [ 19.071513] irq event stamp: 12 [ 19.075937] hardirqs last enabled at (11): [] _raw_spin_unlock_irq+0x2c/0x60 [ 19.083560] hardirqs last disabled at (12): [] __schedule+0xc4/0xa60 [ 19.091401] softirqs last enabled at (0): [] copy_process.isra.4.part.5+0x4d8/0x1c50 [ 19.100801] softirqs last disabled at (0): [<0000000000000000>] (null) [ 19.108135] ---[ end trace 38c4920787b88c75 ]--- So, fix this by enabling the fast_io option in our regmap config so that regmap uses spinlocks for locking instead of mutexes. Signed-off-by: Lyude Paul Fixes: 3f68be7d8e96 ("drm/meson: Add support for HDMI encoder and DW-HDMI bridge + PHY") Cc: Daniel Vetter Cc: Neil Armstrong Cc: Carlo Caione Cc: Kevin Hilman Cc: dri-devel@lists.freedesktop.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: # v4.12+ Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20181124191238.28276-1-lyude@redhat.com Signed-off-by: Sean Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/meson/meson_dw_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index cef414466f9f..e5f7a7cf48fa 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -697,6 +697,7 @@ static const struct regmap_config meson_dw_hdmi_regmap_config = { .reg_read = meson_dw_hdmi_reg_read, .reg_write = meson_dw_hdmi_reg_write, .max_register = 0x10000, + .fast_io = true, }; static bool meson_hdmi_connector_is_available(struct device *dev) From a88a1b3c215beee3f78ee9b8981518381fa073d6 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Sat, 24 Nov 2018 20:21:17 -0500 Subject: [PATCH 0871/2608] drm/meson: Fix OOB memory accesses in meson_viu_set_osd_lut() commit 97b2a3180a559a33852ac0cd77904166069484fd upstream. Currently on driver bringup with KASAN enabled, meson triggers an OOB memory access as shown below: [ 117.904528] ================================================================== [ 117.904560] BUG: KASAN: global-out-of-bounds in meson_viu_set_osd_lut+0x7a0/0x890 [ 117.904588] Read of size 4 at addr ffff20000a63ce24 by task systemd-udevd/498 [ 117.904601] [ 118.083372] CPU: 4 PID: 498 Comm: systemd-udevd Not tainted 4.20.0-rc3Lyude-Test+ #20 [ 118.091143] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018 [ 118.099768] Call trace: [ 118.102181] dump_backtrace+0x0/0x3e8 [ 118.105796] show_stack+0x14/0x20 [ 118.109083] dump_stack+0x130/0x1c4 [ 118.112539] print_address_description+0x60/0x25c [ 118.117214] kasan_report+0x1b4/0x368 [ 118.120851] __asan_report_load4_noabort+0x18/0x20 [ 118.125566] meson_viu_set_osd_lut+0x7a0/0x890 [ 118.129953] meson_viu_init+0x10c/0x290 [ 118.133741] meson_drv_bind_master+0x474/0x748 [ 118.138141] meson_drv_bind+0x10/0x18 [ 118.141760] try_to_bring_up_master+0x3d8/0x768 [ 118.146249] component_add+0x214/0x570 [ 118.149978] meson_dw_hdmi_probe+0x18/0x20 [meson_dw_hdmi] [ 118.155404] platform_drv_probe+0x98/0x138 [ 118.159455] really_probe+0x2a0/0xa70 [ 118.163070] driver_probe_device+0x1b4/0x2d8 [ 118.167299] __driver_attach+0x200/0x280 [ 118.171189] bus_for_each_dev+0x10c/0x1a8 [ 118.175144] driver_attach+0x38/0x50 [ 118.178681] bus_add_driver+0x330/0x608 [ 118.182471] driver_register+0x140/0x388 [ 118.186361] __platform_driver_register+0xc8/0x108 [ 118.191117] meson_dw_hdmi_platform_driver_init+0x1c/0x1000 [meson_dw_hdmi] [ 118.198022] do_one_initcall+0x12c/0x3bc [ 118.201883] do_init_module+0x1fc/0x638 [ 118.205673] load_module+0x4b4c/0x6808 [ 118.209387] __se_sys_init_module+0x2e8/0x3c0 [ 118.213699] __arm64_sys_init_module+0x68/0x98 [ 118.218100] el0_svc_common+0x104/0x210 [ 118.221893] el0_svc_handler+0x48/0xb8 [ 118.225594] el0_svc+0x8/0xc [ 118.228429] [ 118.229887] The buggy address belongs to the variable: [ 118.235007] eotf_33_linear_mapping+0x84/0xc0 [ 118.239301] [ 118.240752] Memory state around the buggy address: [ 118.245522] ffff20000a63cd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 118.252695] ffff20000a63cd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 118.259850] >ffff20000a63ce00: 00 00 00 00 04 fa fa fa fa fa fa fa 00 00 00 00 [ 118.267000] ^ [ 118.271222] ffff20000a63ce80: 00 fa fa fa fa fa fa fa 00 00 00 00 00 00 00 00 [ 118.278393] ffff20000a63cf00: 00 00 00 00 00 00 00 00 00 00 00 00 04 fa fa fa [ 118.285542] ================================================================== [ 118.292699] Disabling lock debugging due to kernel taint It seems that when looping through the OSD EOTF LUT maps, we use the same max iterator for OETF: 20. This is wrong though, since 20*2 is 40, which means that we'll stop out of bounds on the EOTF maps. But, this whole thing is already confusing enough to read through as-is, so let's just replace all of the hardcoded sizes with OSD_(OETF/EOTF)_LUT_SIZE / 2. Signed-off-by: Lyude Paul Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Cc: Neil Armstrong Cc: Maxime Ripard Cc: Carlo Caione Cc: Kevin Hilman Cc: dri-devel@lists.freedesktop.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: # v4.10+ Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20181125012117.31915-1-lyude@redhat.com Signed-off-by: Sean Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/meson/meson_viu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index 6bcfa527c180..26a0857878bf 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -184,18 +184,18 @@ void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel, if (lut_sel == VIU_LUT_OSD_OETF) { writel(0, priv->io_base + _REG(addr_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++) writel(r_map[i * 2] | (r_map[i * 2 + 1] << 16), priv->io_base + _REG(data_port)); writel(r_map[OSD_OETF_LUT_SIZE - 1] | (g_map[0] << 16), priv->io_base + _REG(data_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++) writel(g_map[i * 2 + 1] | (g_map[i * 2 + 2] << 16), priv->io_base + _REG(data_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++) writel(b_map[i * 2] | (b_map[i * 2 + 1] << 16), priv->io_base + _REG(data_port)); @@ -211,18 +211,18 @@ void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel, } else if (lut_sel == VIU_LUT_OSD_EOTF) { writel(0, priv->io_base + _REG(addr_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++) writel(r_map[i * 2] | (r_map[i * 2 + 1] << 16), priv->io_base + _REG(data_port)); writel(r_map[OSD_EOTF_LUT_SIZE - 1] | (g_map[0] << 16), priv->io_base + _REG(data_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++) writel(g_map[i * 2 + 1] | (g_map[i * 2 + 2] << 16), priv->io_base + _REG(data_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++) writel(b_map[i * 2] | (b_map[i * 2 + 1] << 16), priv->io_base + _REG(data_port)); From 82c5a8c0debac552750a00b4fc7551c89c7b34b8 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:25 -0800 Subject: [PATCH 0872/2608] userfaultfd: use ENOENT instead of EFAULT if the atomic copy user fails commit 9e368259ad988356c4c95150fafd1a06af095d98 upstream. Patch series "userfaultfd shmem updates". Jann found two bugs in the userfaultfd shmem MAP_SHARED backend: the lack of the VM_MAYWRITE check and the lack of i_size checks. Then looking into the above we also fixed the MAP_PRIVATE case. Hugh by source review also found a data loss source if UFFDIO_COPY is used on shmem MAP_SHARED PROT_READ mappings (the production usages incidentally run with PROT_READ|PROT_WRITE, so the data loss couldn't happen in those production usages like with QEMU). The whole patchset is marked for stable. We verified QEMU postcopy live migration with guest running on shmem MAP_PRIVATE run as well as before after the fix of shmem MAP_PRIVATE. Regardless if it's shmem or hugetlbfs or MAP_PRIVATE or MAP_SHARED, QEMU unconditionally invokes a punch hole if the guest mapping is filebacked and a MADV_DONTNEED too (needed to get rid of the MAP_PRIVATE COWs and for the anon backend). This patch (of 5): We internally used EFAULT to communicate with the caller, switch to ENOENT, so EFAULT can be used as a non internal retval. Link: http://lkml.kernel.org/r/20181126173452.26955-2-aarcange@redhat.com Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Signed-off-by: Andrea Arcangeli Reviewed-by: Mike Rapoport Reviewed-by: Hugh Dickins Cc: Mike Kravetz Cc: Jann Horn Cc: Peter Xu Cc: "Dr. David Alan Gilbert" Cc: Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 2 +- mm/shmem.c | 2 +- mm/userfaultfd.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f46040aed2da..224cdd953a79 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4037,7 +4037,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, /* fallback to copy_from_user outside mmap_sem */ if (unlikely(ret)) { - ret = -EFAULT; + ret = -ENOENT; *pagep = page; /* don't free the page */ goto out; diff --git a/mm/shmem.c b/mm/shmem.c index ab7ff0aeae2d..9f856ecda73b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2266,7 +2266,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, *pagep = page; shmem_inode_unacct_blocks(inode, 1); /* don't free the page */ - return -EFAULT; + return -ENOENT; } } else { /* mfill_zeropage_atomic */ clear_highpage(page); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 81192701964d..c63c0fc5ecfa 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -49,7 +49,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, /* fallback to copy_from_user outside mmap_sem */ if (unlikely(ret)) { - ret = -EFAULT; + ret = -ENOENT; *pagep = page; /* don't free the page */ goto out; @@ -275,7 +275,7 @@ retry: cond_resched(); - if (unlikely(err == -EFAULT)) { + if (unlikely(err == -ENOENT)) { up_read(&dst_mm->mmap_sem); BUG_ON(!page); @@ -521,7 +521,7 @@ retry: src_addr, &page, zeropage); cond_resched(); - if (unlikely(err == -EFAULT)) { + if (unlikely(err == -ENOENT)) { void *page_kaddr; up_read(&dst_mm->mmap_sem); From 683b47330c498a7bb90b1e863f63c9b5035dbba9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:28 -0800 Subject: [PATCH 0873/2608] userfaultfd: shmem: allocate anonymous memory for MAP_PRIVATE shmem commit 5b51072e97d587186c2f5390c8c9c1fb7e179505 upstream. Userfaultfd did not create private memory when UFFDIO_COPY was invoked on a MAP_PRIVATE shmem mapping. Instead it wrote to the shmem file, even when that had not been opened for writing. Though, fortunately, that could only happen where there was a hole in the file. Fix the shmem-backed implementation of UFFDIO_COPY to create private memory for MAP_PRIVATE mappings. The hugetlbfs-backed implementation was already correct. This change is visible to userland, if userfaultfd has been used in unintended ways: so it introduces a small risk of incompatibility, but is necessary in order to respect file permissions. An app that uses UFFDIO_COPY for anything like postcopy live migration won't notice the difference, and in fact it'll run faster because there will be no copy-on-write and memory waste in the tmpfs pagecache anymore. Userfaults on MAP_PRIVATE shmem keep triggering only on file holes like before. The real zeropage can also be built on a MAP_PRIVATE shmem mapping through UFFDIO_ZEROPAGE and that's safe because the zeropage pte is never dirty, in turn even an mprotect upgrading the vma permission from PROT_READ to PROT_READ|PROT_WRITE won't make the zeropage pte writable. Link: http://lkml.kernel.org/r/20181126173452.26955-3-aarcange@redhat.com Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Signed-off-by: Andrea Arcangeli Reported-by: Mike Rapoport Reviewed-by: Hugh Dickins Cc: Cc: "Dr. David Alan Gilbert" Cc: Jann Horn Cc: Mike Kravetz Cc: Peter Xu Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/userfaultfd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index c63c0fc5ecfa..cb82e50becf7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -381,7 +381,17 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, { ssize_t err; - if (vma_is_anonymous(dst_vma)) { + /* + * The normal page fault path for a shmem will invoke the + * fault, fill the hole in the file and COW it right away. The + * result generates plain anonymous memory. So when we are + * asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll + * generate anonymous memory directly without actually filling + * the hole. For the MAP_PRIVATE case the robustness check + * only happens in the pagetable (to verify it's still none) + * and not in the radix tree. + */ + if (!(dst_vma->vm_flags & VM_SHARED)) { if (!zeropage) err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, src_addr, page); @@ -480,7 +490,8 @@ retry: * dst_vma. */ err = -ENOMEM; - if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma))) + if (!(dst_vma->vm_flags & VM_SHARED) && + unlikely(anon_vma_prepare(dst_vma))) goto out_unlock; while (src_addr < src_start + len) { From af3edb30cddfcc1eace505af10d08826a9522dbe Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:37 -0800 Subject: [PATCH 0874/2608] userfaultfd: shmem: add i_size checks commit e2a50c1f64145a04959df2442305d57307e5395a upstream. With MAP_SHARED: recheck the i_size after taking the PT lock, to serialize against truncate with the PT lock. Delete the page from the pagecache if the i_size_read check fails. With MAP_PRIVATE: check the i_size after the PT lock before mapping anonymous memory or zeropages into the MAP_PRIVATE shmem mapping. A mostly irrelevant cleanup: like we do the delete_from_page_cache() pagecache removal after dropping the PT lock, the PT lock is a spinlock so drop it before the sleepable page lock. Link: http://lkml.kernel.org/r/20181126173452.26955-5-aarcange@redhat.com Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Signed-off-by: Andrea Arcangeli Reviewed-by: Mike Rapoport Reviewed-by: Hugh Dickins Reported-by: Jann Horn Cc: Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Peter Xu Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 18 ++++++++++++++++-- mm/userfaultfd.c | 26 ++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 9f856ecda73b..581e7baf0478 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2244,6 +2244,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, struct page *page; pte_t _dst_pte, *dst_pte; int ret; + pgoff_t offset, max_off; ret = -ENOMEM; if (!shmem_inode_acct_block(inode, 1)) @@ -2281,6 +2282,12 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, __SetPageSwapBacked(page); __SetPageUptodate(page); + ret = -EFAULT; + offset = linear_page_index(dst_vma, dst_addr); + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) + goto out_release; + ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false); if (ret) goto out_release; @@ -2299,8 +2306,14 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, if (dst_vma->vm_flags & VM_WRITE) _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); - ret = -EEXIST; dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + + ret = -EFAULT; + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) + goto out_release_uncharge_unlock; + + ret = -EEXIST; if (!pte_none(*dst_pte)) goto out_release_uncharge_unlock; @@ -2318,13 +2331,14 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, /* No need to invalidate - it was non-present before */ update_mmu_cache(dst_vma, dst_addr, dst_pte); - unlock_page(page); pte_unmap_unlock(dst_pte, ptl); + unlock_page(page); ret = 0; out: return ret; out_release_uncharge_unlock: pte_unmap_unlock(dst_pte, ptl); + delete_from_page_cache(page); out_release_uncharge: mem_cgroup_cancel_charge(page, memcg, false); out_release: diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index cb82e50becf7..8da1cd8cea89 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -34,6 +34,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, void *page_kaddr; int ret; struct page *page; + pgoff_t offset, max_off; + struct inode *inode; if (!*pagep) { ret = -ENOMEM; @@ -74,8 +76,17 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, if (dst_vma->vm_flags & VM_WRITE) _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); - ret = -EEXIST; dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + if (dst_vma->vm_file) { + /* the shmem MAP_PRIVATE case requires checking the i_size */ + inode = dst_vma->vm_file->f_inode; + offset = linear_page_index(dst_vma, dst_addr); + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + ret = -EFAULT; + if (unlikely(offset >= max_off)) + goto out_release_uncharge_unlock; + } + ret = -EEXIST; if (!pte_none(*dst_pte)) goto out_release_uncharge_unlock; @@ -109,11 +120,22 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm, pte_t _dst_pte, *dst_pte; spinlock_t *ptl; int ret; + pgoff_t offset, max_off; + struct inode *inode; _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr), dst_vma->vm_page_prot)); - ret = -EEXIST; dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + if (dst_vma->vm_file) { + /* the shmem MAP_PRIVATE case requires checking the i_size */ + inode = dst_vma->vm_file->f_inode; + offset = linear_page_index(dst_vma, dst_addr); + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + ret = -EFAULT; + if (unlikely(offset >= max_off)) + goto out_unlock; + } + ret = -EEXIST; if (!pte_none(*dst_pte)) goto out_unlock; set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); From 46466e23bc1bc262c28a020df982233c309ed958 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:43 -0800 Subject: [PATCH 0875/2608] userfaultfd: shmem: UFFDIO_COPY: set the page dirty if VM_WRITE is not set commit dcf7fe9d89763a28e0f43975b422ff141fe79e43 upstream. Set the page dirty if VM_WRITE is not set because in such case the pte won't be marked dirty and the page would be reclaimed without writepage (i.e. swapout in the shmem case). This was found by source review. Most apps (certainly including QEMU) only use UFFDIO_COPY on PROT_READ|PROT_WRITE mappings or the app can't modify the memory in the first place. This is for correctness and it could help the non cooperative use case to avoid unexpected data loss. Link: http://lkml.kernel.org/r/20181126173452.26955-6-aarcange@redhat.com Reviewed-by: Hugh Dickins Cc: stable@vger.kernel.org Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Reported-by: Hugh Dickins Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Jann Horn Cc: Mike Kravetz Cc: Mike Rapoport Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index 581e7baf0478..6c10f1d92251 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2305,6 +2305,16 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, _dst_pte = mk_pte(page, dst_vma->vm_page_prot); if (dst_vma->vm_flags & VM_WRITE) _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); + else { + /* + * We don't set the pte dirty if the vma has no + * VM_WRITE permission, so mark the page dirty or it + * could be freed from under us. We could do it + * unconditionally before unlock_page(), but doing it + * only if VM_WRITE is not set is faster. + */ + set_page_dirty(page); + } dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); @@ -2338,6 +2348,7 @@ out: return ret; out_release_uncharge_unlock: pte_unmap_unlock(dst_pte, ptl); + ClearPageDirty(page); delete_from_page_cache(page); out_release_uncharge: mem_cgroup_cancel_charge(page, memcg, false); From 6ea54af90988bf869e57265723c33af623b4abec Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 27 Nov 2017 23:47:35 +0100 Subject: [PATCH 0876/2608] scsi: scsi_devinfo: cleanly zero-pad devinfo strings commit 81df022b688d43d2a3667518b2f755d384397910 upstream. Cleanly fill memory for "vendor" and "model" with 0-bytes for the "compatible" case rather than adding only a single 0 byte. This simplifies the devinfo code a a bit, and avoids mistakes in other places of the code (not in current upstream, but we had one such mistake in the SUSE kernel). [mkp: applied by hand and added braces] Signed-off-by: Martin Wilck Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_devinfo.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index ea947a7c2596..6b594bc7d94a 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -34,7 +34,6 @@ struct scsi_dev_info_list_table { }; -static const char spaces[] = " "; /* 16 of them */ static unsigned scsi_default_dev_flags; static LIST_HEAD(scsi_dev_info_list); static char scsi_dev_flags[256]; @@ -296,20 +295,13 @@ static void scsi_strcpy_devinfo(char *name, char *to, size_t to_length, size_t from_length; from_length = strlen(from); - strncpy(to, from, min(to_length, from_length)); - if (from_length < to_length) { - if (compatible) { - /* - * NUL terminate the string if it is short. - */ - to[from_length] = '\0'; - } else { - /* - * space pad the string if it is short. - */ - strncpy(&to[from_length], spaces, - to_length - from_length); - } + /* this zero-pads the destination */ + strncpy(to, from, to_length); + if (from_length < to_length && !compatible) { + /* + * space pad the string if it is short. + */ + memset(&to[from_length], ' ', to_length - from_length); } if (from_length > to_length) printk(KERN_WARNING "%s: %s string '%s' is too long\n", From 705a2810a314ccf06fe6190ea555e586975f9b28 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:32 -0800 Subject: [PATCH 0877/2608] userfaultfd: shmem/hugetlbfs: only allow to register VM_MAYWRITE vmas commit 29ec90660d68bbdd69507c1c8b4e33aa299278b1 upstream. After the VMA to register the uffd onto is found, check that it has VM_MAYWRITE set before allowing registration. This way we inherit all common code checks before allowing to fill file holes in shmem and hugetlbfs with UFFDIO_COPY. The userfaultfd memory model is not applicable for readonly files unless it's a MAP_PRIVATE. Link: http://lkml.kernel.org/r/20181126173452.26955-4-aarcange@redhat.com Fixes: ff62a3421044 ("hugetlb: implement memfd sealing") Signed-off-by: Andrea Arcangeli Reviewed-by: Mike Rapoport Reviewed-by: Hugh Dickins Reported-by: Jann Horn Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Cc: Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Peter Xu Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 15 +++++++++++++++ mm/userfaultfd.c | 15 ++++++--------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3eda623e4cb4..f92e1f2fc846 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1362,6 +1362,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ret = -EINVAL; if (!vma_can_userfault(cur)) goto out_unlock; + + /* + * UFFDIO_COPY will fill file holes even without + * PROT_WRITE. This check enforces that if this is a + * MAP_SHARED, the process has write permission to the backing + * file. If VM_MAYWRITE is set it also enforces that on a + * MAP_SHARED vma: there is no F_WRITE_SEAL and no further + * F_WRITE_SEAL can be taken until the vma is destroyed. + */ + ret = -EPERM; + if (unlikely(!(cur->vm_flags & VM_MAYWRITE))) + goto out_unlock; + /* * If this vma contains ending address, and huge pages * check alignment. @@ -1407,6 +1420,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, BUG_ON(!vma_can_userfault(vma)); BUG_ON(vma->vm_userfaultfd_ctx.ctx && vma->vm_userfaultfd_ctx.ctx != ctx); + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this @@ -1553,6 +1567,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, cond_resched(); BUG_ON(!vma_can_userfault(vma)); + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 8da1cd8cea89..5d70fdbd8bc0 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -228,8 +228,9 @@ retry: if (!dst_vma || !is_vm_hugetlb_page(dst_vma)) goto out_unlock; /* - * Only allow __mcopy_atomic_hugetlb on userfaultfd - * registered ranges. + * Check the vma is registered in uffd, this is + * required to enforce the VM_MAYWRITE check done at + * uffd registration time. */ if (!dst_vma->vm_userfaultfd_ctx.ctx) goto out_unlock; @@ -472,13 +473,9 @@ retry: if (!dst_vma) goto out_unlock; /* - * Be strict and only allow __mcopy_atomic on userfaultfd - * registered ranges to prevent userland errors going - * unnoticed. As far as the VM consistency is concerned, it - * would be perfectly safe to remove this check, but there's - * no useful usage for __mcopy_atomic ouside of userfaultfd - * registered ranges. This is after all why these are ioctls - * belonging to the userfaultfd and not syscalls. + * Check the vma is registered in uffd, this is required to + * enforce the VM_MAYWRITE check done at uffd registration + * time. */ if (!dst_vma->vm_userfaultfd_ctx.ctx) goto out_unlock; From 6bf8c9fc962e5586397c9a3bbed9b806b1cdab55 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Jul 2018 14:58:03 +0200 Subject: [PATCH 0878/2608] ALSA: trident: Suppress gcc string warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d6b340d7cb33c816ef4abe8143764ec5ab14a5cc upstream. The meddlesome gcc warns about the possible shortname string in trident driver code: sound/pci/trident/trident.c: In function ‘snd_trident_probe’: sound/pci/trident/trident.c:126:2: warning: ‘strcat’ accessing 17 or more bytes at offsets 36 and 20 may overlap 1 byte at offset 36 [-Wrestrict] strcat(card->shortname, card->driver); It happens since gcc calculates the possible string size from card->driver, but this can't be true since we did set the string just before that, and they are much shorter. For shutting it up, use the exactly same string set to card->driver for strcat() to card->shortname, too. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/trident/trident.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/trident/trident.c b/sound/pci/trident/trident.c index cedf13b64803..2f18b1cdc2cd 100644 --- a/sound/pci/trident/trident.c +++ b/sound/pci/trident/trident.c @@ -123,7 +123,7 @@ static int snd_trident_probe(struct pci_dev *pci, } else { strcpy(card->shortname, "Trident "); } - strcat(card->shortname, card->driver); + strcat(card->shortname, str); sprintf(card->longname, "%s PCI Audio at 0x%lx, irq %d", card->shortname, trident->port, trident->irq); From a293ef5e92b523f899d5d58bd9bfc26f2ea1ee3e Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 10 Sep 2018 16:20:14 -0700 Subject: [PATCH 0879/2608] kgdboc: Fix restrict error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2dd453168643d9475028cd867c57e65956a0f7f9 upstream. There's an error when compiled with restrict: drivers/tty/serial/kgdboc.c: In function ‘configure_kgdboc’: drivers/tty/serial/kgdboc.c:137:2: error: ‘strcpy’ source argument is the same as destination [-Werror=restrict] strcpy(config, opt); ^~~~~~~~~~~~~~~~~~~ As the error implies, this is from trying to use config as both source and destination. Drop the call to the function where config is the argument since nothing else happens in the function. Signed-off-by: Laura Abbott Reviewed-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 2db68dfe497d..da020eea7bb5 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -162,15 +162,13 @@ static int configure_kgdboc(void) { struct tty_driver *p; int tty_line = 0; - int err; + int err = -ENODEV; char *cptr = config; struct console *cons; - err = kgdboc_option_setup(config); - if (err || !strlen(config) || isspace(config[0])) + if (!strlen(config) || isspace(config[0])) goto noconfig; - err = -ENODEV; kgdboc_io_ops.is_console = 0; kgdb_tty_driver = NULL; From e8693a9829d1135ff6da551023de0c707394733c Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 19 Sep 2018 18:59:01 -0700 Subject: [PATCH 0880/2608] kgdboc: Fix warning with module build commit 1cd25cbb2fedbc777f3a8c3cb1ba69b645aeaa64 upstream. After 2dd453168643 ("kgdboc: Fix restrict error"), kgdboc_option_setup is now only used when built in, resulting in a warning when compiled as a module: drivers/tty/serial/kgdboc.c:134:12: warning: 'kgdboc_option_setup' defined but not used [-Wunused-function] static int kgdboc_option_setup(char *opt) ^~~~~~~~~~~~~~~~~~~ Move the function under the appropriate ifdef for builtin only. Fixes: 2dd453168643 ("kgdboc: Fix restrict error") Reported-by: Stephen Rothwell Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index da020eea7bb5..c448225ef5ca 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -131,24 +131,6 @@ static void kgdboc_unregister_kbd(void) #define kgdboc_restore_input() #endif /* ! CONFIG_KDB_KEYBOARD */ -static int kgdboc_option_setup(char *opt) -{ - if (!opt) { - pr_err("kgdboc: config string not provided\n"); - return -EINVAL; - } - - if (strlen(opt) >= MAX_CONFIG_LEN) { - printk(KERN_ERR "kgdboc: config string too long\n"); - return -ENOSPC; - } - strcpy(config, opt); - - return 0; -} - -__setup("kgdboc=", kgdboc_option_setup); - static void cleanup_kgdboc(void) { if (kgdb_unregister_nmi_console()) @@ -316,6 +298,25 @@ static struct kgdb_io kgdboc_io_ops = { }; #ifdef CONFIG_KGDB_SERIAL_CONSOLE +static int kgdboc_option_setup(char *opt) +{ + if (!opt) { + pr_err("config string not provided\n"); + return -EINVAL; + } + + if (strlen(opt) >= MAX_CONFIG_LEN) { + pr_err("config string too long\n"); + return -ENOSPC; + } + strcpy(config, opt); + + return 0; +} + +__setup("kgdboc=", kgdboc_option_setup); + + /* This is only available if kgdboc is a built in for early debugging */ static int __init kgdboc_early_init(char *opt) { From b002e9dc214f0f917ce1b4df753c1da0b8a9ac84 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 12 Nov 2018 12:23:14 +0000 Subject: [PATCH 0881/2608] svm: Add mutex_lock to protect apic_access_page_done on AMD systems commit 30510387a5e45bfcf8190e03ec7aa15b295828e2 upstream. There is a race condition when accessing kvm->arch.apic_access_page_done. Due to it, x86_set_memory_region will fail when creating the second vcpu for a svm guest. Add a mutex_lock to serialize the accesses to apic_access_page_done. This lock is also used by vmx for the same purpose. Signed-off-by: Wei Wang Signed-off-by: Amadeusz Juskowiak Signed-off-by: Julian Stecklina Signed-off-by: Suravee Suthikulpanit Reviewed-by: Joerg Roedel Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 17f08db34547..4dc79d139810 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1399,20 +1399,23 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, static int avic_init_access_page(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - int ret; + int ret = 0; + mutex_lock(&kvm->slots_lock); if (kvm->arch.apic_access_page_done) - return 0; + goto out; - ret = x86_set_memory_region(kvm, - APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, - APIC_DEFAULT_PHYS_BASE, - PAGE_SIZE); + ret = __x86_set_memory_region(kvm, + APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + APIC_DEFAULT_PHYS_BASE, + PAGE_SIZE); if (ret) - return ret; + goto out; kvm->arch.apic_access_page_done = true; - return 0; +out: + mutex_unlock(&kvm->slots_lock); + return ret; } static int avic_init_backing_page(struct kvm_vcpu *vcpu) From 84ee7f89237f096a6fd2906bb7d909d3ba8a0432 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:47 +0200 Subject: [PATCH 0882/2608] drm/msm: fix OF child-node lookup commit f9a7082327e26f54067a49cac2316d31e0cc8ba7 upstream. Use the new of_get_compatible_child() helper to lookup the legacy pwrlevels child node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-child) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the probed device's node). While at it, also fix the related child-node reference leak. Fixes: e2af8b6b0ca1 ("drm/msm: gpu: Use OPP tables if we can") Cc: stable # 4.12 Cc: Jordan Crouse Cc: Rob Clark Cc: David Airlie Signed-off-by: Johan Hovold Signed-off-by: Rob Herring [ johan: backport to 4.14 ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/adreno/adreno_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index c75c4df4bc39..2c379774d3f2 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -223,8 +223,7 @@ static int adreno_get_legacy_pwrlevels(struct device *dev) struct device_node *child, *node; int ret; - node = of_find_compatible_node(dev->of_node, NULL, - "qcom,gpu-pwrlevels"); + node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels"); if (!node) { dev_err(dev, "Could not find the GPU powerlevels\n"); return -ENXIO; @@ -245,6 +244,8 @@ static int adreno_get_legacy_pwrlevels(struct device *dev) dev_pm_opp_add(dev, val, 0); } + of_node_put(node); + return 0; } From cf980cebed0313ef417ffa845fd92b7f00184e5e Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Thu, 29 Nov 2018 10:09:33 -0800 Subject: [PATCH 0883/2608] Input: xpad - quirk all PDP Xbox One gamepads commit a6754fae1e66e9a40fed406290d7ca3f2b4d227c upstream. Since we continue to find tons of new variants [0,1,2,3,4,5,6] that need the PDP quirk, let's just quirk all devices from PDP. [0]: https://github.com/paroj/xpad/pull/104 [1]: https://github.com/paroj/xpad/pull/105 [2]: https://github.com/paroj/xpad/pull/108 [3]: https://github.com/paroj/xpad/pull/109 [4]: https://github.com/paroj/xpad/pull/112 [5]: https://github.com/paroj/xpad/pull/115 [6]: https://github.com/paroj/xpad/pull/116 Fixes: e5c9c6a885fa ("Input: xpad - add support for PDP Xbox One controllers") Cc: stable@vger.kernel.org Signed-off-by: Cameron Gutman Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 2e52015634f9..f55dcdf99bc5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -483,18 +483,18 @@ static const u8 xboxone_hori_init[] = { }; /* - * This packet is required for some of the PDP pads to start + * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), - * (0x0e6f:0x02a4). + * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_pdp_init1[] = { 0x0a, 0x20, 0x00, 0x03, 0x00, 0x01, 0x14 }; /* - * This packet is required for some of the PDP pads to start + * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), - * (0x0e6f:0x02a4). + * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_pdp_init2[] = { 0x06, 0x20, 0x00, 0x02, 0x01, 0x00 @@ -530,12 +530,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x0165, xboxone_hori_init), XBOXONE_INIT_PKT(0x0f0d, 0x0067, xboxone_hori_init), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_fw2015_init), - XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init1), - XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), - XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1), - XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2), - XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init1), - XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init2), + XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), From 6dc574d0cac6e71c404ea031a9b59b9e817e05b2 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Sat, 24 Nov 2018 23:28:10 -0800 Subject: [PATCH 0884/2608] Input: synaptics - add PNP ID for ThinkPad P50 to SMBus commit 9df39bedbf292680655c6a947c77d6562c693d4a upstream. Noticed the other day the trackpoint felt different on my P50, then realized it was because rmi4 wasn't loading for this machine automatically. Suspend/resume, hibernate, and everything else seem to work perfectly fine on here. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 6f36e2d01e2e..65c9095eb517 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -170,6 +170,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN004a", /* W541 */ + "LEN005b", /* P50 */ "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ From 58a3a4e82dea339ef011ff2510c11e21bc53e418 Mon Sep 17 00:00:00 2001 From: Christian Hoff Date: Mon, 12 Nov 2018 11:11:29 -0800 Subject: [PATCH 0885/2608] Input: matrix_keypad - check for errors from of_get_named_gpio() commit d55bda1b3e7c5a87f10da54fdda866a9a9cef30b upstream. "of_get_named_gpio()" returns a negative error value if it fails and drivers should check for this. This missing check was now added to the matrix_keypad driver. In my case "of_get_named_gpio()" returned -EPROBE_DEFER because the referenced GPIOs belong to an I/O expander, which was not yet probed at the point in time when the matrix_keypad driver was loading. Because the driver did not check for errors from the "of_get_named_gpio()" routine, it was assuming that "-EPROBE_DEFER" is actually a GPIO number and continued as usual, which led to further errors like this later on: WARNING: CPU: 3 PID: 167 at drivers/gpio/gpiolib.c:114 gpio_to_desc+0xc8/0xd0 invalid GPIO -517 Note that the "GPIO number" -517 in the error message above is actually "-EPROBE_DEFER". As part of the patch a misleading error message "no platform data defined" was also removed. This does not lead to information loss because the other error paths in matrix_keypad_parse_dt() already print an error. Signed-off-by: Christian Hoff Suggested-by: Sebastian Reichel Reviewed-by: Sebastian Reichel Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/matrix_keypad.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 41614c185918..782dda68d93a 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -407,7 +407,7 @@ matrix_keypad_parse_dt(struct device *dev) struct matrix_keypad_platform_data *pdata; struct device_node *np = dev->of_node; unsigned int *gpios; - int i, nrow, ncol; + int ret, i, nrow, ncol; if (!np) { dev_err(dev, "device lacks DT data\n"); @@ -452,12 +452,19 @@ matrix_keypad_parse_dt(struct device *dev) return ERR_PTR(-ENOMEM); } - for (i = 0; i < pdata->num_row_gpios; i++) - gpios[i] = of_get_named_gpio(np, "row-gpios", i); + for (i = 0; i < nrow; i++) { + ret = of_get_named_gpio(np, "row-gpios", i); + if (ret < 0) + return ERR_PTR(ret); + gpios[i] = ret; + } - for (i = 0; i < pdata->num_col_gpios; i++) - gpios[pdata->num_row_gpios + i] = - of_get_named_gpio(np, "col-gpios", i); + for (i = 0; i < ncol; i++) { + ret = of_get_named_gpio(np, "col-gpios", i); + if (ret < 0) + return ERR_PTR(ret); + gpios[nrow + i] = ret; + } pdata->row_gpios = gpios; pdata->col_gpios = &gpios[pdata->num_row_gpios]; @@ -484,10 +491,8 @@ static int matrix_keypad_probe(struct platform_device *pdev) pdata = dev_get_platdata(&pdev->dev); if (!pdata) { pdata = matrix_keypad_parse_dt(&pdev->dev); - if (IS_ERR(pdata)) { - dev_err(&pdev->dev, "no platform data defined\n"); + if (IS_ERR(pdata)) return PTR_ERR(pdata); - } } else if (!pdata->keymap_data) { dev_err(&pdev->dev, "no keymap data defined\n"); return -EINVAL; From 4121029c4311c4e6298fc601bcb6b7dd2167b415 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 12 Nov 2018 11:23:39 -0800 Subject: [PATCH 0886/2608] Input: cros_ec_keyb - fix button/switch capability reports commit ac5722c1643a2fb75224c79b578214956d34f989 upstream. The cros_ec_keyb_bs array lists buttons and switches together, expecting that its users will match the appropriate type and bit fields. But cros_ec_keyb_register_bs() only checks the 'bit' field, which causes misreported input capabilities in some cases. For example, tablets (e.g., Scarlet -- a.k.a. Acer Chromebook Tab 10) were reporting a SW_LID capability, because EC_MKBP_POWER_BUTTON and EC_MKBP_LID_OPEN happen to share the same bit. (This has comedic effect on a tablet, in which a power-management daemon then thinks this "lid" is closed, and so puts the system to sleep as soon as it boots!) To fix this, check both the 'ev_type' and 'bit' fields before reporting the capability. Tested with a lid (Kevin / Samsung Chromebook Plus) and without a lid (Scarlet / Acer Chromebook Tab 10). This error got introduced when porting the feature from the downstream Chromium OS kernel to be upstreamed. Fixes: cdd7950e7aa4 ("input: cros_ec_keyb: Add non-matrix buttons and switches") Cc: Signed-off-by: Brian Norris Reviewed-by: Heiko Stuebner Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/cros_ec_keyb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index 79eb29550c34..0993b3f12df6 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -506,7 +506,8 @@ static int cros_ec_keyb_register_bs(struct cros_ec_keyb *ckdev) for (i = 0; i < ARRAY_SIZE(cros_ec_keyb_bs); i++) { const struct cros_ec_bs_map *map = &cros_ec_keyb_bs[i]; - if (buttons & BIT(map->bit)) + if ((map->ev_type == EV_KEY && (buttons & BIT(map->bit))) || + (map->ev_type == EV_SW && (switches & BIT(map->bit)))) input_set_capability(idev, map->ev_type, map->code); } From 1151c098512f148f409a9e6ec25b486ba3a312bf Mon Sep 17 00:00:00 2001 From: Patrick Gaskin Date: Mon, 12 Nov 2018 11:12:24 -0800 Subject: [PATCH 0887/2608] Input: elan_i2c - add ELAN0620 to the ACPI table commit 3ed64da3b790be7c63601e8ca6341b7dff74a660 upstream. Add ELAN0620 to the ACPI table to support the elan touchpad in the Lenovo IdeaPad 130-15IKB. Signed-off-by: Patrick Gaskin Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 766d30a7b085..e56223c6df8f 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1264,6 +1264,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0618", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, + { "ELAN0620", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, { } From ec2c95c56ba7689fd295acd6d43696846b38f4e1 Mon Sep 17 00:00:00 2001 From: Noah Westervelt Date: Thu, 29 Nov 2018 10:10:35 -0800 Subject: [PATCH 0888/2608] Input: elan_i2c - add ACPI ID for Lenovo IdeaPad 330-15ARR commit ad33429cd02565c28404bb16ae7a4c2bdfda6626 upstream. Add ELAN061E to the ACPI table to support Elan touchpad found in Lenovo IdeaPad 330-15ARR. Signed-off-by: Noah Westervelt Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index e56223c6df8f..5e20be31cd73 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1264,6 +1264,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0618", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, + { "ELAN061E", 0 }, { "ELAN0620", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, From 61ffe9b932037aabc00d51bc9095f4a20ea90d56 Mon Sep 17 00:00:00 2001 From: Adam Wong Date: Thu, 29 Nov 2018 10:04:35 -0800 Subject: [PATCH 0889/2608] Input: elan_i2c - add support for ELAN0621 touchpad commit bf87ade0dd7f8cf19dac4d3161d5e86abe0c062b upstream. Added the ability to detect the ELAN0621 touchpad found in some Lenovo laptops. Signed-off-by: Adam Wong Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 5e20be31cd73..368871a398a5 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1266,6 +1266,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN061D", 0 }, { "ELAN061E", 0 }, { "ELAN0620", 0 }, + { "ELAN0621", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, { } From 1aadbb4a325b81989c9dd454c44b846d590cda49 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 23 Nov 2018 09:06:36 +0800 Subject: [PATCH 0890/2608] btrfs: tree-checker: Don't check max block group size as current max chunk size limit is unreliable commit 10950929e994c5ecee149ff0873388d3c98f12b5 upstream. [BUG] A completely valid btrfs will refuse to mount, with error message like: BTRFS critical (device sdb2): corrupt leaf: root=2 block=239681536 slot=172 \ bg_start=12018974720 bg_len=10888413184, invalid block group size, \ have 10888413184 expect (0, 10737418240] This has been reported several times as the 4.19 kernel is now being used. The filesystem refuses to mount, but is otherwise ok and booting 4.18 is a workaround. Btrfs check returns no error, and all kernels used on this fs is later than 2011, which should all have the 10G size limit commit. [CAUSE] For a 12 devices btrfs, we could allocate a chunk larger than 10G due to stripe stripe bump up. __btrfs_alloc_chunk() |- max_stripe_size = 1G |- max_chunk_size = 10G |- data_stripe = 11 |- if (1G * 11 > 10G) { stripe_size = 976128930; stripe_size = round_up(976128930, SZ_16M) = 989855744 However the final stripe_size (989855744) * 11 = 10888413184, which is still larger than 10G. [FIX] For the comprehensive check, we need to do the full check at chunk read time, and rely on bg <-> chunk mapping to do the check. We could just skip the length check for now. Fixes: fce466eab7ac ("btrfs: tree-checker: Verify block_group_item") Cc: stable@vger.kernel.org # v4.19+ Reported-by: Wang Yugui Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index f206aec1525d..ebc882281b39 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -348,13 +348,11 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, /* * Here we don't really care about alignment since extent allocator can - * handle it. We care more about the size, as if one block group is - * larger than maximum size, it's must be some obvious corruption. + * handle it. We care more about the size. */ - if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { + if (key->offset == 0) { block_group_err(fs_info, leaf, slot, - "invalid block group size, have %llu expect (0, %llu]", - key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); + "invalid block group size 0"); return -EUCLEAN; } From 335cbe4df8b0d14cd43f207932b33eb8c756beb2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 6 Nov 2018 16:40:20 +0200 Subject: [PATCH 0891/2608] btrfs: Always try all copies when reading extent buffers commit f8397d69daef06d358430d3054662fb597e37c00 upstream. When a metadata read is served the endio routine btree_readpage_end_io_hook is called which eventually runs the tree-checker. If tree-checker fails to validate the read eb then it sets EXTENT_BUFFER_CORRUPT flag. This leads to btree_read_extent_buffer_pages wrongly assuming that all available copies of this extent buffer are wrong and failing prematurely. Fix this modify btree_read_extent_buffer_pages to read all copies of the data. This failure was exhibitted in xfstests btrfs/124 which would spuriously fail its balance operations. The reason was that when balance was run following re-introduction of the missing raid1 disk __btrfs_map_block would map the read request to stripe 0, which corresponded to devid 2 (the disk which is being removed in the test): item 2 key (FIRST_CHUNK_TREE CHUNK_ITEM 3553624064) itemoff 15975 itemsize 112 length 1073741824 owner 2 stripe_len 65536 type DATA|RAID1 io_align 65536 io_width 65536 sector_size 4096 num_stripes 2 sub_stripes 1 stripe 0 devid 2 offset 2156920832 dev_uuid 8466c350-ed0c-4c3b-b17d-6379b445d5c8 stripe 1 devid 1 offset 3553624064 dev_uuid 1265d8db-5596-477e-af03-df08eb38d2ca This caused read requests for a checksum item that to be routed to the stale disk which triggered the aforementioned logic involving EXTENT_BUFFER_CORRUPT flag. This then triggered cascading failures of the balance operation. Fixes: a826d6dcb32d ("Btrfs: check items for correctness as we search") CC: stable@vger.kernel.org # 4.4+ Suggested-by: Qu Wenruo Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e42673477c25..858d5812eb8f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -451,9 +451,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, int mirror_num = 0; int failed_mirror = 0; - clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { + clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, btree_get_extent, mirror_num); if (!ret) { @@ -464,14 +464,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, ret = -EIO; } - /* - * This buffer's crc is fine, but its contents are corrupted, so - * there is no reason to read the other copies, they won't be - * any less wrong. - */ - if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) - break; - num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); if (num_copies == 1) From f4069a6cf6faf910a90f469b72e27bc29e4beccd Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 30 Nov 2018 15:51:56 +0300 Subject: [PATCH 0892/2608] ARC: change defconfig defaults to ARCv2 commit b7cc40c32a8bfa6f2581a71747f6a7d491fe43ba upstream. Change the default defconfig (used with 'make defconfig') to the ARCv2 nsim_hs_defconfig, and also switch the default Kconfig ISA selection to ARCv2. This allows several default defconfigs (e.g. make defconfig, make allnoconfig, make tinyconfig) to all work with ARCv2 by default. Note since we change default architecture from ARCompact to ARCv2 it's required to explicitly mention architecture type in ARCompact defconfigs otherwise ARCv2 will be implied and binaries will be generated for ARCv2. Cc: # 4.4.x Signed-off-by: Kevin Hilman Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/Kconfig | 2 +- arch/arc/Makefile | 2 +- arch/arc/configs/axs101_defconfig | 1 + arch/arc/configs/nps_defconfig | 1 + arch/arc/configs/nsim_700_defconfig | 1 + arch/arc/configs/nsimosci_defconfig | 1 + arch/arc/configs/tb10x_defconfig | 1 + 7 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8ff066090680..9d06c9478a0d 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -109,7 +109,7 @@ endmenu choice prompt "ARC Instruction Set" - default ISA_ARCOMPACT + default ISA_ARCV2 config ISA_ARCOMPACT bool "ARCompact ISA" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 7c6c97782022..2917f56f0ea4 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,7 +6,7 @@ # published by the Free Software Foundation. # -KBUILD_DEFCONFIG := nsim_700_defconfig +KBUILD_DEFCONFIG := nsim_hs_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index ece78630d711..77df85de1121 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -15,6 +15,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 7c9c706ae7f6..ba337d3d3a17 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -15,6 +15,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index b1a78222699c..cdb06417d3d9 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -16,6 +16,7 @@ CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_LBDAF is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 14377b8234f7..8cbcbc185359 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -16,6 +16,7 @@ CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_LBDAF is not set diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index f30182549395..0130e29eeca1 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -19,6 +19,7 @@ CONFIG_KALLSYMS_ALL=y # CONFIG_AIO is not set CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y From 097c13e1b78d48c1a3a75dbfabc10736863056a7 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 20 Nov 2018 13:30:19 +0300 Subject: [PATCH 0893/2608] arc: [devboards] Add support of NFSv3 ACL commit 6b04114f6fae5e84d33404c2970b1949c032546e upstream. By default NFSv3 doesn't support ACL (Access Control Lists) which might be quite convenient to have so that mounted NFS behaves exactly as any other local file-system. In particular missing support of ACL makes umask useless. This among other thigs fixes Glibc's "nptl/tst-umask1". Signed-off-by: Alexey Brodkin Cc: Cupertino Miranda Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/configs/axs101_defconfig | 1 + arch/arc/configs/axs103_defconfig | 1 + arch/arc/configs/axs103_smp_defconfig | 1 + arch/arc/configs/hsdk_defconfig | 1 + arch/arc/configs/nps_defconfig | 1 + arch/arc/configs/nsimosci_defconfig | 1 + arch/arc/configs/nsimosci_hs_defconfig | 1 + arch/arc/configs/nsimosci_hs_smp_defconfig | 1 + arch/arc/configs/vdk_hs38_defconfig | 1 + arch/arc/configs/vdk_hs38_smp_defconfig | 1 + 10 files changed, 10 insertions(+) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 77df85de1121..5d5ba2104ba7 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -99,6 +99,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 240c9251a7d4..0874db2d48a8 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -97,6 +97,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index af54b96abee0..cf5df0e1cb08 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -100,6 +100,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 762b1fcd93dc..083560e9e571 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -66,6 +66,7 @@ CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index ba337d3d3a17..9121c6ba15d0 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -75,6 +75,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 8cbcbc185359..c4577bd9196c 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -70,5 +70,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 7e61c923a3cd..b20692c82d3c 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -68,5 +68,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 299fbe8003b2..5ad4949af6d0 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -79,6 +79,7 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FTRACE=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 4fcf4f2503f6..4587c9af5afe 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -88,6 +88,7 @@ CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 7b71464f6c2f..1855aa995bc9 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -92,6 +92,7 @@ CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set From 99bfa7bc16c3b5c4bf06d3885e535060dd5cfeef Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 16 Nov 2018 13:43:17 +0100 Subject: [PATCH 0894/2608] udf: Allow mounting volumes with incorrect identification strings commit b54e41f5efcb4316b2f30b30c2535cc194270373 upstream. Commit c26f6c615788 ("udf: Fix conversion of 'dstring' fields to UTF8") started to be more strict when checking whether converted strings are properly formatted. Sudip reports that there are DVDs where the volume identification string is actually too long - UDF reports: [ 632.309320] UDF-fs: incorrect dstring lengths (32/32) during mount and fails the mount. This is mostly harmless failure as we don't need volume identification (and even less volume set identification) for anything. So just truncate the volume identification string if it is too long and replace it with 'Invalid' if we just cannot convert it for other reasons. This keeps slightly incorrect media still mountable. CC: stable@vger.kernel.org Fixes: c26f6c615788 ("udf: Fix conversion of 'dstring' fields to UTF8") Reported-and-tested-by: Sudip Mukherjee Signed-off-by: Jan Kara Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/udf/super.c | 16 ++++++++++------ fs/udf/unicode.c | 14 +++++++++++--- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 9b0d6562d0a1..242d960df9a1 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -922,16 +922,20 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) } ret = udf_dstrCS0toUTF8(outstr, 31, pvoldesc->volIdent, 32); - if (ret < 0) - goto out_bh; - - strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret); + if (ret < 0) { + strcpy(UDF_SB(sb)->s_volume_ident, "InvalidName"); + pr_warn("incorrect volume identification, setting to " + "'InvalidName'\n"); + } else { + strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret); + } udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident); ret = udf_dstrCS0toUTF8(outstr, 127, pvoldesc->volSetIdent, 128); - if (ret < 0) + if (ret < 0) { + ret = 0; goto out_bh; - + } outstr[ret] = 0; udf_debug("volSetIdent[] = '%s'\n", outstr); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 3a3be23689b3..61a1738895b7 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -341,6 +341,11 @@ try_again: return u_len; } +/* + * Convert CS0 dstring to output charset. Warning: This function may truncate + * input string if it is too long as it is used for informational strings only + * and it is better to truncate the string than to refuse mounting a media. + */ int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) { @@ -349,9 +354,12 @@ int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len, if (i_len > 0) { s_len = ocu_i[i_len - 1]; if (s_len >= i_len) { - pr_err("incorrect dstring lengths (%d/%d)\n", - s_len, i_len); - return -EINVAL; + pr_warn("incorrect dstring lengths (%d/%d)," + " truncating\n", s_len, i_len); + s_len = i_len - 1; + /* 2-byte encoding? Need to round properly... */ + if (ocu_i[0] == 16) + s_len -= (s_len - 1) & 2; } } From b92cee8097d99ec641521975b5fa6eb907ab5361 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 29 Oct 2017 01:50:06 +0900 Subject: [PATCH 0895/2608] reset: make device_reset_optional() really optional commit 1554bbd4ad401b7f0f916c0891874111c10befe5 upstream. Commit bb475230b8e5 ("reset: make optional functions really optional") converted *_get_optional* functions, but device_reset_optional() was left behind. Convert it in the same way. Signed-off-by: Masahiro Yamada Signed-off-by: Philipp Zabel Cc: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/reset/core.c | 9 +++++---- include/linux/reset.h | 28 +++++++++++++--------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 1d21c6f7d56c..da4292e9de97 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -566,17 +566,18 @@ EXPORT_SYMBOL_GPL(__devm_reset_control_get); * device_reset - find reset controller associated with the device * and perform reset * @dev: device to be reset by the controller + * @optional: whether it is optional to reset the device * - * Convenience wrapper for reset_control_get() and reset_control_reset(). + * Convenience wrapper for __reset_control_get() and reset_control_reset(). * This is useful for the common case of devices with single, dedicated reset * lines. */ -int device_reset(struct device *dev) +int __device_reset(struct device *dev, bool optional) { struct reset_control *rstc; int ret; - rstc = reset_control_get(dev, NULL); + rstc = __reset_control_get(dev, NULL, 0, 0, optional); if (IS_ERR(rstc)) return PTR_ERR(rstc); @@ -586,7 +587,7 @@ int device_reset(struct device *dev) return ret; } -EXPORT_SYMBOL_GPL(device_reset); +EXPORT_SYMBOL_GPL(__device_reset); /** * APIs to manage an array of reset controls. diff --git a/include/linux/reset.h b/include/linux/reset.h index 4c7871ddf3c6..b681019fc04c 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -20,22 +20,16 @@ struct reset_control *__reset_control_get(struct device *dev, const char *id, int index, bool shared, bool optional); void reset_control_put(struct reset_control *rstc); +int __device_reset(struct device *dev, bool optional); struct reset_control *__devm_reset_control_get(struct device *dev, const char *id, int index, bool shared, bool optional); -int __must_check device_reset(struct device *dev); - struct reset_control *devm_reset_control_array_get(struct device *dev, bool shared, bool optional); struct reset_control *of_reset_control_array_get(struct device_node *np, bool shared, bool optional); -static inline int device_reset_optional(struct device *dev) -{ - return device_reset(dev); -} - #else static inline int reset_control_reset(struct reset_control *rstc) @@ -62,15 +56,9 @@ static inline void reset_control_put(struct reset_control *rstc) { } -static inline int __must_check device_reset(struct device *dev) +static inline int __device_reset(struct device *dev, bool optional) { - WARN_ON(1); - return -ENOTSUPP; -} - -static inline int device_reset_optional(struct device *dev) -{ - return -ENOTSUPP; + return optional ? 0 : -ENOTSUPP; } static inline struct reset_control *__of_reset_control_get( @@ -109,6 +97,16 @@ of_reset_control_array_get(struct device_node *np, bool shared, bool optional) #endif /* CONFIG_RESET_CONTROLLER */ +static inline int __must_check device_reset(struct device *dev) +{ + return __device_reset(dev, false); +} + +static inline int device_reset_optional(struct device *dev) +{ + return __device_reset(dev, true); +} + /** * reset_control_get_exclusive - Lookup and obtain an exclusive reference * to a reset controller. From d2e29b46a9cf4e733e147442a20dbba376decc26 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 29 Oct 2017 01:50:07 +0900 Subject: [PATCH 0896/2608] reset: remove remaining WARN_ON() in commit bb6c7768385b200063a14d6615cc1246c3d00760 upstream. Commit bb475230b8e5 ("reset: make optional functions really optional") gave a new meaning to _get_optional variants. The differentiation by WARN_ON() is not needed any more. We already have inconsistency about this; (devm_)reset_control_get_exclusive() has WARN_ON() check, but of_reset_control_get_exclusive() does not. Signed-off-by: Masahiro Yamada Signed-off-by: Philipp Zabel Cc: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- include/linux/reset.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index b681019fc04c..ed6fb0290797 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -125,9 +125,6 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control * __must_check reset_control_get_exclusive(struct device *dev, const char *id) { -#ifndef CONFIG_RESET_CONTROLLER - WARN_ON(1); -#endif return __reset_control_get(dev, id, 0, false, false); } @@ -273,9 +270,6 @@ static inline struct reset_control * __must_check devm_reset_control_get_exclusive(struct device *dev, const char *id) { -#ifndef CONFIG_RESET_CONTROLLER - WARN_ON(1); -#endif return __devm_reset_control_get(dev, id, 0, false, false); } From 60720df8bf43e8ca2ce1a17936904a52129c8471 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 30 Nov 2018 14:09:00 -0800 Subject: [PATCH 0897/2608] mm: cleancache: fix corruption on missed inode invalidation commit 6ff38bd40230af35e446239396e5fc8ebd6a5248 upstream. If all pages are deleted from the mapping by memory reclaim and also moved to the cleancache: __delete_from_page_cache (no shadow case) unaccount_page_cache_page cleancache_put_page page_cache_delete mapping->nrpages -= nr (nrpages becomes 0) We don't clean the cleancache for an inode after final file truncation (removal). truncate_inode_pages_final check (nrpages || nrexceptional) is false no truncate_inode_pages no cleancache_invalidate_inode(mapping) These way when reading the new file created with same inode we may get these trash leftover pages from cleancache and see wrong data instead of the contents of the new file. Fix it by always doing truncate_inode_pages which is already ready for nrpages == 0 && nrexceptional == 0 case and just invalidates inode. [akpm@linux-foundation.org: add comment, per Jan] Link: http://lkml.kernel.org/r/20181112095734.17979-1-ptikhomirov@virtuozzo.com Fixes: commit 91b0abe36a7b ("mm + fs: store shadow entries in page cache") Signed-off-by: Pavel Tikhomirov Reviewed-by: Vasily Averin Reviewed-by: Andrey Ryabinin Reviewed-by: Jan Kara Cc: Johannes Weiner Cc: Mel Gorman Cc: Matthew Wilcox Cc: Andi Kleen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Vasily Averin Signed-off-by: Greg Kroah-Hartman --- mm/truncate.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/truncate.c b/mm/truncate.c index 2330223841fb..d3a2737cc188 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -471,9 +471,13 @@ void truncate_inode_pages_final(struct address_space *mapping) */ spin_lock_irq(&mapping->tree_lock); spin_unlock_irq(&mapping->tree_lock); - - truncate_inode_pages(mapping, 0); } + + /* + * Cleancache needs notification even if there are no pages or shadow + * entries. + */ + truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); From 402519439ccb9410faf6287bf98f99817bd0c354 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:44 +0200 Subject: [PATCH 0898/2608] thermal/drivers/hisi: Remove the multiple sensors support commit ff4ec2997df8fe7cc40513dbe5f86d9f88fb6be7 upstream. By essence, the tsensor does not really support multiple sensor at the same time. It allows to set a sensor and use it to get the temperature, another sensor could be switched but with a delay of 3-5ms. It is difficult to read simultaneously several sensors without a big delay. Today, just one sensor is used, it is not necessary to deal with multiple sensors in the code. Remove them and if it is needed in the future add them on top of a code which will be clean up in the meantime. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Acked-by: Wangtao (Kevin, Kirin) Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 75 +++++++++------------------------- 1 file changed, 19 insertions(+), 56 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 6d8906d65476..aecbfe9fc8f2 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -40,6 +40,7 @@ #define HISI_TEMP_STEP (784) #define HISI_MAX_SENSORS 4 +#define HISI_DEFAULT_SENSOR 2 struct hisi_thermal_sensor { struct hisi_thermal_data *thermal; @@ -54,9 +55,8 @@ struct hisi_thermal_data { struct mutex thermal_lock; /* protects register data */ struct platform_device *pdev; struct clk *clk; - struct hisi_thermal_sensor sensors[HISI_MAX_SENSORS]; - - int irq, irq_bind_sensor; + struct hisi_thermal_sensor sensors; + int irq; bool irq_enabled; void __iomem *regs; @@ -133,7 +133,7 @@ static void hisi_thermal_enable_bind_irq_sensor mutex_lock(&data->thermal_lock); - sensor = &data->sensors[data->irq_bind_sensor]; + sensor = &data->sensors; /* setting the hdak time */ writel(0x0, data->regs + TEMP0_CFG); @@ -181,31 +181,8 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) struct hisi_thermal_sensor *sensor = _sensor; struct hisi_thermal_data *data = sensor->thermal; - int sensor_id = -1, i; - long max_temp = 0; - *temp = hisi_thermal_get_sensor_temp(data, sensor); - sensor->sensor_temp = *temp; - - for (i = 0; i < HISI_MAX_SENSORS; i++) { - if (!data->sensors[i].tzd) - continue; - - if (data->sensors[i].sensor_temp >= max_temp) { - max_temp = data->sensors[i].sensor_temp; - sensor_id = i; - } - } - - /* If no sensor has been enabled, then skip to enable irq */ - if (sensor_id == -1) - return 0; - - mutex_lock(&data->thermal_lock); - data->irq_bind_sensor = sensor_id; - mutex_unlock(&data->thermal_lock); - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", sensor->id, data->irq_enabled, *temp, sensor->thres_temp); /* @@ -218,7 +195,7 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) return 0; } - if (max_temp < sensor->thres_temp) { + if (*temp < sensor->thres_temp) { data->irq_enabled = true; hisi_thermal_enable_bind_irq_sensor(data); enable_irq(data->irq); @@ -245,22 +222,16 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; struct hisi_thermal_sensor *sensor; - int i; mutex_lock(&data->thermal_lock); - sensor = &data->sensors[data->irq_bind_sensor]; + sensor = &data->sensors; dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", sensor->thres_temp); mutex_unlock(&data->thermal_lock); - for (i = 0; i < HISI_MAX_SENSORS; i++) { - if (!data->sensors[i].tzd) - continue; - - thermal_zone_device_update(data->sensors[i].tzd, - THERMAL_EVENT_UNSPECIFIED); - } + thermal_zone_device_update(data->sensors.tzd, + THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } @@ -317,7 +288,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; struct resource *res; - int i; int ret; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); @@ -359,14 +329,13 @@ static int hisi_thermal_probe(struct platform_device *pdev) hisi_thermal_enable_bind_irq_sensor(data); data->irq_enabled = true; - for (i = 0; i < HISI_MAX_SENSORS; ++i) { - ret = hisi_thermal_register_sensor(pdev, data, - &data->sensors[i], i); - if (ret) - dev_err(&pdev->dev, - "failed to register thermal sensor: %d\n", ret); - else - hisi_thermal_toggle_sensor(&data->sensors[i], true); + ret = hisi_thermal_register_sensor(pdev, data, + &data->sensors, + HISI_DEFAULT_SENSOR); + if (ret) { + dev_err(&pdev->dev, "failed to register thermal sensor: %d\n", + ret); + return ret; } ret = devm_request_threaded_irq(&pdev->dev, data->irq, @@ -378,6 +347,8 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } + hisi_thermal_toggle_sensor(&data->sensors, true); + enable_irq(data->irq); return 0; @@ -386,17 +357,9 @@ static int hisi_thermal_probe(struct platform_device *pdev) static int hisi_thermal_remove(struct platform_device *pdev) { struct hisi_thermal_data *data = platform_get_drvdata(pdev); - int i; - - for (i = 0; i < HISI_MAX_SENSORS; i++) { - struct hisi_thermal_sensor *sensor = &data->sensors[i]; - - if (!sensor->tzd) - continue; - - hisi_thermal_toggle_sensor(sensor, false); - } + struct hisi_thermal_sensor *sensor = &data->sensors; + hisi_thermal_toggle_sensor(sensor, false); hisi_thermal_disable_sensor(data); clk_disable_unprepare(data->clk); From e29649e49c81b1a1a78174f70c2e1ddf41f7832f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:48 +0200 Subject: [PATCH 0899/2608] thermal/drivers/hisi: Remove pointless lock commit 2d4fa7b4c6f8080ced2e8237c9f46fb1fc110d64 upstream. The threaded interrupt inspect the sensors structure to look in the temp threshold field, but this field is read-only in all the code, except in the probe function before the threaded interrupt is set. In other words there is not race window in the threaded interrupt when reading the field value. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index aecbfe9fc8f2..0c2966c285b1 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -221,14 +221,10 @@ static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev) static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; - struct hisi_thermal_sensor *sensor; - - mutex_lock(&data->thermal_lock); - sensor = &data->sensors; + struct hisi_thermal_sensor *sensor = &data->sensors; dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", sensor->thres_temp); - mutex_unlock(&data->thermal_lock); thermal_zone_device_update(data->sensors.tzd, THERMAL_EVENT_UNSPECIFIED); From f7105bd03755272a0e434ddc54e3a78dae2ed0c6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:49 +0200 Subject: [PATCH 0900/2608] thermal/drivers/hisi: Encapsulate register writes into helpers commit 1e11b014271ceccb5ea04ae58f4829ac8209a86d upstream. Hopefully, the function name can help to clarify the semantic of the operations when writing in the register. Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 92 ++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 0c2966c285b1..a803043f37ae 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -26,6 +26,7 @@ #include "thermal_core.h" +#define TEMP0_LAG (0x0) #define TEMP0_TH (0x4) #define TEMP0_RST_TH (0x8) #define TEMP0_CFG (0xC) @@ -96,6 +97,56 @@ static inline long hisi_thermal_round_temp(int temp) hisi_thermal_temp_to_step(temp)); } +static inline void hisi_thermal_set_lag(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_LAG); +} + +static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_INT_CLR); +} + +static inline void hisi_thermal_alarm_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_INT_EN); +} + +static inline void hisi_thermal_alarm_set(void __iomem *addr, int temp) +{ + writel(hisi_thermal_temp_to_step(temp) | 0x0FFFFFF00, addr + TEMP0_TH); +} + +static inline void hisi_thermal_reset_set(void __iomem *addr, int temp) +{ + writel(hisi_thermal_temp_to_step(temp), addr + TEMP0_RST_TH); +} + +static inline void hisi_thermal_reset_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_RST_MSK); +} + +static inline void hisi_thermal_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_EN); +} + +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) +{ + writel((sensor << 12), addr + TEMP0_CFG); +} + +static inline int hisi_thermal_get_temperature(void __iomem *addr) +{ + return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); +} + +static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_CFG); +} + static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, struct hisi_thermal_sensor *sensor) { @@ -104,22 +155,21 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, mutex_lock(&data->thermal_lock); /* disable interrupt */ - writel(0x0, data->regs + TEMP0_INT_EN); - writel(0x1, data->regs + TEMP0_INT_CLR); + hisi_thermal_alarm_enable(data->regs, 0); + hisi_thermal_alarm_clear(data->regs, 1); /* disable module firstly */ - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 0); /* select sensor id */ - writel((sensor->id << 12), data->regs + TEMP0_CFG); + hisi_thermal_sensor_select(data->regs, sensor->id); /* enable module */ - writel(0x1, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 1); usleep_range(3000, 5000); - val = readl(data->regs + TEMP0_VALUE); - val = hisi_thermal_step_to_temp(val); + val = hisi_thermal_get_temperature(data->regs); mutex_unlock(&data->thermal_lock); @@ -136,28 +186,26 @@ static void hisi_thermal_enable_bind_irq_sensor sensor = &data->sensors; /* setting the hdak time */ - writel(0x0, data->regs + TEMP0_CFG); + hisi_thermal_hdak_set(data->regs, 0); /* disable module firstly */ - writel(0x0, data->regs + TEMP0_RST_MSK); - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_reset_enable(data->regs, 0); + hisi_thermal_enable(data->regs, 0); /* select sensor id */ - writel((sensor->id << 12), data->regs + TEMP0_CFG); + hisi_thermal_sensor_select(data->regs, sensor->id); /* enable for interrupt */ - writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, - data->regs + TEMP0_TH); + hisi_thermal_alarm_set(data->regs, sensor->thres_temp); - writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET), - data->regs + TEMP0_RST_TH); + hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); /* enable module */ - writel(0x1, data->regs + TEMP0_RST_MSK); - writel(0x1, data->regs + TEMP0_EN); + hisi_thermal_reset_enable(data->regs, 1); + hisi_thermal_enable(data->regs, 1); - writel(0x0, data->regs + TEMP0_INT_CLR); - writel(0x1, data->regs + TEMP0_INT_EN); + hisi_thermal_alarm_clear(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 1); usleep_range(3000, 5000); @@ -169,9 +217,9 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) mutex_lock(&data->thermal_lock); /* disable sensor module */ - writel(0x0, data->regs + TEMP0_INT_EN); - writel(0x0, data->regs + TEMP0_RST_MSK); - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 0); + hisi_thermal_reset_enable(data->regs, 0); mutex_unlock(&data->thermal_lock); } From aa6e035f259a5a4402a204565b7b2fb26bbeaaf2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:50 +0200 Subject: [PATCH 0901/2608] thermal/drivers/hisi: Fix configuration register setting commit b424315a287c70eeb5f920f84c92492bd2f5658e upstream. The TEMP0_CFG configuration register contains different field to set up the temperature controller. However in the code, nothing prevents a setup to overwrite the previous one: eg. writing the hdak value overwrites the sensor selection, the sensor selection overwrites the hdak value. In order to prevent such thing, use a regmap-like mechanism by reading the value before, set the corresponding bits and write the result. Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 36 ++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index a803043f37ae..31b52ebf426f 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -30,6 +30,8 @@ #define TEMP0_TH (0x4) #define TEMP0_RST_TH (0x8) #define TEMP0_CFG (0xC) +#define TEMP0_CFG_SS_MSK (0xF000) +#define TEMP0_CFG_HDAK_MSK (0x30) #define TEMP0_EN (0x10) #define TEMP0_INT_EN (0x14) #define TEMP0_INT_CLR (0x18) @@ -132,19 +134,41 @@ static inline void hisi_thermal_enable(void __iomem *addr, int value) writel(value, addr + TEMP0_EN); } -static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) -{ - writel((sensor << 12), addr + TEMP0_CFG); -} - static inline int hisi_thermal_get_temperature(void __iomem *addr) { return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); } +/* + * Temperature configuration register - Sensor selection + * + * Bits [19:12] + * + * 0x0: local sensor (default) + * 0x1: remote sensor 1 (ACPU cluster 1) + * 0x2: remote sensor 2 (ACPU cluster 0) + * 0x3: remote sensor 3 (G3D) + */ +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) +{ + writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_SS_MSK) | + (sensor << 12), addr + TEMP0_CFG); +} + +/* + * Temperature configuration register - Hdak conversion polling interval + * + * Bits [5:4] + * + * 0x0 : 0.768 ms + * 0x1 : 6.144 ms + * 0x2 : 49.152 ms + * 0x3 : 393.216 ms + */ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) { - writel(value, addr + TEMP0_CFG); + writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_HDAK_MSK) | + (value << 4), addr + TEMP0_CFG); } static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, From dd9989eda8327d9894f8f2c4812ca7dd085fad51 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:51 +0200 Subject: [PATCH 0902/2608] thermal/drivers/hisi: Remove costly sensor inspection commit 10d7e9a9181f4637640f388d334c6740c1b5d0e8 upstream. The sensor is all setup, bind, resetted, acked, etc... every single second. That was the way to workaround a problem with the interrupt bouncing again and again. With the following changes, we fix all in one: - Do the setup, one time, at probe time - Add the IRQF_ONESHOT, ack the interrupt in the threaded handler - Remove the interrupt handler - Set the correct value for the LAG register - Remove all the irq_enabled stuff in the code as the interruption handling is fixed - Remove the 3ms delay - Reorder the initialization routine to be in the right order It ends up to a nicer code and more efficient, the 3-5ms delay is removed from the get_temp() path. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 205 +++++++++++++++------------------ 1 file changed, 94 insertions(+), 111 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 31b52ebf426f..7c5d4647cb5e 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -41,6 +41,7 @@ #define HISI_TEMP_BASE (-60000) #define HISI_TEMP_RESET (100000) #define HISI_TEMP_STEP (784) +#define HISI_TEMP_LAG (3500) #define HISI_MAX_SENSORS 4 #define HISI_DEFAULT_SENSOR 2 @@ -60,8 +61,6 @@ struct hisi_thermal_data { struct clk *clk; struct hisi_thermal_sensor sensors; int irq; - bool irq_enabled; - void __iomem *regs; }; @@ -99,9 +98,40 @@ static inline long hisi_thermal_round_temp(int temp) hisi_thermal_temp_to_step(temp)); } +/* + * The lag register contains 5 bits encoding the temperature in steps. + * + * Each time the temperature crosses the threshold boundary, an + * interrupt is raised. It could be when the temperature is going + * above the threshold or below. However, if the temperature is + * fluctuating around this value due to the load, we can receive + * several interrupts which may not desired. + * + * We can setup a temperature representing the delta between the + * threshold and the current temperature when the temperature is + * decreasing. + * + * For instance: the lag register is 5°C, the threshold is 65°C, when + * the temperature reaches 65°C an interrupt is raised and when the + * temperature decrease to 65°C - 5°C another interrupt is raised. + * + * A very short lag can lead to an interrupt storm, a long lag + * increase the latency to react to the temperature changes. In our + * case, that is not really a problem as we are polling the + * temperature. + * + * [0:4] : lag register + * + * The temperature is coded in steps, cf. HISI_TEMP_STEP. + * + * Min : 0x00 : 0.0 °C + * Max : 0x1F : 24.3 °C + * + * The 'value' parameter is in milliCelsius. + */ static inline void hisi_thermal_set_lag(void __iomem *addr, int value) { - writel(value, addr + TEMP0_LAG); + writel((value / HISI_TEMP_STEP) & 0x1F, addr + TEMP0_LAG); } static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) @@ -171,71 +201,6 @@ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) (value << 4), addr + TEMP0_CFG); } -static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, - struct hisi_thermal_sensor *sensor) -{ - long val; - - mutex_lock(&data->thermal_lock); - - /* disable interrupt */ - hisi_thermal_alarm_enable(data->regs, 0); - hisi_thermal_alarm_clear(data->regs, 1); - - /* disable module firstly */ - hisi_thermal_enable(data->regs, 0); - - /* select sensor id */ - hisi_thermal_sensor_select(data->regs, sensor->id); - - /* enable module */ - hisi_thermal_enable(data->regs, 1); - - usleep_range(3000, 5000); - - val = hisi_thermal_get_temperature(data->regs); - - mutex_unlock(&data->thermal_lock); - - return val; -} - -static void hisi_thermal_enable_bind_irq_sensor - (struct hisi_thermal_data *data) -{ - struct hisi_thermal_sensor *sensor; - - mutex_lock(&data->thermal_lock); - - sensor = &data->sensors; - - /* setting the hdak time */ - hisi_thermal_hdak_set(data->regs, 0); - - /* disable module firstly */ - hisi_thermal_reset_enable(data->regs, 0); - hisi_thermal_enable(data->regs, 0); - - /* select sensor id */ - hisi_thermal_sensor_select(data->regs, sensor->id); - - /* enable for interrupt */ - hisi_thermal_alarm_set(data->regs, sensor->thres_temp); - - hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); - - /* enable module */ - hisi_thermal_reset_enable(data->regs, 1); - hisi_thermal_enable(data->regs, 1); - - hisi_thermal_alarm_clear(data->regs, 0); - hisi_thermal_alarm_enable(data->regs, 1); - - usleep_range(3000, 5000); - - mutex_unlock(&data->thermal_lock); -} - static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) { mutex_lock(&data->thermal_lock); @@ -253,25 +218,10 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) struct hisi_thermal_sensor *sensor = _sensor; struct hisi_thermal_data *data = sensor->thermal; - *temp = hisi_thermal_get_sensor_temp(data, sensor); + *temp = hisi_thermal_get_temperature(data->regs); - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", - sensor->id, data->irq_enabled, *temp, sensor->thres_temp); - /* - * Bind irq to sensor for two cases: - * Reenable alarm IRQ if temperature below threshold; - * if irq has been enabled, always set it; - */ - if (data->irq_enabled) { - hisi_thermal_enable_bind_irq_sensor(data); - return 0; - } - - if (*temp < sensor->thres_temp) { - data->irq_enabled = true; - hisi_thermal_enable_bind_irq_sensor(data); - enable_irq(data->irq); - } + dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n", + sensor->id, *temp, sensor->thres_temp); return 0; } @@ -280,26 +230,27 @@ static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = { .get_temp = hisi_thermal_get_temp, }; -static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev) -{ - struct hisi_thermal_data *data = dev; - - disable_irq_nosync(irq); - data->irq_enabled = false; - - return IRQ_WAKE_THREAD; -} - static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; struct hisi_thermal_sensor *sensor = &data->sensors; + int temp; - dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", - sensor->thres_temp); + hisi_thermal_alarm_clear(data->regs, 1); - thermal_zone_device_update(data->sensors.tzd, - THERMAL_EVENT_UNSPECIFIED); + temp = hisi_thermal_get_temperature(data->regs); + + if (temp >= sensor->thres_temp) { + dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n", + temp, sensor->thres_temp); + + thermal_zone_device_update(data->sensors.tzd, + THERMAL_EVENT_UNSPECIFIED); + + } else if (temp < sensor->thres_temp) { + dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n", + temp, sensor->thres_temp); + } return IRQ_HANDLED; } @@ -352,6 +303,40 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor, on ? THERMAL_DEVICE_ENABLED : THERMAL_DEVICE_DISABLED); } +static int hisi_thermal_setup(struct hisi_thermal_data *data) +{ + struct hisi_thermal_sensor *sensor; + + sensor = &data->sensors; + + /* disable module firstly */ + hisi_thermal_reset_enable(data->regs, 0); + hisi_thermal_enable(data->regs, 0); + + /* select sensor id */ + hisi_thermal_sensor_select(data->regs, sensor->id); + + /* setting the hdak time */ + hisi_thermal_hdak_set(data->regs, 0); + + /* setting lag value between current temp and the threshold */ + hisi_thermal_set_lag(data->regs, HISI_TEMP_LAG); + + /* enable for interrupt */ + hisi_thermal_alarm_set(data->regs, sensor->thres_temp); + + hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); + + /* enable module */ + hisi_thermal_reset_enable(data->regs, 1); + hisi_thermal_enable(data->regs, 1); + + hisi_thermal_alarm_clear(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 1); + + return 0; +} + static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; @@ -394,9 +379,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } - hisi_thermal_enable_bind_irq_sensor(data); - data->irq_enabled = true; - ret = hisi_thermal_register_sensor(pdev, data, &data->sensors, HISI_DEFAULT_SENSOR); @@ -406,10 +388,15 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } - ret = devm_request_threaded_irq(&pdev->dev, data->irq, - hisi_thermal_alarm_irq, + ret = hisi_thermal_setup(data); + if (ret) { + dev_err(&pdev->dev, "Failed to setup the sensor: %d\n", ret); + return ret; + } + + ret = devm_request_threaded_irq(&pdev->dev, data->irq, NULL, hisi_thermal_alarm_irq_thread, - 0, "hisi_thermal", data); + IRQF_ONESHOT, "hisi_thermal", data); if (ret < 0) { dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); return ret; @@ -417,8 +404,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) hisi_thermal_toggle_sensor(&data->sensors, true); - enable_irq(data->irq); - return 0; } @@ -440,7 +425,6 @@ static int hisi_thermal_suspend(struct device *dev) struct hisi_thermal_data *data = dev_get_drvdata(dev); hisi_thermal_disable_sensor(data); - data->irq_enabled = false; clk_disable_unprepare(data->clk); @@ -456,8 +440,7 @@ static int hisi_thermal_resume(struct device *dev) if (ret) return ret; - data->irq_enabled = true; - hisi_thermal_enable_bind_irq_sensor(data); + hisi_thermal_setup(data); return 0; } From c29f9010a35604047f96a7e9d6cbabfa36d996d1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 30 Oct 2018 17:48:25 +0000 Subject: [PATCH 0903/2608] mm: hide incomplete nr_indirectly_reclaimable in /proc/zoneinfo [fixed differently upstream, this is a work-around to resolve it for 4.14.y] Yongqin reported that /proc/zoneinfo format is broken in 4.14 due to commit 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat") Node 0, zone DMA per-node stats nr_inactive_anon 403 nr_active_anon 89123 nr_inactive_file 128887 nr_active_file 47377 nr_unevictable 2053 nr_slab_reclaimable 7510 nr_slab_unreclaimable 10775 nr_isolated_anon 0 nr_isolated_file 0 <...> nr_vmscan_write 0 nr_vmscan_immediate_reclaim 0 nr_dirtied 6022 nr_written 5985 74240 ^^^^^^^^^^ pages free 131656 The problem is caused by the nr_indirectly_reclaimable counter, which is hidden from the /proc/vmstat, but not from the /proc/zoneinfo. Let's fix this inconsistency and hide the counter from /proc/zoneinfo exactly as from /proc/vmstat. BTW, in 4.19+ the counter has been renamed and exported by the commit b29940c1abd7 ("mm: rename and change semantics of nr_indirectly_reclaimable_bytes"), so there is no such a problem anymore. Cc: # 4.14.x-4.18.x Fixes: 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat") Reported-by: Yongqin Liu Signed-off-by: Roman Gushchin Cc: Vlastimil Babka Cc: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/vmstat.c b/mm/vmstat.c index 527ae727d547..6389e876c7a7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1500,6 +1500,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, if (is_zone_first_populated(pgdat, zone)) { seq_printf(m, "\n per-node stats"); for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { + /* Skip hidden vmstat items. */ + if (*vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS] == '\0') + continue; seq_printf(m, "\n %-12s %lu", vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_STAT_ITEMS], From 02dc68829e5f2a9b48829725f332200ac3b051c2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 16:44:47 +0100 Subject: [PATCH 0904/2608] net: qed: use correct strncpy() size commit 11f711081af0eb54190dc0de96ba4a9cd494666b upstream. passing the strlen() of the source string as the destination length is pointless, and gcc-8 now warns about it: drivers/net/ethernet/qlogic/qed/qed_debug.c: In function 'qed_grc_dump': include/linux/string.h:253: error: 'strncpy' specified bound depends on the length of the source argument [-Werror=stringop-overflow=] This changes qed_grc_dump_big_ram() to instead uses the length of the destination buffer, and use strscpy() to guarantee nul-termination. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 03c3cf77aaff..99f32202a85c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -3590,10 +3590,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, total_blocks = big_ram->num_of_blocks[dev_data->chip_id]; ram_size = total_blocks * BIG_RAM_BLOCK_SIZE_DWORDS; - strncpy(type_name, big_ram->instance_name, - strlen(big_ram->instance_name)); - strncpy(mem_name, big_ram->instance_name, - strlen(big_ram->instance_name)); + strscpy(type_name, big_ram->instance_name, sizeof(type_name)); + strscpy(mem_name, big_ram->instance_name, sizeof(mem_name)); /* Dump memory header */ offset += qed_grc_dump_mem_hdr(p_hwfn, From 88b58409f82e98d28f90328fa9ab98e0de479a0c Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 19 Oct 2018 12:08:22 +0800 Subject: [PATCH 0905/2608] tipc: use destination length for copy string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 29e270fc32192e7729057963ae7120663856c93e upstream. Got below warning with gcc 8.2 compiler. net/tipc/topsrv.c: In function ‘tipc_topsrv_start’: net/tipc/topsrv.c:660:2: warning: ‘strncpy’ specified bound depends on the length of the source argument [-Wstringop-overflow=] strncpy(srv->name, name, strlen(name) + 1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/tipc/topsrv.c:660:27: note: length computed here strncpy(srv->name, name, strlen(name) + 1); ^~~~~~~~~~~~ So change it to correct length and use strscpy. Signed-off-by: Guoqing Jiang Acked-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/subscr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index be3d9e3183dc..959c9aea3d1a 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -375,7 +375,7 @@ int tipc_topsrv_start(struct net *net) topsrv->tipc_conn_new = tipc_subscrb_connect_cb; topsrv->tipc_conn_release = tipc_subscrb_release_cb; - strncpy(topsrv->name, name, strlen(name) + 1); + strscpy(topsrv->name, name, sizeof(topsrv->name)); tn->topsrv = topsrv; atomic_set(&tn->subscription_count, 0); From ca48e5e30b75a28c12c43c7428c95735e4885e6b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 8 Dec 2018 13:03:41 +0100 Subject: [PATCH 0906/2608] Linux 4.14.87 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 77ffc1dc4e79..322484348f3e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 86 +SUBLEVEL = 87 EXTRAVERSION = NAME = Petit Gorille From bcdfccfd8dee5f024b618a3ec4df17fab1296658 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 9 Oct 2018 07:49:49 -0400 Subject: [PATCH 0907/2608] media: omap3isp: Unregister media device as first [ Upstream commit 30efae3d789cd0714ef795545a46749236e29558 ] While there are issues related to object lifetime management, unregister the media device first when the driver is being unbound. This is slightly safer. Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/omap3isp/isp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 6e6e978263b0..c834fea5f9b0 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1592,6 +1592,8 @@ static void isp_pm_complete(struct device *dev) static void isp_unregister_entities(struct isp_device *isp) { + media_device_unregister(&isp->media_dev); + omap3isp_csi2_unregister_entities(&isp->isp_csi2a); omap3isp_ccp2_unregister_entities(&isp->isp_ccp2); omap3isp_ccdc_unregister_entities(&isp->isp_ccdc); @@ -1602,7 +1604,6 @@ static void isp_unregister_entities(struct isp_device *isp) omap3isp_stat_unregister_entities(&isp->isp_hist); v4l2_device_unregister(&isp->v4l2_dev); - media_device_unregister(&isp->media_dev); media_device_cleanup(&isp->media_dev); } From df11500b9dbf1bb3dd752f11be62a1cad22ca16d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 5 Nov 2018 10:18:58 +0800 Subject: [PATCH 0908/2608] iommu/vt-d: Fix NULL pointer dereference in prq_event_thread() [ Upstream commit 19ed3e2dd8549c1a34914e8dad01b64e7837645a ] When handling page request without pasid event, go to "no_pasid" branch instead of "bad_req". Otherwise, a NULL pointer deference will happen there. Cc: Ashok Raj Cc: Jacob Pan Cc: Sohil Mehta Signed-off-by: Lu Baolu Fixes: a222a7f0bb6c9 'iommu/vt-d: Implement page request handling' Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index d7def26ccf79..f5573bb9f450 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -589,7 +589,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) pr_err("%s: Page request without PASID: %08llx %08llx\n", iommu->name, ((unsigned long long *)req)[0], ((unsigned long long *)req)[1]); - goto bad_req; + goto no_pasid; } if (!svm || svm->pasid != req->pasid) { From 8fa55f1d1859a6b0d2e6c1606b6d8fa1a9ef63d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 26 Oct 2018 12:50:39 +0200 Subject: [PATCH 0909/2608] brcmutil: really fix decoding channel info for 160 MHz bandwidth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3401d42c7ea2d064d15c66698ff8eb96553179ce ] Previous commit /adding/ support for 160 MHz chanspecs was incomplete. It didn't set bandwidth info and didn't extract control channel info. As the result it was also using uninitialized "sb" var. This change has been tested for two chanspecs found to be reported by some devices/firmwares: 1) 60/160 (0xee32) Before: chnum:50 control_ch_num:36 After: chnum:50 control_ch_num:60 2) 120/160 (0xed72) Before: chnum:114 control_ch_num:100 After: chnum:114 control_ch_num:120 Fixes: 330994e8e8ec ("brcmfmac: fix for proper support of 160MHz bandwidth") Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c index e7584b842dce..eb5db94f5745 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c @@ -193,6 +193,9 @@ static void brcmu_d11ac_decchspec(struct brcmu_chan *ch) } break; case BRCMU_CHSPEC_D11AC_BW_160: + ch->bw = BRCMU_CHAN_BW_160; + ch->sb = brcmu_maskget16(ch->chspec, BRCMU_CHSPEC_D11AC_SB_MASK, + BRCMU_CHSPEC_D11AC_SB_SHIFT); switch (ch->sb) { case BRCMU_CHAN_SB_LLL: ch->control_ch_num -= CH_70MHZ_APART; From b65fa7b581f8a50ef147055006a7aa28ddb1086d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 7 Nov 2018 14:18:50 +0100 Subject: [PATCH 0910/2608] iommu/ipmmu-vmsa: Fix crash on early domain free [ Upstream commit e5b78f2e349eef5d4fca5dc1cf5a3b4b2cc27abd ] If iommu_ops.add_device() fails, iommu_ops.domain_free() is still called, leading to a crash, as the domain was only partially initialized: ipmmu-vmsa e67b0000.mmu: Cannot accommodate DMA translation for IOMMU page tables sata_rcar ee300000.sata: Unable to initialize IPMMU context iommu: Failed to add device ee300000.sata to group 0: -22 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000038 ... Call trace: ipmmu_domain_free+0x1c/0xa0 iommu_group_release+0x48/0x68 kobject_put+0x74/0xe8 kobject_del.part.0+0x3c/0x50 kobject_put+0x60/0xe8 iommu_group_get_for_dev+0xa8/0x1f0 ipmmu_add_device+0x1c/0x40 of_iommu_configure+0x118/0x190 Fix this by checking if the domain's context already exists, before trying to destroy it. Signed-off-by: Geert Uytterhoeven Reviewed-by: Robin Murphy Fixes: d25a2a16f0889 ('iommu: Add driver for Renesas VMSA-compatible IPMMU') Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/ipmmu-vmsa.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 5d0ba5f644c4..777aff1f549f 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -424,6 +424,9 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) { + if (!domain->mmu) + return; + /* * Disable the context. Flush the TLB as required when modifying the * context registers. From c6b578b1b4a7c56b469e16e6afd3ec478e1872c3 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Mon, 10 Sep 2018 11:43:13 +0100 Subject: [PATCH 0911/2608] can: rcar_can: Fix erroneous registration [ Upstream commit 68c8d209cd4337da4fa04c672f0b62bb735969bc ] Assigning 2 to "renesas,can-clock-select" tricks the driver into registering the CAN interface, even though we don't want that. This patch improves one of the checks to prevent that from happening. Fixes: 862e2b6af9413b43 ("can: rcar_can: support all input clocks") Signed-off-by: Fabrizio Castro Signed-off-by: Chris Paterson Reviewed-by: Simon Horman Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rcar/rcar_can.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 11662f479e76..771a46083739 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -24,6 +24,9 @@ #define RCAR_CAN_DRV_NAME "rcar_can" +#define RCAR_SUPPORTED_CLOCKS (BIT(CLKR_CLKP1) | BIT(CLKR_CLKP2) | \ + BIT(CLKR_CLKEXT)) + /* Mailbox configuration: * mailbox 60 - 63 - Rx FIFO mailboxes * mailbox 56 - 59 - Tx FIFO mailboxes @@ -789,7 +792,7 @@ static int rcar_can_probe(struct platform_device *pdev) goto fail_clk; } - if (clock_select >= ARRAY_SIZE(clock_names)) { + if (!(BIT(clock_select) & RCAR_SUPPORTED_CLOCKS)) { err = -EINVAL; dev_err(&pdev->dev, "invalid CAN clock selected\n"); goto fail_clk; From 58e0bc43507120ce1185268f588b191c2f5f4df4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 19 Oct 2018 13:58:01 +0100 Subject: [PATCH 0912/2608] test_firmware: fix error return getting clobbered [ Upstream commit 8bb0a88600f0267cfcc245d34f8c4abe8c282713 ] In the case where eq->fw->size > PAGE_SIZE the error return rc is being set to EINVAL however this is being overwritten to rc = req->fw->size because the error exit path via label 'out' is not being taken. Fix this by adding the jump to the error exit path 'out'. Detected by CoverityScan, CID#1453465 ("Unused value") Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- lib/test_firmware.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index e7008688769b..71d371f97138 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -838,6 +838,7 @@ static ssize_t read_firmware_show(struct device *dev, if (req->fw->size > PAGE_SIZE) { pr_err("Testing interface must use PAGE_SIZE firmware for now\n"); rc = -EINVAL; + goto out; } memcpy(buf, req->fw->data, req->fw->size); From a1270af9b1a0d3ba66ac3425771767e677d2fd0d Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Thu, 8 Nov 2018 15:59:21 -0800 Subject: [PATCH 0913/2608] HID: input: Ignore battery reported by Symbol DS4308 [ Upstream commit 0fd791841a6d67af1155a9c3de54dea51220721e ] The Motorola/Zebra Symbol DS4308-HD is a handheld USB barcode scanner which does not have a battery, but reports one anyway that always has capacity 2. Let's apply the IGNORE quirk to prevent it from being treated like a power supply so that userspaces don't get confused that this accessory is almost out of power and warn the user that they need to charge their wired barcode scanner. Reported here: https://bugs.chromium.org/p/chromium/issues/detail?id=804720 Signed-off-by: Benson Leung Reviewed-by: Benjamin Tissoires Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3fc8c0d67592..87904d2adadb 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1001,6 +1001,7 @@ #define USB_VENDOR_ID_SYMBOL 0x05e0 #define USB_DEVICE_ID_SYMBOL_SCANNER_1 0x0800 #define USB_DEVICE_ID_SYMBOL_SCANNER_2 0x1300 +#define USB_DEVICE_ID_SYMBOL_SCANNER_3 0x1200 #define USB_VENDOR_ID_SYNAPTICS 0x06cb #define USB_DEVICE_ID_SYNAPTICS_TP 0x0001 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index bb984cc9753b..d146a9b545ee 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -325,6 +325,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084), HID_BATTERY_QUIRK_IGNORE }, + { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, + USB_DEVICE_ID_SYMBOL_SCANNER_3), + HID_BATTERY_QUIRK_IGNORE }, {} }; From 94f748fd4ed3ac4c90990413681e703444c26225 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 30 Oct 2018 12:17:10 +0100 Subject: [PATCH 0914/2608] batman-adv: Use explicit tvlv padding for ELP packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f4156f9656feac21f4de712fac94fae964c5d402 ] The announcement messages of batman-adv COMPAT_VERSION 15 have the possibility to announce additional information via a dynamic TVLV part. This part is optional for the ELP packets and currently not parsed by the Linux implementation. Still out-of-tree versions are using it to transport things like neighbor hashes to optimize the rebroadcast behavior. Since the ELP broadcast packets are smaller than the minimal ethernet packet, it often has to be padded. This is often done (as specified in RFC894) with octets of zero and thus work perfectly fine with the TVLV part (making it a zero length and thus empty). But not all ethernet compatible hardware seems to follow this advice. To avoid ambiguous situations when parsing the TVLV header, just force the 4 bytes (TVLV length + padding) after the required ELP header to zero. Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin --- net/batman-adv/bat_v_elp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e92dfedccc16..fbc132f4670e 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -338,19 +338,21 @@ out: */ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) { + static const size_t tvlv_padding = sizeof(__be32); struct batadv_elp_packet *elp_packet; unsigned char *elp_buff; u32 random_seqno; size_t size; int res = -ENOMEM; - size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN; + size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN + tvlv_padding; hard_iface->bat_v.elp_skb = dev_alloc_skb(size); if (!hard_iface->bat_v.elp_skb) goto out; skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN); - elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); + elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, + BATADV_ELP_HLEN + tvlv_padding); elp_packet = (struct batadv_elp_packet *)elp_buff; elp_packet->packet_type = BATADV_ELP; From 2761d3237145412c1405b204f152b796bf9df4a2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 7 Nov 2018 23:09:12 +0100 Subject: [PATCH 0915/2608] batman-adv: Expand merged fragment buffer for full packet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d7d8bbb40a5b1f682ee6589e212934f4c6b8ad60 ] The complete size ("total_size") of the fragmented packet is stored in the fragment header and in the size of the fragment chain. When the fragments are ready for merge, the skbuff's tail of the first fragment is expanded to have enough room after the data pointer for at least total_size. This means that it gets expanded by total_size - first_skb->len. But this is ignoring the fact that after expanding the buffer, the fragment header is pulled by from this buffer. Assuming that the tailroom of the buffer was already 0, the buffer after the data pointer of the skbuff is now only total_size - len(fragment_header) large. When the merge function is then processing the remaining fragments, the code to copy the data over to the merged skbuff will cause an skb_over_panic when it tries to actually put enough data to fill the total_size bytes of the packet. The size of the skb_pull must therefore also be taken into account when the buffer's tailroom is expanded. Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge") Reported-by: Martin Weinelt Co-authored-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin --- net/batman-adv/fragmentation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index b6abd19ab23e..c6d37d22bd12 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -274,7 +274,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) kfree(entry); packet = (struct batadv_frag_packet *)skb_out->data; - size = ntohs(packet->total_size); + size = ntohs(packet->total_size) + hdr_size; /* Make room for the rest of the fragments. */ if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { From 41cb057690134a335b689be7e8db6d46db54a8ed Mon Sep 17 00:00:00 2001 From: Filippo Sironi Date: Mon, 12 Nov 2018 12:26:30 +0000 Subject: [PATCH 0916/2608] amd/iommu: Fix Guest Virtual APIC Log Tail Address Register [ Upstream commit ab99be4683d9db33b100497d463274ebd23bd67e ] This register should have been programmed with the physical address of the memory location containing the shadow tail pointer for the guest virtual APIC log instead of the base address. Fixes: 8bda0cfbdc1a ('iommu/amd: Detect and initialize guest vAPIC log') Signed-off-by: Filippo Sironi Signed-off-by: Wei Wang Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 6fe2d0346073..b97984a5ddad 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -796,7 +796,8 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, &entry, sizeof(entry)); - entry = (iommu_virt_to_phys(iommu->ga_log) & 0xFFFFFFFFFFFFFULL) & ~7ULL; + entry = (iommu_virt_to_phys(iommu->ga_log_tail) & + (BIT_ULL(52)-1)) & ~7ULL; memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, &entry, sizeof(entry)); writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); From fd358f42a60d85798b6d4715cd936b438fa2cc3e Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 11 Nov 2018 18:27:34 -0800 Subject: [PATCH 0917/2608] bnx2x: Assign unique DMAE channel number for FW DMAE transactions. [ Upstream commit 77e461d14ed141253573eeeb4d34eccc51e38328 ] Driver assigns DMAE channel 0 for FW as part of START_RAMROD command. FW uses this channel for DMAE operations (e.g., TIME_SYNC implementation). Driver also uses the same channel 0 for DMAE operations for some of the PFs (e.g., PF0 on Port0). This could lead to concurrent access to the DMAE channel by FW and driver which is not legal. Hence need to assign unique DMAE id for FW. Currently following DMAE channels are used by the clients, MFW - OCBB/OCSD functionality uses DMAE channel 14/15 Driver 0-3 and 8-11 (for PF dmae operations) 4 and 12 (for stats requests) Assigning unique dmae_id '13' to the FW. Changes from previous version: ------------------------------ v2: Incorporated the review comments. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 7 +++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 828e2e56b75e..1b7f4342dab9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2187,6 +2187,13 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, #define PMF_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \ E1HVN_MAX) +/* Following is the DMAE channel number allocation for the clients. + * MFW: OCBB/OCSD implementations use DMAE channels 14/15 respectively. + * Driver: 0-3 and 8-11 (for PF dmae operations) + * 4 and 12 (for stats requests) + */ +#define BNX2X_FW_DMAE_C 13 /* Channel for FW DMAE operations */ + /* PCIE link and speed */ #define PCICFG_LINK_WIDTH 0x1f00000 #define PCICFG_LINK_WIDTH_SHIFT 20 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 8baf9d3eb4b1..453bfd83a070 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -6149,6 +6149,7 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp, rdata->sd_vlan_tag = cpu_to_le16(start_params->sd_vlan_tag); rdata->path_id = BP_PATH(bp); rdata->network_cos_mode = start_params->network_cos_mode; + rdata->dmae_cmd_id = BNX2X_FW_DMAE_C; rdata->vxlan_dst_port = cpu_to_le16(start_params->vxlan_dst_port); rdata->geneve_dst_port = cpu_to_le16(start_params->geneve_dst_port); From 0f1c847db352a87f9974aa1869ca3e6f26b621f7 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 12 Nov 2018 12:50:20 +0200 Subject: [PATCH 0918/2608] qed: Fix PTT leak in qed_drain() [ Upstream commit 9aaa4e8ba12972d674caeefbc5f88d83235dd697 ] Release PTT before entering error flow. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 954f7ce4cf28..ecc2d4296526 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1561,9 +1561,9 @@ static int qed_drain(struct qed_dev *cdev) return -EBUSY; } rc = qed_mcp_drain(hwfn, ptt); + qed_ptt_release(hwfn, ptt); if (rc) return rc; - qed_ptt_release(hwfn, ptt); } return 0; From 93663e617b8fdcaceb6f407af6e494df2d5c7067 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 12 Nov 2018 12:50:23 +0200 Subject: [PATCH 0919/2608] qed: Fix reading wrong value in loop condition [ Upstream commit ed4eac20dcffdad47709422e0cb925981b056668 ] The value of "sb_index" is written by the hardware. Reading its value and writing it to "index" must finish before checking the loop condition. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_int.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 719cdbfe1695..7746417130bd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -992,6 +992,8 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn) */ do { index = p_sb_attn->sb_index; + /* finish reading index before the loop condition */ + dma_rmb(); attn_bits = le32_to_cpu(p_sb_attn->atten_bits); attn_acks = le32_to_cpu(p_sb_attn->atten_ack); } while (index != p_sb_attn->sb_index); From 65d9e9821b86155dc810ff08c12cc29c7fabbca9 Mon Sep 17 00:00:00 2001 From: Shen Jing Date: Thu, 1 Nov 2018 15:35:17 +0530 Subject: [PATCH 0920/2608] Revert "usb: gadget: ffs: Fix BUG when userland exits with submitted AIO transfers" [ Upstream commit a9c859033f6ec772f8e3228c343bb1321584ae0e ] This reverts commit b4194da3f9087dd38d91b40f9bec42d59ce589a8 since it causes list corruption followed by kernel panic: Workqueue: adb ffs_aio_cancel_worker RIP: 0010:__list_add_valid+0x4d/0x70 Call Trace: insert_work+0x47/0xb0 __queue_work+0xf6/0x400 queue_work_on+0x65/0x70 dwc3_gadget_giveback+0x44/0x50 [dwc3] dwc3_gadget_ep_dequeue+0x83/0x2d0 [dwc3] ? finish_wait+0x80/0x80 usb_ep_dequeue+0x1e/0x90 process_one_work+0x18c/0x3b0 worker_thread+0x3c/0x390 ? process_one_work+0x3b0/0x3b0 kthread+0x11e/0x140 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x3a/0x50 This issue is seen with warm reboot stability testing. Signed-off-by: Shen Jing Signed-off-by: Saranya Gopal Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_fs.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 17467545391b..52e6897fa35a 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -219,7 +219,6 @@ struct ffs_io_data { struct mm_struct *mm; struct work_struct work; - struct work_struct cancellation_work; struct usb_ep *ep; struct usb_request *req; @@ -1074,31 +1073,22 @@ ffs_epfile_open(struct inode *inode, struct file *file) return 0; } -static void ffs_aio_cancel_worker(struct work_struct *work) -{ - struct ffs_io_data *io_data = container_of(work, struct ffs_io_data, - cancellation_work); - - ENTER(); - - usb_ep_dequeue(io_data->ep, io_data->req); -} - static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; - struct ffs_data *ffs = io_data->ffs; + struct ffs_epfile *epfile = kiocb->ki_filp->private_data; int value; ENTER(); - if (likely(io_data && io_data->ep && io_data->req)) { - INIT_WORK(&io_data->cancellation_work, ffs_aio_cancel_worker); - queue_work(ffs->io_completion_wq, &io_data->cancellation_work); - value = -EINPROGRESS; - } else { + spin_lock_irq(&epfile->ffs->eps_lock); + + if (likely(io_data && io_data->ep && io_data->req)) + value = usb_ep_dequeue(io_data->ep, io_data->req); + else value = -EINVAL; - } + + spin_unlock_irq(&epfile->ffs->eps_lock); return value; } From 471b2b9c525424e5213b4d6084ed1d12758ce8c8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 15 Nov 2018 18:05:13 +0200 Subject: [PATCH 0921/2608] net/mlx4_core: Zero out lkey field in SW2HW_MPT fw command [ Upstream commit bd85fbc2038a1bbe84990b23ff69b6fc81a32b2c ] When re-registering a user mr, the mpt information for the existing mr when running SRIOV is obtained via the QUERY_MPT fw command. The returned information includes the mpt's lkey. This retrieved mpt information is used to move the mpt back to hardware ownership in the rereg flow (via the SW2HW_MPT fw command when running SRIOV). The fw API spec states that for SW2HW_MPT, the lkey field must be zero. Any ConnectX-3 PF driver which checks for strict spec adherence will return failure for SW2HW_MPT if the lkey field is not zero (although the fw in practice ignores this field for SW2HW_MPT). Thus, in order to conform to the fw API spec, set the lkey field to zero before invoking SW2HW_MPT when running SRIOV. Fixes: e630664c8383 ("mlx4_core: Add helper functions to support MR re-registration") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index c7c0764991c9..20043f82c1d8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -363,6 +363,7 @@ int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, buf); + (*mpt_entry)->lkey = 0; err = mlx4_SW2HW_MPT(dev, mailbox, key); } From e6e5d3dda0ea79e4c9a52ed2031ed832713f3e99 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Nov 2018 18:05:14 +0200 Subject: [PATCH 0922/2608] net/mlx4_core: Fix uninitialized variable compilation warning [ Upstream commit 3ea7e7ea53c9f6ee41cb69a29c375fe9dd9a56a7 ] Initialize the uid variable to zero to avoid the compilation warning. Fixes: 7a89399ffad7 ("net/mlx4: Add mlx4_bitmap zone allocator") Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 6dabd983e7e0..94f4dc4a77e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -337,7 +337,7 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc) static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count, int align, u32 skip_mask, u32 *puid) { - u32 uid; + u32 uid = 0; u32 res; struct mlx4_zone_allocator *zone_alloc = zone->allocator; struct mlx4_zone_entry *curr_node; From 934f4965b46b99885e31b89fbf3240cb034c0959 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 15 Nov 2018 18:05:15 +0200 Subject: [PATCH 0923/2608] net/mlx4: Fix UBSAN warning of signed integer overflow [ Upstream commit a463146e67c848cbab5ce706d6528281b7cded08 ] UBSAN: Undefined behavior in drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:626:29 signed integer overflow: 1802201963 + 1802201963 cannot be represented in type 'int' The union of res_reserved and res_port_rsvd[MLX4_MAX_PORTS] monitors granting of reserved resources. The grant operation is calculated and protected, thus both members of the union cannot be negative. Changed type of res_reserved and of res_port_rsvd[MLX4_MAX_PORTS] from signed int to unsigned int, allowing large value. Fixes: 5a0d0a6161ae ("mlx4: Structures and init/teardown for VF resource quotas") Signed-off-by: Aya Levin Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c68da1986e51..aaeb446bba62 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -541,8 +541,8 @@ struct slave_list { struct resource_allocator { spinlock_t alloc_lock; /* protect quotas */ union { - int res_reserved; - int res_port_rsvd[MLX4_MAX_PORTS]; + unsigned int res_reserved; + unsigned int res_port_rsvd[MLX4_MAX_PORTS]; }; union { int res_free; From 6c65ce92019b50861cd0b1763da8ea8217d6a420 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 8 Nov 2018 17:52:53 +0100 Subject: [PATCH 0924/2608] gpio: mockup: fix indicated direction [ Upstream commit bff466bac59994cfcceabe4d0be5fdc1c20cd5b8 ] Commit 3edfb7bd76bd ("gpiolib: Show correct direction from the beginning") fixed an existing issue but broke libgpiod tests by changing the default direction of dummy lines to output. We don't break user-space so make gpio-mockup behave as before. Signed-off-by: Bartosz Golaszewski Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mockup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 9532d86a82f7..d99c8d8da9a0 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -35,8 +35,8 @@ #define GPIO_MOCKUP_MAX_RANGES (GPIO_MOCKUP_MAX_GC * 2) enum { - GPIO_MOCKUP_DIR_OUT = 0, - GPIO_MOCKUP_DIR_IN = 1, + GPIO_MOCKUP_DIR_IN = 0, + GPIO_MOCKUP_DIR_OUT = 1, }; /* @@ -112,7 +112,7 @@ static int gpio_mockup_get_direction(struct gpio_chip *gc, unsigned int offset) { struct gpio_mockup_chip *chip = gpiochip_get_data(gc); - return chip->lines[offset].dir; + return !chip->lines[offset].dir; } static int gpio_mockup_name_lines(struct device *dev, From 60adf764a94f393d9ec56e05d976987d4d3cfdd3 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 16 Nov 2018 19:43:27 -0800 Subject: [PATCH 0925/2608] mtd: rawnand: qcom: Namespace prefix some commands [ Upstream commit 33bf5519ae5dd356b182a94e3622f42860274a38 ] PAGE_READ is used by RISC-V arch code included through mm headers, and it makes sense to bring in a prefix on these in the driver. drivers/mtd/nand/raw/qcom_nandc.c:153: warning: "PAGE_READ" redefined #define PAGE_READ 0x2 In file included from include/linux/memremap.h:7, from include/linux/mm.h:27, from include/linux/scatterlist.h:8, from include/linux/dma-mapping.h:11, from drivers/mtd/nand/raw/qcom_nandc.c:17: arch/riscv/include/asm/pgtable.h:48: note: this is the location of the previous definition Caught by riscv allmodconfig. Signed-off-by: Olof Johansson Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/mtd/nand/qcom_nandc.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index b49ca02b399d..09d5f7df6023 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -149,15 +149,15 @@ #define NAND_VERSION_MINOR_SHIFT 16 /* NAND OP_CMDs */ -#define PAGE_READ 0x2 -#define PAGE_READ_WITH_ECC 0x3 -#define PAGE_READ_WITH_ECC_SPARE 0x4 -#define PROGRAM_PAGE 0x6 -#define PAGE_PROGRAM_WITH_ECC 0x7 -#define PROGRAM_PAGE_SPARE 0x9 -#define BLOCK_ERASE 0xa -#define FETCH_ID 0xb -#define RESET_DEVICE 0xd +#define OP_PAGE_READ 0x2 +#define OP_PAGE_READ_WITH_ECC 0x3 +#define OP_PAGE_READ_WITH_ECC_SPARE 0x4 +#define OP_PROGRAM_PAGE 0x6 +#define OP_PAGE_PROGRAM_WITH_ECC 0x7 +#define OP_PROGRAM_PAGE_SPARE 0x9 +#define OP_BLOCK_ERASE 0xa +#define OP_FETCH_ID 0xb +#define OP_RESET_DEVICE 0xd /* Default Value for NAND_DEV_CMD_VLD */ #define NAND_DEV_CMD_VLD_VAL (READ_START_VLD | WRITE_START_VLD | \ @@ -629,11 +629,11 @@ static void update_rw_regs(struct qcom_nand_host *host, int num_cw, bool read) if (read) { if (host->use_ecc) - cmd = PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE; + cmd = OP_PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE; else - cmd = PAGE_READ | PAGE_ACC | LAST_PAGE; + cmd = OP_PAGE_READ | PAGE_ACC | LAST_PAGE; } else { - cmd = PROGRAM_PAGE | PAGE_ACC | LAST_PAGE; + cmd = OP_PROGRAM_PAGE | PAGE_ACC | LAST_PAGE; } if (host->use_ecc) { @@ -1030,7 +1030,7 @@ static int nandc_param(struct qcom_nand_host *host) * in use. we configure the controller to perform a raw read of 512 * bytes to read onfi params */ - nandc_set_reg(nandc, NAND_FLASH_CMD, PAGE_READ | PAGE_ACC | LAST_PAGE); + nandc_set_reg(nandc, NAND_FLASH_CMD, OP_PAGE_READ | PAGE_ACC | LAST_PAGE); nandc_set_reg(nandc, NAND_ADDR0, 0); nandc_set_reg(nandc, NAND_ADDR1, 0); nandc_set_reg(nandc, NAND_DEV0_CFG0, 0 << CW_PER_PAGE @@ -1084,7 +1084,7 @@ static int erase_block(struct qcom_nand_host *host, int page_addr) struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip); nandc_set_reg(nandc, NAND_FLASH_CMD, - BLOCK_ERASE | PAGE_ACC | LAST_PAGE); + OP_BLOCK_ERASE | PAGE_ACC | LAST_PAGE); nandc_set_reg(nandc, NAND_ADDR0, page_addr); nandc_set_reg(nandc, NAND_ADDR1, 0); nandc_set_reg(nandc, NAND_DEV0_CFG0, @@ -1115,7 +1115,7 @@ static int read_id(struct qcom_nand_host *host, int column) if (column == -1) return 0; - nandc_set_reg(nandc, NAND_FLASH_CMD, FETCH_ID); + nandc_set_reg(nandc, NAND_FLASH_CMD, OP_FETCH_ID); nandc_set_reg(nandc, NAND_ADDR0, column); nandc_set_reg(nandc, NAND_ADDR1, 0); nandc_set_reg(nandc, NAND_FLASH_CHIP_SELECT, @@ -1136,7 +1136,7 @@ static int reset(struct qcom_nand_host *host) struct nand_chip *chip = &host->chip; struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip); - nandc_set_reg(nandc, NAND_FLASH_CMD, RESET_DEVICE); + nandc_set_reg(nandc, NAND_FLASH_CMD, OP_RESET_DEVICE); nandc_set_reg(nandc, NAND_EXEC_CMD, 1); write_reg_dma(nandc, NAND_FLASH_CMD, 1, NAND_BAM_NEXT_SGL); From 16f2433ede2374060ccb79767fcd3a69cde2a22f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 14 Nov 2018 05:35:20 +0000 Subject: [PATCH 0926/2608] HID: multitouch: Add pointstick support for Cirque Touchpad [ Upstream commit 12d43aacf9a74d0eb66fd0ea54ebeb79ca28940f ] Cirque Touchpad/Pointstick combo is similar to Alps devices, it requires MT_CLS_WIN_8_DUAL to expose its pointstick as a mouse. Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-multitouch.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 87904d2adadb..fcc688df694c 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -266,6 +266,9 @@ #define USB_VENDOR_ID_CIDC 0x1677 +#define I2C_VENDOR_ID_CIRQUE 0x0488 +#define I2C_PRODUCT_ID_CIRQUE_121F 0x121F + #define USB_VENDOR_ID_CJTOUCH 0x24b8 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index c3b9bd5dba75..07d92d4a9f7c 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1474,6 +1474,12 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_CHUNGHWAT, USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH) }, + /* Cirque devices */ + { .driver_data = MT_CLS_WIN_8_DUAL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + I2C_VENDOR_ID_CIRQUE, + I2C_PRODUCT_ID_CIRQUE_121F) }, + /* CJTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CJTOUCH, From 4b4c6714a19792f616fc03696594dafbb5a9b9a1 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Fri, 16 Nov 2018 08:25:49 -0600 Subject: [PATCH 0927/2608] mtd: spi-nor: Fix Cadence QSPI page fault kernel panic [ Upstream commit a6a66f80c85e8e20573ca03fabf32445954a88d5 ] The current Cadence QSPI driver caused a kernel panic sporadically when writing to QSPI. The problem was caused by writing more bytes than needed because the QSPI operated on 4 bytes at a time. [ 11.202044] Unable to handle kernel paging request at virtual address bffd3000 [ 11.209254] pgd = e463054d [ 11.211948] [bffd3000] *pgd=2fffb811, *pte=00000000, *ppte=00000000 [ 11.218202] Internal error: Oops: 7 [#1] SMP ARM [ 11.222797] Modules linked in: [ 11.225844] CPU: 1 PID: 1317 Comm: systemd-hwdb Not tainted 4.17.7-d0c45cd44a8f [ 11.235796] Hardware name: Altera SOCFPGA Arria10 [ 11.240487] PC is at __raw_writesl+0x70/0xd4 [ 11.244741] LR is at cqspi_write+0x1a0/0x2cc On a page boundary limit the number of bytes copied from the tx buffer to remain within the page. This patch uses a temporary buffer to hold the 4 bytes to write and then copies only the bytes required from the tx buffer. Reported-by: Adrian Amborzewicz Signed-off-by: Thor Thayer Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/cadence-quadspi.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index 8d89204b90d2..f22dd34f4f83 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -625,9 +625,23 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, reg_base + CQSPI_REG_INDIRECTWR); while (remaining > 0) { + size_t write_words, mod_bytes; + write_bytes = remaining > page_size ? page_size : remaining; - iowrite32_rep(cqspi->ahb_base, txbuf, - DIV_ROUND_UP(write_bytes, 4)); + write_words = write_bytes / 4; + mod_bytes = write_bytes % 4; + /* Write 4 bytes at a time then single bytes. */ + if (write_words) { + iowrite32_rep(cqspi->ahb_base, txbuf, write_words); + txbuf += (write_words * 4); + } + if (mod_bytes) { + unsigned int temp = 0xFFFFFFFF; + + memcpy(&temp, txbuf, mod_bytes); + iowrite32(temp, cqspi->ahb_base); + txbuf += mod_bytes; + } ret = wait_for_completion_timeout(&cqspi->transfer_complete, msecs_to_jiffies @@ -638,7 +652,6 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, goto failwr; } - txbuf += write_bytes; remaining -= write_bytes; if (remaining > 0) From 9dded0ffdeb7fa10698d534978892b406b60c91e Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 19 Nov 2018 16:28:30 +0200 Subject: [PATCH 0928/2608] qed: Fix bitmap_weight() check [ Upstream commit 276d43f0ae963312c0cd0e2b9a85fd11ac65dfcc ] Fix the condition which verifies that only one flag is set. The API bitmap_weight() should receive size in bits instead of bytes. Fixes: b5a9ee7cf3be ("qed: Revise QM cofiguration") Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index ef2374699726..a51cd1028ecb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -440,8 +440,11 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, struct qed_qm_info *qm_info = &p_hwfn->qm_info; /* Can't have multiple flags set here */ - if (bitmap_weight((unsigned long *)&pq_flags, sizeof(pq_flags)) > 1) + if (bitmap_weight((unsigned long *)&pq_flags, + sizeof(pq_flags) * BITS_PER_BYTE) > 1) { + DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags); goto err; + } switch (pq_flags) { case PQ_FLAGS_RLS: From f7a6fd087a300dabc4dbde84b1f2d4d0ac5173e0 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 19 Nov 2018 16:28:31 +0200 Subject: [PATCH 0929/2608] qed: Fix QM getters to always return a valid pq [ Upstream commit eb62cca9bee842e5b23bd0ddfb1f271ca95e8759 ] The getter callers doesn't know the valid Physical Queues (PQ) values. This patch makes sure that a valid PQ will always be returned. The patch consists of 3 fixes: - When qed_init_qm_get_idx_from_flags() receives a disabled flag, it returned PQ 0, which can potentially be another function's pq. Verify that flag is enabled, otherwise return default start_pq. - When qed_init_qm_get_idx_from_flags() receives an unknown flag, it returned NULL and could lead to a segmentation fault. Return default start_pq instead. - A modulo operation was added to MCOS/VFS PQ getters to make sure the PQ returned is in range of the required flag. Fixes: b5a9ee7cf3be ("qed: Revise QM cofiguration") Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 24 +++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index a51cd1028ecb..16953c4ebd71 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -446,6 +446,11 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, goto err; } + if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) { + DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags); + goto err; + } + switch (pq_flags) { case PQ_FLAGS_RLS: return &qm_info->first_rl_pq; @@ -468,8 +473,7 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, } err: - DP_ERR(p_hwfn, "BAD pq flags %d\n", pq_flags); - return NULL; + return &qm_info->start_pq; } /* save pq index in qm info */ @@ -493,20 +497,32 @@ u16 qed_get_cm_pq_idx_mcos(struct qed_hwfn *p_hwfn, u8 tc) { u8 max_tc = qed_init_qm_get_num_tcs(p_hwfn); + if (max_tc == 0) { + DP_ERR(p_hwfn, "pq with flag 0x%lx do not exist\n", + PQ_FLAGS_MCOS); + return p_hwfn->qm_info.start_pq; + } + if (tc > max_tc) DP_ERR(p_hwfn, "tc %d must be smaller than %d\n", tc, max_tc); - return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + tc; + return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + (tc % max_tc); } u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf) { u16 max_vf = qed_init_qm_get_num_vfs(p_hwfn); + if (max_vf == 0) { + DP_ERR(p_hwfn, "pq with flag 0x%lx do not exist\n", + PQ_FLAGS_VFS); + return p_hwfn->qm_info.start_pq; + } + if (vf > max_vf) DP_ERR(p_hwfn, "vf %d must be smaller than %d\n", vf, max_vf); - return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf; + return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + (vf % max_vf); } u16 qed_get_cm_pq_idx_rl(struct qed_hwfn *p_hwfn, u8 rl) From d0decb8d7cbb225336e043a63e9a8c72498834fd Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 21 Nov 2018 09:38:11 +0800 Subject: [PATCH 0930/2608] net: faraday: ftmac100: remove netif_running(netdev) check before disabling interrupts [ Upstream commit 426a593e641ebf0d9288f0a2fcab644a86820220 ] In the original ftmac100_interrupt(), the interrupts are only disabled when the condition "netif_running(netdev)" is true. However, this condition causes kerenl hang in the following case. When the user requests to disable the network device, kernel will clear the bit __LINK_STATE_START from the dev->state and then call the driver's ndo_stop function. Network device interrupts are not blocked during this process. If an interrupt occurs between clearing __LINK_STATE_START and stopping network device, kernel cannot disable the interrupts due to the condition "netif_running(netdev)" in the ISR. Hence, kernel will hang due to the continuous interruption of the network device. In order to solve the above problem, the interrupts of the network device should always be disabled in the ISR without being restricted by the condition "netif_running(netdev)". [V2] Remove unnecessary curly braces. Signed-off-by: Vincent Chen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/faraday/ftmac100.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 66928a922824..415fd93e9930 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -870,11 +870,10 @@ static irqreturn_t ftmac100_interrupt(int irq, void *dev_id) struct net_device *netdev = dev_id; struct ftmac100 *priv = netdev_priv(netdev); - if (likely(netif_running(netdev))) { - /* Disable interrupts for polling */ - ftmac100_disable_all_int(priv); + /* Disable interrupts for polling */ + ftmac100_disable_all_int(priv); + if (likely(netif_running(netdev))) napi_schedule(&priv->napi); - } return IRQ_HANDLED; } From 84f49bb07f9d8f48891976229a6eaed9e1a4d6ca Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 21 Nov 2018 17:53:47 +0800 Subject: [PATCH 0931/2608] iommu/vt-d: Use memunmap to free memremap [ Upstream commit 829383e183728dec7ed9150b949cd6de64127809 ] memunmap() should be used to free the return of memremap(), not iounmap(). Fixes: dfddb969edf0 ('iommu/vt-d: Switch from ioremap_cache to memremap') Signed-off-by: Pan Bian Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index aaf3fed97477..e86c1c8ec7f6 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3086,7 +3086,7 @@ static int copy_context_table(struct intel_iommu *iommu, } if (old_ce) - iounmap(old_ce); + memunmap(old_ce); ret = 0; if (devfn < 0x80) From dab45f4d03f01e8d6612803518c225bf859f796c Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Wed, 21 Nov 2018 12:25:41 +0100 Subject: [PATCH 0932/2608] flexfiles: use per-mirror specified stateid for IO [ Upstream commit bb21ce0ad227b69ec0f83279297ee44232105d96 ] rfc8435 says: For tight coupling, ffds_stateid provides the stateid to be used by the client to access the file. However current implementation replaces per-mirror provided stateid with by open or lock stateid. Ensure that per-mirror stateid is used by ff_layout_write_prepare_v4 and nfs4_ff_layout_prepare_ds. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Rick Macklem Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/flexfilelayout/flexfilelayout.c | 21 +++++++++------------ fs/nfs/flexfilelayout/flexfilelayout.h | 4 ++++ fs/nfs/flexfilelayout/flexfilelayoutdev.c | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b0fa83a60754..13612a848378 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1365,12 +1365,7 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) task)) return; - if (ff_layout_read_prepare_common(task, hdr)) - return; - - if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, - hdr->args.lock_context, FMODE_READ) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ + ff_layout_read_prepare_common(task, hdr); } static void ff_layout_read_call_done(struct rpc_task *task, void *data) @@ -1539,12 +1534,7 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) task)) return; - if (ff_layout_write_prepare_common(task, hdr)) - return; - - if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, - hdr->args.lock_context, FMODE_WRITE) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ + ff_layout_write_prepare_common(task, hdr); } static void ff_layout_write_call_done(struct rpc_task *task, void *data) @@ -1734,6 +1724,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; + + if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + goto out_failed; + /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1796,6 +1790,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (fh) hdr->args.fh = fh; + if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + goto out_failed; + /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 679cb087ef3f..d6515f1584f3 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -214,6 +214,10 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, unsigned int maxnum); struct nfs_fh * nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); +int +nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, + u32 mirror_idx, + nfs4_stateid *stateid); struct nfs4_pnfs_ds * nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index d62279d3fc5d..9f69e83810ca 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -369,6 +369,25 @@ out: return fh; } +int +nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, + u32 mirror_idx, + nfs4_stateid *stateid) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); + + if (!ff_layout_mirror_valid(lseg, mirror, false)) { + pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", + __func__, mirror_idx); + goto out; + } + + nfs4_stateid_copy(stateid, &mirror->stateid); + return 1; +out: + return 0; +} + /** * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call * @lseg: the layout segment we're operating on From 1c1fbd4d2e9200f4b8a69d2b517e5708b0965a68 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 21 Nov 2018 11:17:58 -0600 Subject: [PATCH 0933/2608] ibmvnic: Fix RX queue buffer cleanup [ Upstream commit b7cdec3d699db2e5985ad39de0f25d3b6111928e ] The wrong index is used when cleaning up RX buffer objects during release of RX queues. Update to use the correct index counter. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5c7134ccc1fd..14c53ed5cca6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -457,8 +457,8 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) for (j = 0; j < rx_pool->size; j++) { if (rx_pool->rx_buff[j].skb) { - dev_kfree_skb_any(rx_pool->rx_buff[i].skb); - rx_pool->rx_buff[i].skb = NULL; + dev_kfree_skb_any(rx_pool->rx_buff[j].skb); + rx_pool->rx_buff[j].skb = NULL; } } From 63e307e81cffa11dbc8b513ee2b951ffa59fc61c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 22 Nov 2018 16:15:28 +0800 Subject: [PATCH 0934/2608] team: no need to do team_notify_peers or team_mcast_rejoin when disabling port [ Upstream commit 5ed9dc99107144f83b6c1bb52a69b58875baf540 ] team_notify_peers() will send ARP and NA to notify peers. team_mcast_rejoin() will send multicast join group message to notify peers. We should do this when enabling/changed to a new port. But it doesn't make sense to do it when a port is disabled. On the other hand, when we set mcast_rejoin_count to 2, and do a failover, team_port_disable() will increase mcast_rejoin.count_pending to 2 and then team_port_enable() will increase mcast_rejoin.count_pending to 4. We will send 4 mcast rejoin messages at latest, which will make user confused. The same with notify_peers.count. Fix it by deleting team_notify_peers() and team_mcast_rejoin() in team_port_disable(). Reported-by: Liang Li Fixes: fc423ff00df3a ("team: add peer notification") Fixes: 492b200efdd20 ("team: add support for sending multicast rejoins") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/team/team.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 817451a1efd6..bd455a6cc82c 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -989,8 +989,6 @@ static void team_port_disable(struct team *team, team->en_port_count--; team_queue_override_port_del(team, port); team_adjust_ops(team); - team_notify_peers(team); - team_mcast_rejoin(team); team_lower_state_changed(port); } From ccbffb5d7dc88b7e7eecf504d8fa2a271f10ee16 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 22 Nov 2018 07:34:41 -0500 Subject: [PATCH 0935/2608] net: amd: add missing of_node_put() [ Upstream commit c44c749d3b6fdfca39002e7e48e03fe9f9fe37a3 ] of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. This place doesn't do that, so fix it. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/sunlance.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 291ca5187f12..9845e07d40cd 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1418,7 +1418,7 @@ static int sparc_lance_probe_one(struct platform_device *op, prop = of_get_property(nd, "tpe-link-test?", NULL); if (!prop) - goto no_link_test; + goto node_put; if (strcmp(prop, "true")) { printk(KERN_NOTICE "SunLance: warning: overriding option " @@ -1427,6 +1427,8 @@ static int sparc_lance_probe_one(struct platform_device *op, "to ecd@skynet.be\n"); auxio_set_lte(AUXIO_LTE_ON); } +node_put: + of_node_put(nd); no_link_test: lp->auto_select = 1; lp->tpe = 0; From 4515bbc4e284066d4e2f4a66eccf13813f6e7206 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Nov 2017 17:38:37 -0800 Subject: [PATCH 0936/2608] mm: don't warn about allocations which stall for too long [ Upstream commit 400e22499dd92613821374c8c6c88c7225359980 ] Commit 63f53dea0c98 ("mm: warn about allocations which stall for too long") was a great step for reducing possibility of silent hang up problem caused by memory allocation stalls. But this commit reverts it, for it is possible to trigger OOM lockup and/or soft lockups when many threads concurrently called warn_alloc() (in order to warn about memory allocation stalls) due to current implementation of printk(), and it is difficult to obtain useful information due to limitation of synchronous warning approach. Current printk() implementation flushes all pending logs using the context of a thread which called console_unlock(). printk() should be able to flush all pending logs eventually unless somebody continues appending to printk() buffer. Since warn_alloc() started appending to printk() buffer while waiting for oom_kill_process() to make forward progress when oom_kill_process() is processing pending logs, it became possible for warn_alloc() to force oom_kill_process() loop inside printk(). As a result, warn_alloc() significantly increased possibility of preventing oom_kill_process() from making forward progress. ---------- Pseudo code start ---------- Before warn_alloc() was introduced: retry: if (mutex_trylock(&oom_lock)) { while (atomic_read(&printk_pending_logs) > 0) { atomic_dec(&printk_pending_logs); print_one_log(); } // Send SIGKILL here. mutex_unlock(&oom_lock) } goto retry; After warn_alloc() was introduced: retry: if (mutex_trylock(&oom_lock)) { while (atomic_read(&printk_pending_logs) > 0) { atomic_dec(&printk_pending_logs); print_one_log(); } // Send SIGKILL here. mutex_unlock(&oom_lock) } else if (waited_for_10seconds()) { atomic_inc(&printk_pending_logs); } goto retry; ---------- Pseudo code end ---------- Although waited_for_10seconds() becomes true once per 10 seconds, unbounded number of threads can call waited_for_10seconds() at the same time. Also, since threads doing waited_for_10seconds() keep doing almost busy loop, the thread doing print_one_log() can use little CPU resource. Therefore, this situation can be simplified like ---------- Pseudo code start ---------- retry: if (mutex_trylock(&oom_lock)) { while (atomic_read(&printk_pending_logs) > 0) { atomic_dec(&printk_pending_logs); print_one_log(); } // Send SIGKILL here. mutex_unlock(&oom_lock) } else { atomic_inc(&printk_pending_logs); } goto retry; ---------- Pseudo code end ---------- when printk() is called faster than print_one_log() can process a log. One of possible mitigation would be to introduce a new lock in order to make sure that no other series of printk() (either oom_kill_process() or warn_alloc()) can append to printk() buffer when one series of printk() (either oom_kill_process() or warn_alloc()) is already in progress. Such serialization will also help obtaining kernel messages in readable form. ---------- Pseudo code start ---------- retry: if (mutex_trylock(&oom_lock)) { mutex_lock(&oom_printk_lock); while (atomic_read(&printk_pending_logs) > 0) { atomic_dec(&printk_pending_logs); print_one_log(); } // Send SIGKILL here. mutex_unlock(&oom_printk_lock); mutex_unlock(&oom_lock) } else { if (mutex_trylock(&oom_printk_lock)) { atomic_inc(&printk_pending_logs); mutex_unlock(&oom_printk_lock); } } goto retry; ---------- Pseudo code end ---------- But this commit does not go that direction, for we don't want to introduce a new lock dependency, and we unlikely be able to obtain useful information even if we serialized oom_kill_process() and warn_alloc(). Synchronous approach is prone to unexpected results (e.g. too late [1], too frequent [2], overlooked [3]). As far as I know, warn_alloc() never helped with providing information other than "something is going wrong". I want to consider asynchronous approach which can obtain information during stalls with possibly relevant threads (e.g. the owner of oom_lock and kswapd-like threads) and serve as a trigger for actions (e.g. turn on/off tracepoints, ask libvirt daemon to take a memory dump of stalling KVM guest for diagnostic purpose). This commit temporarily loses ability to report e.g. OOM lockup due to unable to invoke the OOM killer due to !__GFP_FS allocation request. But asynchronous approach will be able to detect such situation and emit warning. Thus, let's remove warn_alloc(). [1] https://bugzilla.kernel.org/show_bug.cgi?id=192981 [2] http://lkml.kernel.org/r/CAM_iQpWuPVGc2ky8M-9yukECtS+zKjiDasNymX7rMcBjBFyM_A@mail.gmail.com [3] commit db73ee0d46379922 ("mm, vmscan: do not loop on too_many_isolated for ever")) Link: http://lkml.kernel.org/r/1509017339-4802-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: Cong Wang Reported-by: yuwang.yuwang Reported-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vlastimil Babka Cc: Mel Gorman Cc: Dave Hansen Cc: Sergey Senozhatsky Cc: Petr Mladek Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page_alloc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2074f424dabf..6be91a1a00d9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3862,8 +3862,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, enum compact_result compact_result; int compaction_retries; int no_progress_loops; - unsigned long alloc_start = jiffies; - unsigned int stall_timeout = 10 * HZ; unsigned int cpuset_mems_cookie; int reserve_flags; @@ -3983,14 +3981,6 @@ retry: if (!can_direct_reclaim) goto nopage; - /* Make sure we know about allocations which stall for too long */ - if (time_after(jiffies, alloc_start + stall_timeout)) { - warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask, - "page allocation stalls for %ums, order:%u", - jiffies_to_msecs(jiffies-alloc_start), order); - stall_timeout += 10 * HZ; - } - /* Avoid recursion of direct reclaim */ if (current->flags & PF_MEMALLOC) goto nopage; From d2afd22aff69bc448b1defc06fcd311479f336f5 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Thu, 29 Nov 2018 00:40:41 +0800 Subject: [PATCH 0937/2608] usb: quirk: add no-LPM quirk on SanDisk Ultra Flair device commit 2f2dde6ba89b1ef1fe23c1138131b315d9aa4019 upstream. Some lower volume SanDisk Ultra Flair in 16GB, which the VID:PID is in 0781:5591, will aggressively request LPM of U1/U2 during runtime, when using this thumb drive as the OS installation key we found the device will generate failure during U1 exit path making it dropped from the USB bus, this causes a corrupted installation in system at the end. i.e., [ 166.918296] hub 2-0:1.0: state 7 ports 7 chg 0000 evt 0004 [ 166.918327] usb usb2-port2: link state change [ 166.918337] usb usb2-port2: do warm reset [ 166.970039] usb usb2-port2: not warm reset yet, waiting 50ms [ 167.022040] usb usb2-port2: not warm reset yet, waiting 200ms [ 167.276043] usb usb2-port2: status 02c0, change 0041, 5.0 Gb/s [ 167.276050] usb 2-2: USB disconnect, device number 2 [ 167.276058] usb 2-2: unregistering device [ 167.276060] usb 2-2: unregistering interface 2-2:1.0 [ 167.276170] xhci_hcd 0000:00:15.0: shutdown urb ffffa3c7cc695cc0 ep1in-bulk [ 167.284055] sd 0:0:0:0: [sda] tag#0 FAILED Result: hostbyte=DID_NO_CONNECT driverbyte=DRIVER_OK [ 167.284064] sd 0:0:0:0: [sda] tag#0 CDB: Read(10) 28 00 00 33 04 90 00 01 00 00 ... Analyzed the USB trace in the link layer we realized it is because of the 6-ms timer of tRecoveryConfigurationTimeout which documented on the USB 3.2 Revision 1.0, the section 7.5.10.4.2 of "Exit from Recovery.Configuration"; device initiates U1 exit -> Recovery.Active -> Recovery.Configuration, then the host timer timeout makes the link transits to eSS.Inactive -> Rx.Detect follows by a Warm Reset. Interestingly, the other higher volume of SanDisk Ultra Flair sharing the same VID:PID, such as 64GB, would not request LPM during runtime, it sticks at U0 always, thus disabling LPM does not affect those thumb drives at all. The same odd occures in SanDisk Ultra Fit 16GB, VID:PID in 0781:5583. Signed-off-by: Harry Pan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 808437c5ec49..cf378b1ed373 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -188,6 +188,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Midiman M-Audio Keystation 88es */ { USB_DEVICE(0x0763, 0x0192), .driver_info = USB_QUIRK_RESET_RESUME }, + /* SanDisk Ultra Fit and Ultra Flair */ + { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, + { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, From 810487aa9d483262044c41fc1973c7abd95e1d86 Mon Sep 17 00:00:00 2001 From: Alexander Theissen Date: Tue, 4 Dec 2018 23:43:35 +0100 Subject: [PATCH 0938/2608] usb: appledisplay: Add 27" Apple Cinema Display commit d7859905301880ad3e16272399d26900af3ac496 upstream. Add another Apple Cinema Display to the list of supported displays. Signed-off-by: Alexander Theissen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/appledisplay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index 288fe3e69d52..03be7c75c5be 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -64,6 +64,7 @@ static const struct usb_device_id appledisplay_table[] = { { APPLEDISPLAY_DEVICE(0x921c) }, { APPLEDISPLAY_DEVICE(0x921d) }, { APPLEDISPLAY_DEVICE(0x9222) }, + { APPLEDISPLAY_DEVICE(0x9226) }, { APPLEDISPLAY_DEVICE(0x9236) }, /* Terminating entry */ From 7b6e85da8d94948201abb8d576d485892a6a878f Mon Sep 17 00:00:00 2001 From: Mathias Payer Date: Wed, 5 Dec 2018 21:19:59 +0100 Subject: [PATCH 0939/2608] USB: check usb_get_extra_descriptor for proper size commit 704620afc70cf47abb9d6a1a57f3825d2bca49cf upstream. When reading an extra descriptor, we need to properly check the minimum and maximum size allowed, to prevent from invalid data being sent by a device. Reported-by: Hui Peng Reported-by: Mathias Payer Co-developed-by: Linus Torvalds Signed-off-by: Hui Peng Signed-off-by: Mathias Payer Signed-off-by: Linus Torvalds Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 2 +- drivers/usb/core/usb.c | 6 +++--- drivers/usb/host/hwa-hc.c | 2 +- include/linux/usb.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 638dc6f66d70..a073cb5be013 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2231,7 +2231,7 @@ static int usb_enumerate_device_otg(struct usb_device *udev) /* descriptor may appear anywhere in config */ err = __usb_get_extra_descriptor(udev->rawdescriptors[0], le16_to_cpu(udev->config[0].desc.wTotalLength), - USB_DT_OTG, (void **) &desc); + USB_DT_OTG, (void **) &desc, sizeof(*desc)); if (err || !(desc->bmAttributes & USB_OTG_HNP)) return 0; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index f8b50eaf6d1e..7a4e3da549fe 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -833,14 +833,14 @@ EXPORT_SYMBOL_GPL(usb_get_current_frame_number); */ int __usb_get_extra_descriptor(char *buffer, unsigned size, - unsigned char type, void **ptr) + unsigned char type, void **ptr, size_t minsize) { struct usb_descriptor_header *header; while (size >= sizeof(struct usb_descriptor_header)) { header = (struct usb_descriptor_header *)buffer; - if (header->bLength < 2) { + if (header->bLength < 2 || header->bLength > size) { printk(KERN_ERR "%s: bogus descriptor, type %d length %d\n", usbcore_name, @@ -849,7 +849,7 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size, return -1; } - if (header->bDescriptorType == type) { + if (header->bDescriptorType == type && header->bLength >= minsize) { *ptr = header; return 0; } diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c index da3b18038d23..216069c396a0 100644 --- a/drivers/usb/host/hwa-hc.c +++ b/drivers/usb/host/hwa-hc.c @@ -654,7 +654,7 @@ static int hwahc_security_create(struct hwahc *hwahc) top = itr + itr_size; result = __usb_get_extra_descriptor(usb_dev->rawdescriptors[index], le16_to_cpu(usb_dev->actconfig->desc.wTotalLength), - USB_DT_SECURITY, (void **) &secd); + USB_DT_SECURITY, (void **) &secd, sizeof(*secd)); if (result == -1) { dev_warn(dev, "BUG? WUSB host has no security descriptors\n"); return 0; diff --git a/include/linux/usb.h b/include/linux/usb.h index 4192a1755ccb..8c7ba40cf021 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -407,11 +407,11 @@ struct usb_host_bos { }; int __usb_get_extra_descriptor(char *buffer, unsigned size, - unsigned char type, void **ptr); + unsigned char type, void **ptr, size_t min); #define usb_get_extra_descriptor(ifpoint, type, ptr) \ __usb_get_extra_descriptor((ifpoint)->extra, \ (ifpoint)->extralen, \ - type, (void **)ptr) + type, (void **)ptr, sizeof(**(ptr))) /* ----------------------------------------------------------------------- */ From 19f74e45746253cafb8cb1e773041e7cadbac622 Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Mon, 3 Dec 2018 16:09:34 +0100 Subject: [PATCH 0940/2608] ALSA: usb-audio: Fix UAF decrement if card has no live interfaces in card.c commit 5f8cf712582617d523120df67d392059eaf2fc4b upstream. If a USB sound card reports 0 interfaces, an error condition is triggered and the function usb_audio_probe errors out. In the error path, there was a use-after-free vulnerability where the memory object of the card was first freed, followed by a decrement of the number of active chips. Moving the decrement above the atomic_dec fixes the UAF. [ The original problem was introduced in 3.1 kernel, while it was developed in a different form. The Fixes tag below indicates the original commit but it doesn't mean that the patch is applicable cleanly. -- tiwai ] Fixes: 362e4e49abe5 ("ALSA: usb-audio - clear chip->probing on error exit") Reported-by: Hui Peng Reported-by: Mathias Payer Signed-off-by: Hui Peng Signed-off-by: Mathias Payer Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/card.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 23d1d23aefec..4169c71f8a32 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -644,9 +644,12 @@ static int usb_audio_probe(struct usb_interface *intf, __error: if (chip) { + /* chip->active is inside the chip->card object, + * decrement before memory is possibly returned. + */ + atomic_dec(&chip->active); if (!chip->num_interfaces) snd_card_free(chip->card); - atomic_dec(&chip->active); } mutex_unlock(®ister_mutex); return err; From 0618c71c73caa194f4d9ac2d42551ad6df047103 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 29 Nov 2018 08:57:37 +0000 Subject: [PATCH 0941/2608] ALSA: hda: Add support for AMD Stoney Ridge commit 3deef52ce10514ccdebba8e8ab85f9cebd0eb3f7 upstream. It's similar to other AMD audio devices, it also supports D3, which can save some power drain. Signed-off-by: Kai-Heng Feng Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4e38905bc47d..d8e80b6f5a6b 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2513,6 +2513,10 @@ static const struct pci_device_id azx_ids[] = { /* AMD Hudson */ { PCI_DEVICE(0x1022, 0x780d), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, + /* AMD Stoney */ + { PCI_DEVICE(0x1022, 0x157a), + .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB | + AZX_DCAPS_PM_RUNTIME }, /* AMD Raven */ { PCI_DEVICE(0x1022, 0x15e3), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB | From 108ab9eafbbeffa17431cb1acd7a930d23bfd197 Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Mon, 26 Nov 2018 14:36:37 +0900 Subject: [PATCH 0942/2608] ALSA: pcm: Fix starvation on down_write_nonblock() commit b888a5f713e4d17faaaff24316585a4eb07f35b7 upstream. Commit 67ec1072b053 ("ALSA: pcm: Fix rwsem deadlock for non-atomic PCM stream") fixes deadlock for non-atomic PCM stream. But, This patch causes antother stuck. If writer is RT thread and reader is a normal thread, the reader thread will be difficult to get scheduled. It may not give chance to release readlocks and writer gets stuck for a long time if they are pinned to single cpu. The deadlock described in the previous commit is because the linux rwsem queues like a FIFO. So, we might need non-FIFO writelock, not non-block one. My suggestion is that the writer gives reader a chance to be scheduled by using the minimum msleep() instaed of spinning without blocking by writer. Also, The *_nonblock may be changed to *_nonfifo appropriately to this concept. In terms of performance, when trylock is failed, this minimum periodic msleep will have the same performance as the tick-based schedule()/wake_up_q(). [ Although this has a fairly high performance penalty, the relevant code path became already rare due to the previous commit ("ALSA: pcm: Call snd_pcm_unlink() conditionally at closing"). That is, now this unconditional msleep appears only when using linked streams, and this must be a rare case. So we accept this as a quick workaround until finding a more suitable one -- tiwai ] Fixes: 67ec1072b053 ("ALSA: pcm: Fix rwsem deadlock for non-atomic PCM stream") Suggested-by: Wonmin Jung Signed-off-by: Chanho Min Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index ab3bf36786b6..30784bd1c5af 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "pcm_local.h" @@ -91,12 +92,12 @@ static DECLARE_RWSEM(snd_pcm_link_rwsem); * and this may lead to a deadlock when the code path takes read sem * twice (e.g. one in snd_pcm_action_nonatomic() and another in * snd_pcm_stream_lock()). As a (suboptimal) workaround, let writer to - * spin until it gets the lock. + * sleep until all the readers are completed without blocking by writer. */ -static inline void down_write_nonblock(struct rw_semaphore *lock) +static inline void down_write_nonfifo(struct rw_semaphore *lock) { while (!down_write_trylock(lock)) - cond_resched(); + msleep(1); } /** @@ -1935,7 +1936,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) res = -ENOMEM; goto _nolock; } - down_write_nonblock(&snd_pcm_link_rwsem); + down_write_nonfifo(&snd_pcm_link_rwsem); write_lock_irq(&snd_pcm_link_rwlock); if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN || substream->runtime->status->state != substream1->runtime->status->state || @@ -1982,7 +1983,7 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream) struct snd_pcm_substream *s; int res = 0; - down_write_nonblock(&snd_pcm_link_rwsem); + down_write_nonfifo(&snd_pcm_link_rwsem); write_lock_irq(&snd_pcm_link_rwlock); if (!snd_pcm_stream_linked(substream)) { res = -EALREADY; From cabb9539dc1741b2a5bf6077a4aac8156f07a1c8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 29 Nov 2018 08:02:49 +0100 Subject: [PATCH 0943/2608] ALSA: pcm: Call snd_pcm_unlink() conditionally at closing commit b51abed8355e5556886623b2772fa6b7598d2282 upstream. Currently the PCM core calls snd_pcm_unlink() always unconditionally at closing a stream. However, since snd_pcm_unlink() invokes the global rwsem down, the lock can be easily contended. More badly, when a thread runs in a high priority RT-FIFO, it may stall at spinning. Basically the call of snd_pcm_unlink() is required only for the linked streams that are already rare occasion. For normal use cases, this code path is fairly superfluous. As an optimization (and also as a workaround for the RT problem above in normal situations without linked streams), this patch adds a check before calling snd_pcm_unlink() and calls it only when needed. Reported-by: Chanho Min Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 30784bd1c5af..966ac384c3f4 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2338,7 +2338,8 @@ int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream) static void pcm_release_private(struct snd_pcm_substream *substream) { - snd_pcm_unlink(substream); + if (snd_pcm_stream_linked(substream)) + snd_pcm_unlink(substream); } void snd_pcm_release_substream(struct snd_pcm_substream *substream) From 7852f54f08a0a7222235561fc57ccd83f1c358d4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 29 Nov 2018 12:05:19 +0100 Subject: [PATCH 0944/2608] ALSA: pcm: Fix interval evaluation with openmin/max commit 5363857b916c1f48027e9b96ee8be8376bf20811 upstream. As addressed in alsa-lib (commit b420056604f0), we need to fix the case where the evaluation of PCM interval "(x x+1]" leading to -EINVAL. After applying rules, such an interval may be translated as "(x x+1)". Fixes: ff2d6acdf6f1 ("ALSA: pcm: Fix snd_interval_refine first/last with open min/max") Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/sound/pcm_params.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/sound/pcm_params.h b/include/sound/pcm_params.h index c704357775fc..2af7bb3ee57d 100644 --- a/include/sound/pcm_params.h +++ b/include/sound/pcm_params.h @@ -247,11 +247,13 @@ static inline int snd_interval_empty(const struct snd_interval *i) static inline int snd_interval_single(const struct snd_interval *i) { return (i->min == i->max || - (i->min + 1 == i->max && i->openmax)); + (i->min + 1 == i->max && (i->openmin || i->openmax))); } static inline int snd_interval_value(const struct snd_interval *i) { + if (i->openmin && !i->openmax) + return i->max; return i->min; } From 01a02e66228b69b497136168716cb1ba2b5ebd73 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Dec 2018 10:44:15 +0100 Subject: [PATCH 0945/2608] ALSA: hda/realtek - Fix speaker output regression on Thinkpad T570 commit 54947cd64c1b8290f64bb2958e343c07270e3a58 upstream. We've got a regression report for some Thinkpad models (at least T570s) which shows the too low speaker output volume. The bisection leaded to the commit 61fcf8ece9b6 ("ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform"), and it's basically adding the two pin configurations for the dock, and looks harmless. The real culprit seems, though, that the DAC assignment for the speaker pin is implicitly assumed on these devices, i.e. pin NID 0x14 to be coupled with DAC NID 0x03. When more pins are configured by the commit above, the auto-parser changes the DAC assignment, and this resulted in the regression. As a workaround, just provide the fixed pin / DAC mapping table for this Thinkpad fixup function. It's no generic solution, but the problem itself is pretty much device-specific, so must be good enough. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1554304 Fixes: 61fcf8ece9b6 ("ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform") Cc: Reported-and-tested-by: Jeremy Cline Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 66b0a124beae..f6136f041a81 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4863,9 +4863,18 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec, { 0x19, 0x21a11010 }, /* dock mic */ { } }; + /* Assure the speaker pin to be coupled with DAC NID 0x03; otherwise + * the speaker output becomes too low by some reason on Thinkpads with + * ALC298 codec + */ + static hda_nid_t preferred_pairs[] = { + 0x14, 0x03, 0x17, 0x02, 0x21, 0x02, + 0 + }; struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gen.preferred_dacs = preferred_pairs; spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; snd_hda_apply_pincfgs(codec, pincfgs); } else if (action == HDA_FIXUP_ACT_INIT) { From 73f6d525be2cded3bd3dd3bcfcfffec548a79e94 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 26 Sep 2018 18:48:29 +0200 Subject: [PATCH 0946/2608] virtio/s390: avoid race on vcdev->config commit 2448a299ec416a80f699940a86f4a6d9a4f643b1 upstream. Currently we have a race on vcdev->config in virtio_ccw_get_config() and in virtio_ccw_set_config(). This normally does not cause problems, as these are usually infrequent operations. However, for some devices writing to/reading from the config space can be triggered through sysfs attributes. For these, userspace can force the race by increasing the frequency. Signed-off-by: Halil Pasic Cc: stable@vger.kernel.org Message-Id: <20180925121309.58524-2-pasic@linux.ibm.com> Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/virtio/virtio_ccw.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index b18fe2014cf2..3a1b553e7653 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -831,6 +831,7 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, int ret; struct ccw1 *ccw; void *config_area; + unsigned long flags; ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); if (!ccw) @@ -849,11 +850,13 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, if (ret) goto out_free; + spin_lock_irqsave(&vcdev->lock, flags); memcpy(vcdev->config, config_area, offset + len); - if (buf) - memcpy(buf, &vcdev->config[offset], len); if (vcdev->config_ready < offset + len) vcdev->config_ready = offset + len; + spin_unlock_irqrestore(&vcdev->lock, flags); + if (buf) + memcpy(buf, config_area + offset, len); out_free: kfree(config_area); @@ -867,6 +870,7 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, struct virtio_ccw_device *vcdev = to_vc_device(vdev); struct ccw1 *ccw; void *config_area; + unsigned long flags; ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); if (!ccw) @@ -879,9 +883,11 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, /* Make sure we don't overwrite fields. */ if (vcdev->config_ready < offset) virtio_ccw_get_config(vdev, 0, NULL, offset); + spin_lock_irqsave(&vcdev->lock, flags); memcpy(&vcdev->config[offset], buf, len); /* Write the config area to the host. */ memcpy(config_area, vcdev->config, sizeof(vcdev->config)); + spin_unlock_irqrestore(&vcdev->lock, flags); ccw->cmd_code = CCW_CMD_WRITE_CONF; ccw->flags = 0; ccw->count = offset + len; From 52b993b4c4c2a88b62601b0cc0662a564e1d65dc Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 26 Sep 2018 18:48:30 +0200 Subject: [PATCH 0947/2608] virtio/s390: fix race in ccw_io_helper() commit 78b1a52e05c9db11d293342e8d6d8a230a04b4e7 upstream. While ccw_io_helper() seems like intended to be exclusive in a sense that it is supposed to facilitate I/O for at most one thread at any given time, there is actually nothing ensuring that threads won't pile up at vcdev->wait_q. If they do, all threads get woken up and see the status that belongs to some other request than their own. This can lead to bugs. For an example see: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1788432 This race normally does not cause any problems. The operations provided by struct virtio_config_ops are usually invoked in a well defined sequence, normally don't fail, and are normally used quite infrequent too. Yet, if some of the these operations are directly triggered via sysfs attributes, like in the case described by the referenced bug, userspace is given an opportunity to force races by increasing the frequency of the given operations. Let us fix the problem by ensuring, that for each device, we finish processing the previous request before starting with a new one. Signed-off-by: Halil Pasic Reported-by: Colin Ian King Cc: stable@vger.kernel.org Message-Id: <20180925121309.58524-3-pasic@linux.ibm.com> Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/virtio/virtio_ccw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 3a1b553e7653..0847d05e138b 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -59,6 +59,7 @@ struct virtio_ccw_device { unsigned int revision; /* Transport revision */ wait_queue_head_t wait_q; spinlock_t lock; + struct mutex io_lock; /* Serializes I/O requests */ struct list_head virtqueues; unsigned long indicators; unsigned long indicators2; @@ -299,6 +300,7 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, unsigned long flags; int flag = intparm & VIRTIO_CCW_INTPARM_MASK; + mutex_lock(&vcdev->io_lock); do { spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags); ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0); @@ -311,7 +313,9 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, cpu_relax(); } while (ret == -EBUSY); wait_event(vcdev->wait_q, doing_io(vcdev, flag) == 0); - return ret ? ret : vcdev->err; + ret = ret ? ret : vcdev->err; + mutex_unlock(&vcdev->io_lock); + return ret; } static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev, @@ -1256,6 +1260,7 @@ static int virtio_ccw_online(struct ccw_device *cdev) init_waitqueue_head(&vcdev->wait_q); INIT_LIST_HEAD(&vcdev->virtqueues); spin_lock_init(&vcdev->lock); + mutex_init(&vcdev->io_lock); spin_lock_irqsave(get_ccwdev_lock(cdev), flags); dev_set_drvdata(&cdev->dev, vcdev); From f15c072d6576c5e2b693c22e39ccc9103c952078 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 5 Nov 2018 10:35:47 +0000 Subject: [PATCH 0948/2608] vhost/vsock: fix use-after-free in network stack callers commit 834e772c8db0c6a275d75315d90aba4ebbb1e249 upstream. If the network stack calls .send_pkt()/.cancel_pkt() during .release(), a struct vhost_vsock use-after-free is possible. This occurs because .release() does not wait for other CPUs to stop using struct vhost_vsock. Switch to an RCU-enabled hashtable (indexed by guest CID) so that .release() can wait for other CPUs by calling synchronize_rcu(). This also eliminates vhost_vsock_lock acquisition in the data path so it could have a positive effect on performance. This is CVE-2018-14625 "kernel: use-after-free Read in vhost_transport_send_pkt". Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+bd391451452fb0b93039@syzkaller.appspotmail.com Reported-by: syzbot+e3e074963495f92a89ed@syzkaller.appspotmail.com Reported-by: syzbot+d5a0a170c5069658b141@syzkaller.appspotmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 57 +++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index c9de9c41aa97..b044a0800805 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "vhost.h" @@ -27,14 +28,14 @@ enum { /* Used to track all the vhost_vsock instances on the system. */ static DEFINE_SPINLOCK(vhost_vsock_lock); -static LIST_HEAD(vhost_vsock_list); +static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8); struct vhost_vsock { struct vhost_dev dev; struct vhost_virtqueue vqs[2]; - /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ - struct list_head list; + /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */ + struct hlist_node hash; struct vhost_work send_pkt_work; spinlock_t send_pkt_list_lock; @@ -50,11 +51,14 @@ static u32 vhost_transport_get_local_cid(void) return VHOST_VSOCK_DEFAULT_HOST_CID; } -static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid) +/* Callers that dereference the return value must hold vhost_vsock_lock or the + * RCU read lock. + */ +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) { struct vhost_vsock *vsock; - list_for_each_entry(vsock, &vhost_vsock_list, list) { + hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) { u32 other_cid = vsock->guest_cid; /* Skip instances that have no CID yet */ @@ -69,17 +73,6 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid) return NULL; } -static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -{ - struct vhost_vsock *vsock; - - spin_lock_bh(&vhost_vsock_lock); - vsock = __vhost_vsock_get(guest_cid); - spin_unlock_bh(&vhost_vsock_lock); - - return vsock; -} - static void vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct vhost_virtqueue *vq) @@ -210,9 +203,12 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) struct vhost_vsock *vsock; int len = pkt->len; + rcu_read_lock(); + /* Find the vhost_vsock according to guest context id */ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); if (!vsock) { + rcu_read_unlock(); virtio_transport_free_pkt(pkt); return -ENODEV; } @@ -225,6 +221,8 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) spin_unlock_bh(&vsock->send_pkt_list_lock); vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + + rcu_read_unlock(); return len; } @@ -234,12 +232,15 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) struct vhost_vsock *vsock; struct virtio_vsock_pkt *pkt, *n; int cnt = 0; + int ret = -ENODEV; LIST_HEAD(freeme); + rcu_read_lock(); + /* Find the vhost_vsock according to guest context id */ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); if (!vsock) - return -ENODEV; + goto out; spin_lock_bh(&vsock->send_pkt_list_lock); list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { @@ -265,7 +266,10 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) vhost_poll_queue(&tx_vq->poll); } - return 0; + ret = 0; +out: + rcu_read_unlock(); + return ret; } static struct virtio_vsock_pkt * @@ -531,10 +535,6 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) spin_lock_init(&vsock->send_pkt_list_lock); INIT_LIST_HEAD(&vsock->send_pkt_list); vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); - - spin_lock_bh(&vhost_vsock_lock); - list_add_tail(&vsock->list, &vhost_vsock_list); - spin_unlock_bh(&vhost_vsock_lock); return 0; out: @@ -575,9 +575,13 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) struct vhost_vsock *vsock = file->private_data; spin_lock_bh(&vhost_vsock_lock); - list_del(&vsock->list); + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); spin_unlock_bh(&vhost_vsock_lock); + /* Wait for other CPUs to finish using vsock */ + synchronize_rcu(); + /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here. */ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); @@ -618,12 +622,17 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) /* Refuse if CID is already in use */ spin_lock_bh(&vhost_vsock_lock); - other = __vhost_vsock_get(guest_cid); + other = vhost_vsock_get(guest_cid); if (other && other != vsock) { spin_unlock_bh(&vhost_vsock_lock); return -EADDRINUSE; } + + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); + vsock->guest_cid = guest_cid; + hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); spin_unlock_bh(&vhost_vsock_lock); return 0; From e3dccb527cfc97c4027824b6104635714ae14f47 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 30 Nov 2018 15:39:57 -0500 Subject: [PATCH 0949/2608] SUNRPC: Fix leak of krb5p encode pages commit 8dae5398ab1ac107b1517e8195ed043d5f422bd0 upstream. call_encode can be invoked more than once per RPC call. Ensure that each call to gss_wrap_req_priv does not overwrite pointers to previously allocated memory. Signed-off-by: Chuck Lever Cc: stable@kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/auth_gss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 9463af4b32e8..1281b967dbf9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1736,6 +1736,7 @@ priv_release_snd_buf(struct rpc_rqst *rqstp) for (i=0; i < rqstp->rq_enc_pages_num; i++) __free_page(rqstp->rq_enc_pages[i]); kfree(rqstp->rq_enc_pages); + rqstp->rq_release_snd_buf = NULL; } static int @@ -1744,6 +1745,9 @@ alloc_enc_pages(struct rpc_rqst *rqstp) struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; int first, last, i; + if (rqstp->rq_release_snd_buf) + rqstp->rq_release_snd_buf(rqstp); + if (snd_buf->page_len == 0) { rqstp->rq_enc_pages_num = 0; return 0; From bc3d506a116f1a00dba4a380ff511007c71e4db9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Dec 2018 18:33:59 +0200 Subject: [PATCH 0950/2608] dmaengine: dw: Fix FIFO size for Intel Merrifield commit ffe843b18211301ad25893eba09f402c19d12304 upstream. Intel Merrifield has a reduced size of FIFO used in iDMA 32-bit controller, i.e. 512 bytes instead of 1024. Fix this by partitioning it as 64 bytes per channel. Note, in the future we might switch to 'fifo-size' property instead of hard coded value. Fixes: 199244d69458 ("dmaengine: dw: add support of iDMA 32-bit hardware") Signed-off-by: Andy Shevchenko Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dw/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index f43e6dafe446..0f389e008ce6 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1064,12 +1064,12 @@ static void dwc_issue_pending(struct dma_chan *chan) /* * Program FIFO size of channels. * - * By default full FIFO (1024 bytes) is assigned to channel 0. Here we + * By default full FIFO (512 bytes) is assigned to channel 0. Here we * slice FIFO on equal parts between channels. */ static void idma32_fifo_partition(struct dw_dma *dw) { - u64 value = IDMA32C_FP_PSIZE_CH0(128) | IDMA32C_FP_PSIZE_CH1(128) | + u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) | IDMA32C_FP_UPDATE; u64 fifo_partition = 0; @@ -1082,7 +1082,7 @@ static void idma32_fifo_partition(struct dw_dma *dw) /* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */ fifo_partition |= value << 32; - /* Program FIFO Partition registers - 128 bytes for each channel */ + /* Program FIFO Partition registers - 64 bytes per channel */ idma32_writeq(dw, FIFO_PARTITION1, fifo_partition); idma32_writeq(dw, FIFO_PARTITION0, fifo_partition); } From e47a6e68c3bbfebb6d6f336d92884df37b722e4b Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Mon, 12 Nov 2018 09:43:22 -0600 Subject: [PATCH 0951/2608] dmaengine: cppi41: delete channel from pending list when stop channel commit 59861547ec9a9736e7882f6fb0c096a720ff811a upstream. The driver defines three states for a cppi channel. - idle: .chan_busy == 0 && not in .pending list - pending: .chan_busy == 0 && in .pending list - busy: .chan_busy == 1 && not in .pending list There are cases in which the cppi channel could be in the pending state when cppi41_dma_issue_pending() is called after cppi41_runtime_suspend() is called. cppi41_stop_chan() has a bug for these cases to set channels to idle state. It only checks the .chan_busy flag, but not the .pending list, then later when cppi41_runtime_resume() is called the channels in .pending list will be transitioned to busy state. Removing channels from the .pending list solves the problem. Fixes: 975faaeb9985 ("dma: cppi41: start tear down only if channel is busy") Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Bin Liu Reviewed-by: Peter Ujfalusi Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/cppi41.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index f7e965f63274..ddd4a3932127 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -723,8 +723,22 @@ static int cppi41_stop_chan(struct dma_chan *chan) desc_phys = lower_32_bits(c->desc_phys); desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); - if (!cdd->chan_busy[desc_num]) + if (!cdd->chan_busy[desc_num]) { + struct cppi41_channel *cc, *_ct; + + /* + * channels might still be in the pendling list if + * cppi41_dma_issue_pending() is called after + * cppi41_runtime_suspend() is called + */ + list_for_each_entry_safe(cc, _ct, &cdd->pending, node) { + if (cc != c) + continue; + list_del(&cc->node); + break; + } return 0; + } ret = cppi41_tear_down_chan(c); if (ret) From f9e0bc710347fadad55910846ee36f0681d0dca8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 30 Oct 2018 22:12:56 +0100 Subject: [PATCH 0952/2608] ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE commit e46daee53bb50bde38805f1823a182979724c229 upstream. The arm compiler internally interprets an inline assembly label as an unsigned long value, not a pointer. As a result, under CONFIG_FORTIFY_SOURCE, the address of a label has a size of 4 bytes, which was tripping the runtime checks. Instead, we can just cast the label (as done with the size calculations earlier). Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1639397 Reported-by: William Cohen Fixes: 6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions") Cc: stable@vger.kernel.org Acked-by: Laura Abbott Acked-by: Masami Hiramatsu Tested-by: William Cohen Signed-off-by: Kees Cook Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/probes/kprobes/opt-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index b2aa9b32bff2..2c118a6ab358 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, &optprobe_template_entry, + memcpy(code, (unsigned char *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ From 3ff0131284e94ddac80520fa549d55a4294776cb Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 5 Dec 2018 14:22:38 +0200 Subject: [PATCH 0953/2608] xhci: workaround CSS timeout on AMD SNPS 3.0 xHC commit a7d57abcc8a5bdeb53bbf8e87558e8e0a2c2a29d upstream. Occasionally AMD SNPS 3.0 xHC does not respond to CSS when set, also it does not flag anything on SRE and HCE to point the internal xHC errors on USBSTS register. This stalls the entire system wide suspend and there is no point in stalling just because of xHC CSS is not responding. To work around this problem, if the xHC does not flag anything on SRE and HCE, we can skip the CSS timeout and allow the system to continue the suspend. Once the system resume happens we can internally reset the controller using XHCI_RESET_ON_RESUME quirk Signed-off-by: Shyam Sundar S K Signed-off-by: Sandeep Singh cc: Nehal Shah Cc: Tested-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 ++++ drivers/usb/host/xhci.c | 26 ++++++++++++++++++++++---- drivers/usb/host/xhci.h | 3 +++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 4b07b6859b4c..0fbc549cc55c 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -144,6 +144,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x43bb)) xhci->quirks |= XHCI_SUSPEND_DELAY; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == 0x15e0 || pdev->device == 0x15e1)) + xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND; + if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index faf048682194..155b36265f7a 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -918,6 +918,7 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) unsigned int delay = XHCI_MAX_HALT_USEC; struct usb_hcd *hcd = xhci_to_hcd(xhci); u32 command; + u32 res; if (!hcd->state) return 0; @@ -969,11 +970,28 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) command = readl(&xhci->op_regs->command); command |= CMD_CSS; writel(command, &xhci->op_regs->command); + xhci->broken_suspend = 0; if (xhci_handshake(&xhci->op_regs->status, STS_SAVE, 0, 10 * 1000)) { - xhci_warn(xhci, "WARN: xHC save state timeout\n"); - spin_unlock_irq(&xhci->lock); - return -ETIMEDOUT; + /* + * AMD SNPS xHC 3.0 occasionally does not clear the + * SSS bit of USBSTS and when driver tries to poll + * to see if the xHC clears BIT(8) which never happens + * and driver assumes that controller is not responding + * and times out. To workaround this, its good to check + * if SRE and HCE bits are not set (as per xhci + * Section 5.4.2) and bypass the timeout. + */ + res = readl(&xhci->op_regs->status); + if ((xhci->quirks & XHCI_SNPS_BROKEN_SUSPEND) && + (((res & STS_SRE) == 0) && + ((res & STS_HCE) == 0))) { + xhci->broken_suspend = 1; + } else { + xhci_warn(xhci, "WARN: xHC save state timeout\n"); + spin_unlock_irq(&xhci->lock); + return -ETIMEDOUT; + } } spin_unlock_irq(&xhci->lock); @@ -1026,7 +1044,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags); spin_lock_irq(&xhci->lock); - if (xhci->quirks & XHCI_RESET_ON_RESUME) + if ((xhci->quirks & XHCI_RESET_ON_RESUME) || xhci->broken_suspend) hibernated = true; if (!hibernated) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 74ba20556020..1ccff2d9dee9 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1839,6 +1839,7 @@ struct xhci_hcd { #define XHCI_SUSPEND_DELAY BIT_ULL(30) #define XHCI_INTEL_USB_ROLE_SW BIT_ULL(31) #define XHCI_RESET_PLL_ON_DISCONNECT BIT_ULL(34) +#define XHCI_SNPS_BROKEN_SUSPEND BIT_ULL(35) unsigned int num_active_eps; unsigned int limit_active_eps; @@ -1870,6 +1871,8 @@ struct xhci_hcd { /* platform-specific data -- must come last */ unsigned long priv[0] __aligned(sizeof(s64)); + /* Broken Suspend flag for SNPS Suspend resume issue */ + u8 broken_suspend; }; /* Platform specific overrides to generic XHCI hc_driver ops */ From cacfa255bee4fd7137939411b05a0527475a19ec Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 5 Dec 2018 14:22:39 +0200 Subject: [PATCH 0954/2608] xhci: Prevent U1/U2 link pm states if exit latency is too long commit 0472bf06c6fd33c1a18aaead4c8f91e5a03d8d7b upstream. Don't allow USB3 U1 or U2 if the latency to wake up from the U-state reaches the service interval for a periodic endpoint. This is according to xhci 1.1 specification section 4.23.5.2 extra note: "Software shall ensure that a device is prevented from entering a U-state where its worst case exit latency approaches the ESIT." Allowing too long exit latencies for periodic endpoint confuses xHC internal scheduling, and new devices may fail to enumerate with a "Not enough bandwidth for new device state" error from the host. Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 155b36265f7a..930eecd86429 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4381,6 +4381,14 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + /* Prevent U1 if service interval is shorter than U1 exit latency */ + if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { + if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) { + dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n"); + return USB3_LPM_DISABLED; + } + } + if (xhci->quirks & XHCI_INTEL_HOST) timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); else @@ -4437,6 +4445,14 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + /* Prevent U2 if service interval is shorter than U2 exit latency */ + if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { + if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) { + dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n"); + return USB3_LPM_DISABLED; + } + } + if (xhci->quirks & XHCI_INTEL_HOST) timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); else From 38fce19d4d7bc8acfa183ee2918758d279a69c9a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 10 Dec 2018 17:00:50 +0000 Subject: [PATCH 0955/2608] f2fs: fix to do sanity check with block address in main area v2 commit 91291e9998d208370eb8156c760691b873bd7522 upstream. This patch adds f2fs_is_valid_blkaddr() in below functions to do sanity check with block address to avoid pentential panic: - f2fs_grab_read_bio() - __written_first_block() https://bugzilla.kernel.org/show_bug.cgi?id=200465 - Reproduce - POC (poc.c) #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void activity(char *mpoint) { char *xattr; int err; err = asprintf(&xattr, "%s/foo/bar/xattr", mpoint); char buf2[113]; memset(buf2, 0, sizeof(buf2)); listxattr(xattr, buf2, sizeof(buf2)); } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - kernel message [ 844.718738] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 846.430929] F2FS-fs (loop0): access invalid blkaddr:1024 [ 846.431058] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.431059] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.431310] CPU: 1 PID: 1249 Comm: a.out Not tainted 4.18.0-rc3+ #1 [ 846.431312] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.431315] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.431316] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00 [ 846.431347] RSP: 0018:ffff961c414a7bc0 EFLAGS: 00010282 [ 846.431349] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000000 [ 846.431350] RDX: 0000000000000000 RSI: ffff89dfffd165d8 RDI: ffff89dfffd165d8 [ 846.431351] RBP: ffff961c414a7c20 R08: 0000000000000001 R09: 0000000000000248 [ 846.431353] R10: 0000000000000000 R11: 0000000000000248 R12: 0000000000000007 [ 846.431369] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0 [ 846.431372] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.431373] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.431374] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.431384] Call Trace: [ 846.431426] f2fs_iget+0x6f4/0xe70 [ 846.431430] ? f2fs_find_entry+0x71/0x90 [ 846.431432] f2fs_lookup+0x1aa/0x390 [ 846.431452] __lookup_slow+0x97/0x150 [ 846.431459] lookup_slow+0x35/0x50 [ 846.431462] walk_component+0x1c6/0x470 [ 846.431479] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.431488] ? page_add_file_rmap+0x13/0x200 [ 846.431491] path_lookupat+0x76/0x230 [ 846.431501] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.431504] filename_lookup+0xb8/0x1a0 [ 846.431534] ? _cond_resched+0x16/0x40 [ 846.431541] ? kmem_cache_alloc+0x160/0x1d0 [ 846.431549] ? path_listxattr+0x41/0xa0 [ 846.431551] path_listxattr+0x41/0xa0 [ 846.431570] do_syscall_64+0x55/0x100 [ 846.431583] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.431607] RIP: 0033:0x7f882de1c0d7 [ 846.431607] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.431639] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.431641] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.431642] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.431643] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.431645] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.431646] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.431648] ---[ end trace abca54df39d14f5c ]--- [ 846.431651] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix. [ 846.431762] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_iget+0xd17/0xe70 [ 846.431763] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.431797] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.431798] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.431800] RIP: 0010:f2fs_iget+0xd17/0xe70 [ 846.431801] Code: ff ff 48 63 d8 e9 e1 f6 ff ff 48 8b 45 c8 41 b8 05 00 00 00 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b 48 8b 38 e8 f9 b4 00 00 <0f> 0b 48 8b 45 c8 f0 80 48 48 04 e9 d8 f9 ff ff 0f 0b 48 8b 43 18 [ 846.431832] RSP: 0018:ffff961c414a7bd0 EFLAGS: 00010282 [ 846.431834] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000006 [ 846.431835] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0 [ 846.431836] RBP: ffff961c414a7c20 R08: 0000000000000000 R09: 0000000000000273 [ 846.431837] R10: 0000000000000000 R11: ffff89dfad50ca60 R12: 0000000000000007 [ 846.431838] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0 [ 846.431840] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.431841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.431842] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.431846] Call Trace: [ 846.431850] ? f2fs_find_entry+0x71/0x90 [ 846.431853] f2fs_lookup+0x1aa/0x390 [ 846.431856] __lookup_slow+0x97/0x150 [ 846.431858] lookup_slow+0x35/0x50 [ 846.431874] walk_component+0x1c6/0x470 [ 846.431878] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.431880] ? page_add_file_rmap+0x13/0x200 [ 846.431882] path_lookupat+0x76/0x230 [ 846.431884] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.431886] filename_lookup+0xb8/0x1a0 [ 846.431890] ? _cond_resched+0x16/0x40 [ 846.431891] ? kmem_cache_alloc+0x160/0x1d0 [ 846.431894] ? path_listxattr+0x41/0xa0 [ 846.431896] path_listxattr+0x41/0xa0 [ 846.431898] do_syscall_64+0x55/0x100 [ 846.431901] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.431902] RIP: 0033:0x7f882de1c0d7 [ 846.431903] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.431934] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.431936] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.431937] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.431939] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.431940] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.431941] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.431943] ---[ end trace abca54df39d14f5d ]--- [ 846.432033] F2FS-fs (loop0): access invalid blkaddr:1024 [ 846.432051] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.432051] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.432085] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.432086] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.432089] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.432089] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00 [ 846.432120] RSP: 0018:ffff961c414a7900 EFLAGS: 00010286 [ 846.432122] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006 [ 846.432123] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0 [ 846.432124] RBP: ffff89dff5492800 R08: 0000000000000001 R09: 000000000000029d [ 846.432125] R10: ffff961c414a7820 R11: 000000000000029d R12: 0000000000000400 [ 846.432126] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000 [ 846.432128] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.432130] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.432131] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.432135] Call Trace: [ 846.432151] f2fs_wait_on_block_writeback+0x20/0x110 [ 846.432158] f2fs_grab_read_bio+0xbc/0xe0 [ 846.432161] f2fs_submit_page_read+0x21/0x280 [ 846.432163] f2fs_get_read_data_page+0xb7/0x3c0 [ 846.432165] f2fs_get_lock_data_page+0x29/0x1e0 [ 846.432167] f2fs_get_new_data_page+0x148/0x550 [ 846.432170] f2fs_add_regular_entry+0x1d2/0x550 [ 846.432178] ? __switch_to+0x12f/0x460 [ 846.432181] f2fs_add_dentry+0x6a/0xd0 [ 846.432184] f2fs_do_add_link+0xe9/0x140 [ 846.432186] __recover_dot_dentries+0x260/0x280 [ 846.432189] f2fs_lookup+0x343/0x390 [ 846.432193] __lookup_slow+0x97/0x150 [ 846.432195] lookup_slow+0x35/0x50 [ 846.432208] walk_component+0x1c6/0x470 [ 846.432212] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.432215] ? page_add_file_rmap+0x13/0x200 [ 846.432217] path_lookupat+0x76/0x230 [ 846.432219] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.432221] filename_lookup+0xb8/0x1a0 [ 846.432224] ? _cond_resched+0x16/0x40 [ 846.432226] ? kmem_cache_alloc+0x160/0x1d0 [ 846.432228] ? path_listxattr+0x41/0xa0 [ 846.432230] path_listxattr+0x41/0xa0 [ 846.432233] do_syscall_64+0x55/0x100 [ 846.432235] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.432237] RIP: 0033:0x7f882de1c0d7 [ 846.432237] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.432269] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.432271] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.432272] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.432273] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.432274] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.432275] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.432277] ---[ end trace abca54df39d14f5e ]--- [ 846.432279] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix. [ 846.432376] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_wait_on_block_writeback+0xb1/0x110 [ 846.432376] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.432410] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.432411] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.432413] RIP: 0010:f2fs_wait_on_block_writeback+0xb1/0x110 [ 846.432414] Code: 66 90 f0 ff 4b 34 74 59 5b 5d c3 48 8b 7d 00 41 b8 05 00 00 00 89 d9 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b e8 df bc fd ff <0f> 0b f0 80 4d 48 04 e9 67 ff ff ff 48 8b 03 48 c1 e8 37 83 e0 07 [ 846.432445] RSP: 0018:ffff961c414a7910 EFLAGS: 00010286 [ 846.432447] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006 [ 846.432448] RDX: 0000000000000000 RSI: 0000000000000092 RDI: ffff89dfffd165d0 [ 846.432449] RBP: ffff89dff5492800 R08: 0000000000000000 R09: 00000000000002d1 [ 846.432450] R10: ffff961c414a7820 R11: ffff89dfad50cf80 R12: 0000000000000400 [ 846.432451] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000 [ 846.432453] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.432454] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.432455] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.432459] Call Trace: [ 846.432463] f2fs_grab_read_bio+0xbc/0xe0 [ 846.432464] f2fs_submit_page_read+0x21/0x280 [ 846.432466] f2fs_get_read_data_page+0xb7/0x3c0 [ 846.432468] f2fs_get_lock_data_page+0x29/0x1e0 [ 846.432470] f2fs_get_new_data_page+0x148/0x550 [ 846.432473] f2fs_add_regular_entry+0x1d2/0x550 [ 846.432475] ? __switch_to+0x12f/0x460 [ 846.432477] f2fs_add_dentry+0x6a/0xd0 [ 846.432480] f2fs_do_add_link+0xe9/0x140 [ 846.432483] __recover_dot_dentries+0x260/0x280 [ 846.432485] f2fs_lookup+0x343/0x390 [ 846.432488] __lookup_slow+0x97/0x150 [ 846.432490] lookup_slow+0x35/0x50 [ 846.432505] walk_component+0x1c6/0x470 [ 846.432509] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.432511] ? page_add_file_rmap+0x13/0x200 [ 846.432513] path_lookupat+0x76/0x230 [ 846.432515] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.432517] filename_lookup+0xb8/0x1a0 [ 846.432520] ? _cond_resched+0x16/0x40 [ 846.432522] ? kmem_cache_alloc+0x160/0x1d0 [ 846.432525] ? path_listxattr+0x41/0xa0 [ 846.432526] path_listxattr+0x41/0xa0 [ 846.432529] do_syscall_64+0x55/0x100 [ 846.432531] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.432533] RIP: 0033:0x7f882de1c0d7 [ 846.432533] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.432565] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.432567] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.432568] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.432569] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.432570] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.432571] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.432573] ---[ end trace abca54df39d14f5f ]--- [ 846.434280] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 846.434424] PGD 80000001ebd3a067 P4D 80000001ebd3a067 PUD 1eb1ae067 PMD 0 [ 846.434551] Oops: 0000 [#1] SMP PTI [ 846.434697] CPU: 0 PID: 44 Comm: kworker/u5:0 Tainted: G W 4.18.0-rc3+ #1 [ 846.434805] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.435000] Workqueue: fscrypt_read_queue decrypt_work [ 846.435174] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0 [ 846.435351] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24 [ 846.435696] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206 [ 846.435870] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80 [ 846.436051] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8 [ 846.436261] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000 [ 846.436433] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80 [ 846.436562] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60 [ 846.436658] FS: 0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000 [ 846.436758] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.436898] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0 [ 846.437001] Call Trace: [ 846.437181] ? check_preempt_wakeup+0xf2/0x230 [ 846.437276] ? check_preempt_curr+0x7c/0x90 [ 846.437370] fscrypt_decrypt_page+0x48/0x4d [ 846.437466] __fscrypt_decrypt_bio+0x5b/0x90 [ 846.437542] decrypt_work+0x12/0x20 [ 846.437651] process_one_work+0x15e/0x3d0 [ 846.437740] worker_thread+0x4c/0x440 [ 846.437848] kthread+0xf8/0x130 [ 846.437938] ? rescuer_thread+0x350/0x350 [ 846.438022] ? kthread_associate_blkcg+0x90/0x90 [ 846.438117] ret_from_fork+0x35/0x40 [ 846.438201] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.438653] CR2: 0000000000000008 [ 846.438713] ---[ end trace abca54df39d14f60 ]--- [ 846.438796] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0 [ 846.438844] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24 [ 846.439084] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206 [ 846.439176] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80 [ 846.440927] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8 [ 846.442083] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000 [ 846.443284] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80 [ 846.444448] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60 [ 846.445558] FS: 0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000 [ 846.446687] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.447796] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0 - Location https://elixir.bootlin.com/linux/v4.18-rc4/source/fs/crypto/crypto.c#L149 struct crypto_skcipher *tfm = ci->ci_ctfm; Here ci can be NULL Note that this issue maybe require CONFIG_F2FS_FS_ENCRYPTION=y to reproduce. Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 3 +++ fs/f2fs/inode.c | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8f6e7c3a10f8..c68b319b07aa 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -468,6 +468,9 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct fscrypt_ctx *ctx = NULL; struct bio *bio; + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) + return ERR_PTR(-EFAULT); + if (f2fs_encrypted_file(inode)) { ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9a40724dbaa6..50818b519df8 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -62,14 +62,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static bool __written_first_block(struct f2fs_sb_info *sbi, +static int __written_first_block(struct f2fs_sb_info *sbi, struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (is_valid_data_blkaddr(sbi, addr)) - return true; - return false; + if (!__is_valid_data_blkaddr(addr)) + return 1; + if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC)) + return -EFAULT; + return 0; } static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -253,6 +255,7 @@ static int do_read_inode(struct inode *inode) struct page *node_page; struct f2fs_inode *ri; projid_t i_projid; + int err; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) @@ -307,7 +310,12 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (__written_first_block(sbi, ri)) + err = __written_first_block(sbi, ri); + if (err < 0) { + f2fs_put_page(node_page, 1); + return err; + } + if (!err) set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); if (!need_inode_block_update(sbi, inode->i_ino)) From f2a4f7622d052eb987e8693633468c239c13575a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Dec 2018 18:14:58 +0000 Subject: [PATCH 0956/2608] swiotlb: clean up reporting commit 7d63fb3af87aa67aa7d24466e792f9d7c57d8e79 upstream. This removes needless use of '%p', and refactors the printk calls to use pr_*() helpers instead. Signed-off-by: Kees Cook Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Christoph Hellwig [bwh: Backported to 4.14: - Adjust filename - Remove "swiotlb: " prefix from an additional log message] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- lib/swiotlb.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 20df2fd9b150..b4c768de3344 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -17,6 +17,8 @@ * 08/12/11 beckyb Add highmem support */ +#define pr_fmt(fmt) "software IO TLB: " fmt + #include #include #include @@ -177,20 +179,16 @@ static bool no_iotlb_memory; void swiotlb_print_info(void) { unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - unsigned char *vstart, *vend; if (no_iotlb_memory) { - pr_warn("software IO TLB: No low mem\n"); + pr_warn("No low mem\n"); return; } - vstart = phys_to_virt(io_tlb_start); - vend = phys_to_virt(io_tlb_end); - - printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", + pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n", (unsigned long long)io_tlb_start, (unsigned long long)io_tlb_end, - bytes >> 20, vstart, vend - 1); + bytes >> 20); } /* @@ -290,7 +288,7 @@ swiotlb_init(int verbose) if (io_tlb_start) memblock_free_early(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate SWIOTLB buffer"); + pr_warn("Cannot allocate buffer"); no_iotlb_memory = true; } @@ -332,8 +330,8 @@ swiotlb_late_init_with_default_size(size_t default_size) return -ENOMEM; } if (order != get_order(bytes)) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + pr_warn("only able to allocate %ld MB\n", + (PAGE_SIZE << order) >> 20); io_tlb_nslabs = SLABS_PER_PAGE << order; } rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); @@ -770,7 +768,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, err_warn: if (warn && printk_ratelimit()) { - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", + pr_warn("coherent allocation failed for device %s size=%zu\n", dev_name(hwdev), size); dump_stack(); } From b4e4b85708bb31df79d6fc77661bb171e07e0868 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Dec 2018 13:50:37 +0100 Subject: [PATCH 0957/2608] Staging: lustre: remove two build warnings [for older kernels only, lustre has been removed from upstream] When someone writes: strncpy(dest, source, sizeof(source)); they really are just doing the same thing as: strcpy(dest, source); but somehow they feel better because they are now using the "safe" version of the string functions. Cargo-cult programming at its finest... gcc-8 rightfully warns you about doing foolish things like this. Now that the stable kernels are all starting to be built using gcc-8, let's get rid of this warning so that we do not have to gaze at this horror. To dropt the warning, just convert the code to using strcpy() so that if someone really wants to audit this code and find all of the obvious problems, it will be easier to do so. Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/lnet/config.c | 3 +-- drivers/staging/lustre/lustre/lmv/lmv_obd.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c index 26841a7b6213..99b400c4190f 100644 --- a/drivers/staging/lustre/lnet/lnet/config.c +++ b/drivers/staging/lustre/lnet/lnet/config.c @@ -354,8 +354,7 @@ lnet_parse_networks(struct list_head *nilist, char *networks) CERROR("Can't allocate net interface name\n"); goto failed; } - strncpy(ni->ni_interfaces[niface], iface, - strlen(iface)); + strcpy(ni->ni_interfaces[niface], iface); niface++; iface = comma; } while (iface); diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index c2aadb2d1fea..fa46fe9e1bd9 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -645,7 +645,7 @@ repeat_fid2path: memmove(ptr + strlen(gf->gf_path) + 1, ptr, strlen(ori_gf->gf_path)); - strncpy(ptr, gf->gf_path, strlen(gf->gf_path)); + strcpy(ptr, gf->gf_path); ptr += strlen(gf->gf_path); *ptr = '/'; } From a604686c468dc0ab9492dbeaa1722cc3878b0582 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Dec 2018 13:50:55 +0100 Subject: [PATCH 0958/2608] staging: atomisp: remove "fun" strncpy warning [for older kernels only, atomisp has been removed from upstream] gcc-8 rightfully warns that this instance of strncpy is just copying from the source, to the same source, for a few bytes. Meaning this call does nothing. As the author of the code obviously meant it to do something, but this code must be working properly, just replace the call to the kernel internal strscpy() which gcc doesn't know about, so the warning goes away. As this driver was deleted from newer kernel versions, none of this really matters but now at least we do not have to worry about a build warning in the stable trees. Signed-off-by: Greg Kroah-Hartman --- .../pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c index 0fa7cb2423d8..0320c089f688 100644 --- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c +++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c @@ -2860,9 +2860,7 @@ ia_css_debug_pipe_graph_dump_stage( if (l <= ENABLE_LINE_MAX_LENGTH) { /* It fits on one line, copy string and init */ /* other helper strings with empty string */ - strcpy_s(enable_info, - sizeof(enable_info), - ei); + strscpy(enable_info, ei, sizeof(enable_info)); } else { /* Too big for one line, find last comma */ p = ENABLE_LINE_MAX_LENGTH; From b505b9b09e531cfe9bfcb602ed9407d8bd902ad4 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 15 Nov 2018 15:20:52 +0100 Subject: [PATCH 0959/2608] cifs: Fix separator when building path from dentry commit c988de29ca161823db6a7125e803d597ef75b49c upstream. Make sure to use the CIFS_DIR_SEP(cifs_sb) as path separator for prefixpath too. Fixes a bug with smb1 UNIX extensions. Fixes: a6b5058fafdf ("fs/cifs: make share unaccessible at root level mountable") Signed-off-by: Paulo Alcantara Reviewed-by: Aurelien Aptel Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 925844343038..ca98afda3cdb 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -174,7 +174,7 @@ cifs_bp_rename_retry: cifs_dbg(FYI, "using cifs_sb prepath <%s>\n", cifs_sb->prepath); memcpy(full_path+dfsplen+1, cifs_sb->prepath, pplen-1); - full_path[dfsplen] = '\\'; + full_path[dfsplen] = dirsep; for (i = 0; i < pplen-1; i++) if (full_path[dfsplen+1+i] == '/') full_path[dfsplen+1+i] = CIFS_DIR_SEP(cifs_sb); From 948ef68099ebe26a52749ad031febb15b4c5dac7 Mon Sep 17 00:00:00 2001 From: Young Xiao Date: Wed, 28 Nov 2018 08:06:53 +0000 Subject: [PATCH 0960/2608] staging: rtl8712: Fix possible buffer overrun commit 300cd664865bed5d50ae0a42fb4e3a6f415e8a10 upstream. In commit 8b7a13c3f404 ("staging: r8712u: Fix possible buffer overrun") we fix a potential off by one by making the limit smaller. The better fix is to make the buffer larger. This makes it match up with the similar code in other drivers. Fixes: 8b7a13c3f404 ("staging: r8712u: Fix possible buffer overrun") Signed-off-by: Young Xiao Cc: stable Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/mlme_linux.c | 2 +- drivers/staging/rtl8712/rtl871x_mlme.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8712/mlme_linux.c b/drivers/staging/rtl8712/mlme_linux.c index a077069d6227..7e367452c339 100644 --- a/drivers/staging/rtl8712/mlme_linux.c +++ b/drivers/staging/rtl8712/mlme_linux.c @@ -158,7 +158,7 @@ void r8712_report_sec_ie(struct _adapter *adapter, u8 authmode, u8 *sec_ie) p = buff; p += sprintf(p, "ASSOCINFO(ReqIEs="); len = sec_ie[1] + 2; - len = (len < IW_CUSTOM_MAX) ? len : IW_CUSTOM_MAX - 1; + len = (len < IW_CUSTOM_MAX) ? len : IW_CUSTOM_MAX; for (i = 0; i < len; i++) p += sprintf(p, "%02x", sec_ie[i]); p += sprintf(p, ")"); diff --git a/drivers/staging/rtl8712/rtl871x_mlme.c b/drivers/staging/rtl8712/rtl871x_mlme.c index bf1ac22bae1c..98c7e8b229d1 100644 --- a/drivers/staging/rtl8712/rtl871x_mlme.c +++ b/drivers/staging/rtl8712/rtl871x_mlme.c @@ -1361,7 +1361,7 @@ sint r8712_restruct_sec_ie(struct _adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_len) { u8 authmode = 0, match; - u8 sec_ie[255], uncst_oui[4], bkup_ie[255]; + u8 sec_ie[IW_CUSTOM_MAX], uncst_oui[4], bkup_ie[255]; u8 wpa_oui[4] = {0x0, 0x50, 0xf2, 0x01}; uint ielength, cnt, remove_cnt; int iEntry; From c33e7a5a3714250ca452ef8353c91cf7ad411ac6 Mon Sep 17 00:00:00 2001 From: Young Xiao Date: Tue, 27 Nov 2018 09:12:20 +0000 Subject: [PATCH 0961/2608] Revert commit ef9209b642f "staging: rtl8723bs: Fix indenting errors and an off-by-one mistake in core/rtw_mlme_ext.c" commit 87e4a5405f087427fbf8b437d2796283dce2b38f upstream. pstapriv->max_num_sta is always <= NUM_STA, since max_num_sta is either set in _rtw_init_sta_priv() or rtw_set_beacon(). Fixes: ef9209b642f1 ("staging: rtl8723bs: Fix indenting errors and an off-by-one mistake in core/rtw_mlme_ext.c") Signed-off-by: Young Xiao Reviewed-by: Dan Carpenter Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c index b6d137f505e1..111752f0bc27 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c @@ -1574,7 +1574,7 @@ unsigned int OnAssocReq(struct adapter *padapter, union recv_frame *precv_frame) if (pstat->aid > 0) { DBG_871X(" old AID %d\n", pstat->aid); } else { - for (pstat->aid = 1; pstat->aid < NUM_STA; pstat->aid++) + for (pstat->aid = 1; pstat->aid <= NUM_STA; pstat->aid++) if (pstapriv->sta_aid[pstat->aid - 1] == NULL) break; From 99e6900dfa125ef5062e7b0595f6ce5eb75a6d2c Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Thu, 22 Nov 2018 17:53:00 +0800 Subject: [PATCH 0962/2608] drm/amdgpu: update mc firmware image for polaris12 variants commit d7fd67653f847327e545bdb198b901ee124afd7c upstream. Some new variants require updated firmware. Signed-off-by: Junwei Zhang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 3b3326daf32b..b3de9334ed48 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -52,6 +52,7 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin"); static const u32 golden_settings_tonga_a11[] = { @@ -226,6 +227,15 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) break; case CHIP_POLARIS12: chip_name = "polaris12"; + if (((adev->pdev->device == 0x6987) && + ((adev->pdev->revision == 0xc0) || + (adev->pdev->revision == 0xc3))) || + ((adev->pdev->device == 0x6981) && + ((adev->pdev->revision == 0x00) || + (adev->pdev->revision == 0x01) || + (adev->pdev->revision == 0x10)))) { + chip_name = "polaris12_k"; + } break; case CHIP_FIJI: case CHIP_CARRIZO: From eff5e74f7b680d51c9cd9a087f18ce99788b2aba Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Nov 2018 23:25:41 -0500 Subject: [PATCH 0963/2608] drm/amdgpu/gmc8: update MC firmware for polaris MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a81a7c9c9ea3042ab02d66ac35def74abf091c15 upstream. Some variants require different MC firmware images. Acked-by: Christian König Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 29 ++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index b3de9334ed48..0f5dc97ae920 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -52,6 +52,8 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin"); static const u32 golden_settings_tonga_a11[] = @@ -220,22 +222,39 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) chip_name = "tonga"; break; case CHIP_POLARIS11: - chip_name = "polaris11"; + if (((adev->pdev->device == 0x67ef) && + ((adev->pdev->revision == 0xe0) || + (adev->pdev->revision == 0xe5))) || + ((adev->pdev->device == 0x67ff) && + ((adev->pdev->revision == 0xcf) || + (adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) + chip_name = "polaris11_k"; + else if ((adev->pdev->device == 0x67ef) && + (adev->pdev->revision == 0xe2)) + chip_name = "polaris11_k"; + else + chip_name = "polaris11"; break; case CHIP_POLARIS10: - chip_name = "polaris10"; + if ((adev->pdev->device == 0x67df) && + ((adev->pdev->revision == 0xe1) || + (adev->pdev->revision == 0xf7))) + chip_name = "polaris10_k"; + else + chip_name = "polaris10"; break; case CHIP_POLARIS12: - chip_name = "polaris12"; if (((adev->pdev->device == 0x6987) && ((adev->pdev->revision == 0xc0) || (adev->pdev->revision == 0xc3))) || ((adev->pdev->device == 0x6981) && ((adev->pdev->revision == 0x00) || (adev->pdev->revision == 0x01) || - (adev->pdev->revision == 0x10)))) { + (adev->pdev->revision == 0x10)))) chip_name = "polaris12_k"; - } + else + chip_name = "polaris12"; break; case CHIP_FIJI: case CHIP_CARRIZO: From c1063941e7360e1a5c998c58de789d5de267175b Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 3 Dec 2018 00:54:35 +0000 Subject: [PATCH 0964/2608] Drivers: hv: vmbus: Offload the handling of channels to two workqueues commit 37c2578c0c40e286bc0d30bdc05290b2058cf66e upstream. vmbus_process_offer() mustn't call channel->sc_creation_callback() directly for sub-channels, because sc_creation_callback() -> vmbus_open() may never get the host's response to the OPEN_CHANNEL message (the host may rescind a channel at any time, e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind() may not wake up the vmbus_open() as it's blocked due to a non-zero vmbus_connection.offer_in_progress, and finally we have a deadlock. The above is also true for primary channels, if the related device drivers use sync probing mode by default. And, usually the handling of primary channels and sub-channels can depend on each other, so we should offload them to different workqueues to avoid possible deadlock, e.g. in sync-probing mode, NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() -> rtnl_lock(), and causes deadlock: the former gets the rtnl_lock and waits for all the sub-channels to appear, but the latter can't get the rtnl_lock and this blocks the handling of sub-channels. The patch can fix the multiple-NIC deadlock described above for v3.x kernels (e.g. RHEL 7.x) which don't support async-probing of devices, and v4.4, v4.9, v4.14 and v4.18 which support async-probing but don't enable async-probing for Hyper-V drivers (yet). The patch can also fix the hang issue in sub-channel's handling described above for all versions of kernels, including v4.19 and v4.20-rc4. So actually the patch should be applied to all the existing kernels, not only the kernels that have 8195b1396ec8. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Cc: stable@vger.kernel.org Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 185 +++++++++++++++++++++++++------------- drivers/hv/connection.c | 24 ++++- drivers/hv/hyperv_vmbus.h | 7 ++ include/linux/hyperv.h | 7 ++ 4 files changed, 160 insertions(+), 63 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 752c52f7353d..43eaf54736f4 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -444,61 +444,16 @@ void vmbus_free_channels(void) } } -/* - * vmbus_process_offer - Process the offer by creating a channel/device - * associated with this offer - */ -static void vmbus_process_offer(struct vmbus_channel *newchannel) +/* Note: the function can run concurrently for primary/sub channels. */ +static void vmbus_add_channel_work(struct work_struct *work) { - struct vmbus_channel *channel; - bool fnew = true; + struct vmbus_channel *newchannel = + container_of(work, struct vmbus_channel, add_channel_work); + struct vmbus_channel *primary_channel = newchannel->primary_channel; unsigned long flags; u16 dev_type; int ret; - /* Make sure this is a new offer */ - mutex_lock(&vmbus_connection.channel_mutex); - - /* - * Now that we have acquired the channel_mutex, - * we can release the potentially racing rescind thread. - */ - atomic_dec(&vmbus_connection.offer_in_progress); - - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (!uuid_le_cmp(channel->offermsg.offer.if_type, - newchannel->offermsg.offer.if_type) && - !uuid_le_cmp(channel->offermsg.offer.if_instance, - newchannel->offermsg.offer.if_instance)) { - fnew = false; - break; - } - } - - if (fnew) - list_add_tail(&newchannel->listentry, - &vmbus_connection.chn_list); - - mutex_unlock(&vmbus_connection.channel_mutex); - - if (!fnew) { - /* - * Check to see if this is a sub-channel. - */ - if (newchannel->offermsg.offer.sub_channel_index != 0) { - /* - * Process the sub-channel. - */ - newchannel->primary_channel = channel; - spin_lock_irqsave(&channel->lock, flags); - list_add_tail(&newchannel->sc_list, &channel->sc_list); - channel->num_sc++; - spin_unlock_irqrestore(&channel->lock, flags); - } else { - goto err_free_chan; - } - } - dev_type = hv_get_dev_type(newchannel); init_vp_index(newchannel, dev_type); @@ -516,21 +471,22 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) /* * This state is used to indicate a successful open * so that when we do close the channel normally, we - * can cleanup properly + * can cleanup properly. */ newchannel->state = CHANNEL_OPEN_STATE; - if (!fnew) { - if (channel->sc_creation_callback != NULL) - channel->sc_creation_callback(newchannel); + if (primary_channel != NULL) { + /* newchannel is a sub-channel. */ + + if (primary_channel->sc_creation_callback != NULL) + primary_channel->sc_creation_callback(newchannel); + newchannel->probe_done = true; return; } /* - * Start the process of binding this offer to the driver - * We need to set the DeviceObject field before calling - * vmbus_child_dev_add() + * Start the process of binding the primary channel to the driver */ newchannel->device_obj = vmbus_device_create( &newchannel->offermsg.offer.if_type, @@ -559,13 +515,28 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) err_deq_chan: mutex_lock(&vmbus_connection.channel_mutex); - list_del(&newchannel->listentry); + + /* + * We need to set the flag, otherwise + * vmbus_onoffer_rescind() can be blocked. + */ + newchannel->probe_done = true; + + if (primary_channel == NULL) { + list_del(&newchannel->listentry); + } else { + spin_lock_irqsave(&primary_channel->lock, flags); + list_del(&newchannel->sc_list); + spin_unlock_irqrestore(&primary_channel->lock, flags); + } + mutex_unlock(&vmbus_connection.channel_mutex); if (newchannel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(newchannel->target_cpu, - percpu_channel_deq, newchannel, true); + percpu_channel_deq, + newchannel, true); } else { percpu_channel_deq(newchannel); put_cpu(); @@ -573,14 +544,104 @@ err_deq_chan: vmbus_release_relid(newchannel->offermsg.child_relid); -err_free_chan: free_channel(newchannel); } +/* + * vmbus_process_offer - Process the offer by creating a channel/device + * associated with this offer + */ +static void vmbus_process_offer(struct vmbus_channel *newchannel) +{ + struct vmbus_channel *channel; + struct workqueue_struct *wq; + unsigned long flags; + bool fnew = true; + + mutex_lock(&vmbus_connection.channel_mutex); + + /* + * Now that we have acquired the channel_mutex, + * we can release the potentially racing rescind thread. + */ + atomic_dec(&vmbus_connection.offer_in_progress); + + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (!uuid_le_cmp(channel->offermsg.offer.if_type, + newchannel->offermsg.offer.if_type) && + !uuid_le_cmp(channel->offermsg.offer.if_instance, + newchannel->offermsg.offer.if_instance)) { + fnew = false; + break; + } + } + + if (fnew) + list_add_tail(&newchannel->listentry, + &vmbus_connection.chn_list); + else { + /* + * Check to see if this is a valid sub-channel. + */ + if (newchannel->offermsg.offer.sub_channel_index == 0) { + mutex_unlock(&vmbus_connection.channel_mutex); + /* + * Don't call free_channel(), because newchannel->kobj + * is not initialized yet. + */ + kfree(newchannel); + WARN_ON_ONCE(1); + return; + } + /* + * Process the sub-channel. + */ + newchannel->primary_channel = channel; + spin_lock_irqsave(&channel->lock, flags); + list_add_tail(&newchannel->sc_list, &channel->sc_list); + spin_unlock_irqrestore(&channel->lock, flags); + } + + mutex_unlock(&vmbus_connection.channel_mutex); + + /* + * vmbus_process_offer() mustn't call channel->sc_creation_callback() + * directly for sub-channels, because sc_creation_callback() -> + * vmbus_open() may never get the host's response to the + * OPEN_CHANNEL message (the host may rescind a channel at any time, + * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind() + * may not wake up the vmbus_open() as it's blocked due to a non-zero + * vmbus_connection.offer_in_progress, and finally we have a deadlock. + * + * The above is also true for primary channels, if the related device + * drivers use sync probing mode by default. + * + * And, usually the handling of primary channels and sub-channels can + * depend on each other, so we should offload them to different + * workqueues to avoid possible deadlock, e.g. in sync-probing mode, + * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() -> + * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock + * and waits for all the sub-channels to appear, but the latter + * can't get the rtnl_lock and this blocks the handling of + * sub-channels. + */ + INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work); + wq = fnew ? vmbus_connection.handle_primary_chan_wq : + vmbus_connection.handle_sub_chan_wq; + queue_work(wq, &newchannel->add_channel_work); +} + /* * We use this state to statically distribute the channel interrupt load. */ static int next_numa_node_id; +/* + * init_vp_index() accesses global variables like next_numa_node_id, and + * it can run concurrently for primary channels and sub-channels: see + * vmbus_process_offer(), so we need the lock to protect the global + * variables. + */ +static DEFINE_SPINLOCK(bind_channel_to_cpu_lock); /* * Starting with Win8, we can statically distribute the incoming @@ -618,6 +679,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) return; } + spin_lock(&bind_channel_to_cpu_lock); + /* * Based on the channel affinity policy, we will assign the NUMA * nodes. @@ -700,6 +763,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) channel->target_cpu = cur_cpu; channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu); + spin_unlock(&bind_channel_to_cpu_lock); + free_cpumask_var(available_mask); } diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 5449fc59b7f5..4b1b70751be3 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -161,6 +161,20 @@ int vmbus_connect(void) goto cleanup; } + vmbus_connection.handle_primary_chan_wq = + create_workqueue("hv_pri_chan"); + if (!vmbus_connection.handle_primary_chan_wq) { + ret = -ENOMEM; + goto cleanup; + } + + vmbus_connection.handle_sub_chan_wq = + create_workqueue("hv_sub_chan"); + if (!vmbus_connection.handle_sub_chan_wq) { + ret = -ENOMEM; + goto cleanup; + } + INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); spin_lock_init(&vmbus_connection.channelmsg_lock); @@ -251,10 +265,14 @@ void vmbus_disconnect(void) */ vmbus_initiate_unload(false); - if (vmbus_connection.work_queue) { - drain_workqueue(vmbus_connection.work_queue); + if (vmbus_connection.handle_sub_chan_wq) + destroy_workqueue(vmbus_connection.handle_sub_chan_wq); + + if (vmbus_connection.handle_primary_chan_wq) + destroy_workqueue(vmbus_connection.handle_primary_chan_wq); + + if (vmbus_connection.work_queue) destroy_workqueue(vmbus_connection.work_queue); - } if (vmbus_connection.int_page) { free_pages((unsigned long)vmbus_connection.int_page, 0); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 49569f8fe038..a166de6efd99 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -327,7 +327,14 @@ struct vmbus_connection { struct list_head chn_list; struct mutex channel_mutex; + /* + * An offer message is handled first on the work_queue, and then + * is further handled on handle_primary_chan_wq or + * handle_sub_chan_wq. + */ struct workqueue_struct *work_queue; + struct workqueue_struct *handle_primary_chan_wq; + struct workqueue_struct *handle_sub_chan_wq; }; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 0c51f753652d..d1324d3c72b0 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -869,6 +869,13 @@ struct vmbus_channel { bool probe_done; + /* + * We must offload the handling of the primary/sub channels + * from the single-threaded vmbus_connection.work_queue to + * two different workqueue, otherwise we can block + * vmbus_connection.work_queue and hang: see vmbus_process_offer(). + */ + struct work_struct add_channel_work; }; static inline bool is_hvsock_channel(const struct vmbus_channel *c) From 4f50d3a08df40a3ad261b8527345df92c9f1a731 Mon Sep 17 00:00:00 2001 From: Peter Shih Date: Tue, 27 Nov 2018 12:49:50 +0800 Subject: [PATCH 0965/2608] tty: serial: 8250_mtk: always resume the device in probe. commit 100bc3e2bebf95506da57cbdf5f26b25f6da4c81 upstream. serial8250_register_8250_port calls uart_config_port, which calls config_port on the port before it tries to power on the port. So we need the port to be on before calling serial8250_register_8250_port. Change the code to always do a runtime resume in probe before registering port, and always do a runtime suspend in remove. This basically reverts the change in commit 68e5fc4a255a ("tty: serial: 8250_mtk: use pm_runtime callbacks for enabling"), but still use pm_runtime callbacks. Fixes: 68e5fc4a255a ("tty: serial: 8250_mtk: use pm_runtime callbacks for enabling") Signed-off-by: Peter Shih Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index fb45770d47aa..fa909fa3c4cd 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -222,17 +222,17 @@ static int mtk8250_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); - pm_runtime_enable(&pdev->dev); - if (!pm_runtime_enabled(&pdev->dev)) { - err = mtk8250_runtime_resume(&pdev->dev); - if (err) - return err; - } + err = mtk8250_runtime_resume(&pdev->dev); + if (err) + return err; data->line = serial8250_register_8250_port(&uart); if (data->line < 0) return data->line; + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + return 0; } @@ -243,13 +243,11 @@ static int mtk8250_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); serial8250_unregister_port(data->line); + mtk8250_runtime_suspend(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); - if (!pm_runtime_status_suspended(&pdev->dev)) - mtk8250_runtime_suspend(&pdev->dev); - return 0; } From e4c3cc6b08087550e7f000bd7510ccb9a887c543 Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Thu, 22 Nov 2018 18:23:47 +0900 Subject: [PATCH 0966/2608] tty: do not set TTY_IO_ERROR flag if console port commit 2a48602615e0a2f563549c7d5c8d507f904cf96e upstream. Since Commit 761ed4a94582 ('tty: serial_core: convert uart_close to use tty_port_close') and Commit 4dda864d7307 ('tty: serial_core: Fix serial console crash on port shutdown), a serial port which is used as console can be stuck when logging out if there is a remained process. After logged out, agetty will try to grab the serial port but it will be failed because the previous process did not release the port correctly. To fix this, TTY_IO_ERROR bit should not be enabled of tty_port_close if the port is console port. Reproduce step: - Run background processes from serial console $ while true; do sleep 10; done & - Log out $ logout -> Stuck - Read journal log by journalctl | tail Jan 28 16:07:01 ubuntu systemd[1]: Stopped Serial Getty on ttyAMA0. Jan 28 16:07:01 ubuntu systemd[1]: Started Serial Getty on ttyAMA0. Jan 28 16:07:02 ubuntu agetty[1643]: /dev/ttyAMA0: not a tty Fixes: 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close") Cc: Geert Uytterhoeven Cc: Rob Herring Cc: Jiri Slaby Signed-off-by: Chanho Park Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 6b137194069f..c93a33701d32 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -639,7 +639,8 @@ void tty_port_close(struct tty_port *port, struct tty_struct *tty, if (tty_port_close_start(port, tty, filp) == 0) return; tty_port_shutdown(port, tty); - set_bit(TTY_IO_ERROR, &tty->flags); + if (!port->console) + set_bit(TTY_IO_ERROR, &tty->flags); tty_port_close_end(port, tty); tty_port_tty_set(port, NULL); } From b919b4842b42a148ad12754019c58cfc939ae955 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Wed, 17 Oct 2018 23:08:38 +0800 Subject: [PATCH 0967/2608] kgdboc: fix KASAN global-out-of-bounds bug in param_set_kgdboc_var() commit dada6a43b0402eba438a17ac86fdc64ac56a4607 upstream. This patch is trying to fix KE issue due to "BUG: KASAN: global-out-of-bounds in param_set_kgdboc_var+0x194/0x198" reported by Syzkaller scan." [26364:syz-executor0][name:report8t]BUG: KASAN: global-out-of-bounds in param_set_kgdboc_var+0x194/0x198 [26364:syz-executor0][name:report&]Read of size 1 at addr ffffff900e44f95f by task syz-executor0/26364 [26364:syz-executor0][name:report&] [26364:syz-executor0]CPU: 7 PID: 26364 Comm: syz-executor0 Tainted: G W 0 [26364:syz-executor0]Call trace: [26364:syz-executor0][] dump_bacIctrace+Ox0/0x470 [26364:syz-executor0][] show_stack+0x20/0x30 [26364:syz-executor0][] dump_stack+Oxd8/0x128 [26364:syz-executor0][] print_address_description +0x80/0x4a8 [26364:syz-executor0][] kasan_report+Ox178/0x390 [26364:syz-executor0][] _asan_report_loadi_noabort+Ox18/0x20 [26364:syz-executor0][] param_set_kgdboc_var+Ox194/0x198 [26364:syz-executor0][] param_attr_store+Ox14c/0x270 [26364:syz-executor0][] module_attr_store+0x60/0x90 [26364:syz-executor0][] sysfs_kl_write+Ox100/0x158 [26364:syz-executor0][] kernfs_fop_write+0x27c/0x3a8 [26364:syz-executor0][] do_loop_readv_writev+0x114/0x1b0 [26364:syz-executor0][] do_readv_writev+0x4f8/0x5e0 [26364:syz-executor0][] vfs_writev+0x7c/Oxb8 [26364:syz-executor0][] SyS_writev+Oxcc/0x208 [26364:syz-executor0][] elO_svc_naked +0x24/0x28 [26364:syz-executor0][name:report&] [26364:syz-executor0][name:report&]The buggy address belongs to the variable: [26364:syz-executor0][name:report&] kgdb_tty_line+Ox3f/0x40 [26364:syz-executor0][name:report&] [26364:syz-executor0][name:report&]Memory state around the buggy address: [26364:syz-executor0] ffffff900e44f800: 00 00 00 00 00 04 fa fa fa fa fa fa 00 fa fa fa [26364:syz-executor0] ffffff900e44f880: fa fa fa fa 00 fa fa fa fa fa fa fa 00 fa fa fa [26364:syz-executor0]> ffffff900e44f900: fa fa fa fa 04 fa fa fa fa fa fa fa 00 00 00 00 [26364:syz-executor0][name:report&] ^ [26364:syz-executor0] ffffff900e44f980: 00 fa fa fa fa fa fa fa 04 fa fa fa fa fa fa fa [26364:syz-executor0] ffffff900e44fa00: 04 fa fa fa fa fa fa fa 00 fa fa fa fa fa fa fa [26364:syz-executor0][name:report&] [26364:syz-executor0][name:panic&]Disabling lock debugging due to kernel taint [26364:syz-executor0]------------[cut here]------------ After checking the source code, we've found there might be an out-of-bounds access to "config[len - 1]" array when the variable "len" is zero. Signed-off-by: Macpaul Lin Acked-by: Daniel Thompson Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index c448225ef5ca..f2b0d8cee8ef 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -232,7 +232,7 @@ static void kgdboc_put_char(u8 chr) static int param_set_kgdboc_var(const char *kmessage, struct kernel_param *kp) { - int len = strlen(kmessage); + size_t len = strlen(kmessage); if (len >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdboc: config string too long\n"); @@ -254,7 +254,7 @@ static int param_set_kgdboc_var(const char *kmessage, struct kernel_param *kp) strcpy(config, kmessage); /* Chop out \n char as a result of echo */ - if (config[len - 1] == '\n') + if (len && config[len - 1] == '\n') config[len - 1] = '\0'; if (configured == 1) From 4794d94b44c4903514a878b9097e99417a5e6dbe Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 24 Nov 2018 10:47:04 -0800 Subject: [PATCH 0968/2608] libnvdimm, pfn: Pad pfn namespaces relative to other regions commit ae86cbfef3818300f1972e52f67a93211acb0e24 upstream. Commit cfe30b872058 "libnvdimm, pmem: adjust for section collisions with 'System RAM'" enabled Linux to workaround occasions where platform firmware arranges for "System RAM" and "Persistent Memory" to collide within a single section boundary. Unfortunately, as reported in this issue [1], platform firmware can inflict the same collision between persistent memory regions. The approach of interrogating iomem_resource does not work in this case because platform firmware may merge multiple regions into a single iomem_resource range. Instead provide a method to interrogate regions that share the same parent bus. This is a stop-gap until the core-MM can grow support for hotplug on sub-section boundaries. [1]: https://github.com/pmem/ndctl/issues/76 Fixes: cfe30b872058 ("libnvdimm, pmem: adjust for section collisions with...") Cc: Reported-by: Patrick Geary Tested-by: Patrick Geary Reviewed-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/nd-core.h | 2 ++ drivers/nvdimm/pfn_devs.c | 64 +++++++++++++++++++++--------------- drivers/nvdimm/region_devs.c | 41 +++++++++++++++++++++++ 3 files changed, 80 insertions(+), 27 deletions(-) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 86bc19ae30da..f7b0c39ac339 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -105,6 +105,8 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, struct nd_mapping *nd_mapping, resource_size_t *overlap); resource_size_t nd_blk_available_dpa(struct nd_region *nd_region); resource_size_t nd_region_available_dpa(struct nd_region *nd_region); +int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, + resource_size_t size); resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, struct nd_label_id *label_id); int alias_dpa_busy(struct device *dev, void *data); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 2adada1a5855..6d38191ff0da 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -589,14 +589,47 @@ static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) ALIGN_DOWN(phys, nd_pfn->align)); } +/* + * Check if pmem collides with 'System RAM', or other regions when + * section aligned. Trim it accordingly. + */ +static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc) +{ + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); + const resource_size_t start = nsio->res.start; + const resource_size_t end = start + resource_size(&nsio->res); + resource_size_t adjust, size; + + *start_pad = 0; + *end_trunc = 0; + + adjust = start - PHYS_SECTION_ALIGN_DOWN(start); + size = resource_size(&nsio->res) + adjust; + if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED + || nd_region_conflict(nd_region, start - adjust, size)) + *start_pad = PHYS_SECTION_ALIGN_UP(start) - start; + + /* Now check that end of the range does not collide. */ + adjust = PHYS_SECTION_ALIGN_UP(end) - end; + size = resource_size(&nsio->res) + adjust; + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED + || !IS_ALIGNED(end, nd_pfn->align) + || nd_region_conflict(nd_region, start, size + adjust)) + *end_trunc = end - phys_pmem_align_down(nd_pfn, end); +} + static int nd_pfn_init(struct nd_pfn *nd_pfn) { u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; - u32 start_pad = 0, end_trunc = 0; + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t start, size; - struct nd_namespace_io *nsio; struct nd_region *nd_region; + u32 start_pad, end_trunc; struct nd_pfn_sb *pfn_sb; unsigned long npfns; phys_addr_t offset; @@ -628,30 +661,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memset(pfn_sb, 0, sizeof(*pfn_sb)); - /* - * Check if pmem collides with 'System RAM' when section aligned and - * trim it accordingly - */ - nsio = to_nd_namespace_io(&ndns->dev); - start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start); - size = resource_size(&nsio->res); - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { - start = nsio->res.start; - start_pad = PHYS_SECTION_ALIGN_UP(start) - start; - } - - start = nsio->res.start; - size = PHYS_SECTION_ALIGN_UP(start + size) - start; - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED - || !IS_ALIGNED(start + resource_size(&nsio->res), - nd_pfn->align)) { - size = resource_size(&nsio->res); - end_trunc = start + size - phys_pmem_align_down(nd_pfn, - start + size); - } - + trim_pfn_device(nd_pfn, &start_pad, &end_trunc); if (start_pad + end_trunc) dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_name(&ndns->dev), start_pad + end_trunc); @@ -662,7 +672,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. */ - start += start_pad; + start = nsio->res.start + start_pad; size = resource_size(&nsio->res); npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) / PAGE_SIZE); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 050deb56ee62..708043d20d0d 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1112,6 +1112,47 @@ int nvdimm_has_cache(struct nd_region *nd_region) } EXPORT_SYMBOL_GPL(nvdimm_has_cache); +struct conflict_context { + struct nd_region *nd_region; + resource_size_t start, size; +}; + +static int region_conflict(struct device *dev, void *data) +{ + struct nd_region *nd_region; + struct conflict_context *ctx = data; + resource_size_t res_end, region_end, region_start; + + if (!is_memory(dev)) + return 0; + + nd_region = to_nd_region(dev); + if (nd_region == ctx->nd_region) + return 0; + + res_end = ctx->start + ctx->size; + region_start = nd_region->ndr_start; + region_end = region_start + nd_region->ndr_size; + if (ctx->start >= region_start && ctx->start < region_end) + return -EBUSY; + if (res_end > region_start && res_end <= region_end) + return -EBUSY; + return 0; +} + +int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, + resource_size_t size) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + struct conflict_context ctx = { + .nd_region = nd_region, + .start = start, + .size = size, + }; + + return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict); +} + void __exit nd_region_devs_exit(void) { ida_destroy(®ion_ida); From 199a81acf2bb66b5a640e6484aacd87bdd2a4854 Mon Sep 17 00:00:00 2001 From: Vasyl Vavrychuk Date: Thu, 18 Oct 2018 01:02:12 +0300 Subject: [PATCH 0969/2608] mac80211_hwsim: Timer should be initialized before device registered commit a1881c9b8a1edef0a5ae1d5c1b61406fe3402114 upstream. Otherwise if network manager starts configuring Wi-Fi interface immidiatelly after getting notification of its creation, we will get NULL pointer dereference: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] hrtimer_active+0x28/0x50 ... Call Trace: [] ? hrtimer_try_to_cancel+0x27/0x110 [] ? hrtimer_cancel+0x15/0x20 [] ? mac80211_hwsim_config+0x140/0x1c0 [mac80211_hwsim] Cc: stable@vger.kernel.org Signed-off-by: Vasyl Vavrychuk Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 477f9f2f6626..670224be3c8b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2698,6 +2698,10 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); + tasklet_hrtimer_init(&data->beacon_timer, + mac80211_hwsim_beacon, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + err = ieee80211_register_hw(hw); if (err < 0) { printk(KERN_DEBUG "mac80211_hwsim: ieee80211_register_hw failed (%d)\n", @@ -2722,10 +2726,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, data->debugfs, data, &hwsim_simulate_radar); - tasklet_hrtimer_init(&data->beacon_timer, - mac80211_hwsim_beacon, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - spin_lock_bh(&hwsim_radio_lock); list_add_tail(&data->list, &hwsim_radios); spin_unlock_bh(&hwsim_radio_lock); From 9e82abdd0c0341b880f510fc8b19464410912069 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 23 Oct 2018 13:36:52 -0700 Subject: [PATCH 0970/2608] mac80211: Clear beacon_int in ieee80211_do_stop commit 5c21e8100dfd57c806e833ae905e26efbb87840f upstream. This fixes stale beacon-int values that would keep a netdev from going up. To reproduce: Create two VAP on one radio. vap1 has beacon-int 100, start it. vap2 has beacon-int 240, start it (and it will fail because beacon-int mismatch). reconfigure vap2 to have beacon-int 100 and start it. It will fail because the stale beacon-int 240 will be used in the ifup path and hostapd never gets a chance to set the new beacon interval. Cc: stable@vger.kernel.org Signed-off-by: Ben Greear Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e4912858b72c..222c063244f5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1032,6 +1032,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (local->open_count == 0) ieee80211_clear_tx_pending(local); + sdata->vif.bss_conf.beacon_int = 0; + /* * If the interface goes down while suspended, presumably because * the device was unplugged and that happens before our resume, From a2c934fbbd43b5c349157c05374b9598f53a65f1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 13 Nov 2018 20:32:13 +0100 Subject: [PATCH 0971/2608] mac80211: ignore tx status for PS stations in ieee80211_tx_status_ext commit a317e65face482371de30246b6494feb093ff7f9 upstream. Make it behave like regular ieee80211_tx_status calls, except for the lack of filtered frame processing. This fixes spurious low-ack triggered disconnections with powersave clients connected to an AP. Fixes: f027c2aca0cf4 ("mac80211: add ieee80211_tx_status_noskb") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/status.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index bdf131ed5ce8..35912270087c 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -953,6 +953,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, /* Track when last TDLS packet was ACKed */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) sta->status_stats.last_tdls_pkt_time = jiffies; + } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + return; } else { ieee80211_lost_packet(sta, info); } From bbc4242a9c2e3c97a5a8e7822b9c27915599f0c4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 28 Nov 2018 22:39:16 +0100 Subject: [PATCH 0972/2608] mac80211: fix reordering of buffered broadcast packets commit 9ec1190d065998650fd9260dea8cf3e1f56c0e8c upstream. If the buffered broadcast queue contains packets, letting new packets bypass that queue can lead to heavy reordering, since the driver is probably throttling transmission of buffered multicast packets after beacons. Keep buffering packets until the buffer has been cleared (and no client is in powersave mode). Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a17a56032a21..6b9bf9c027a2 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -435,8 +435,8 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL)) info->hw_queue = tx->sdata->vif.cab_queue; - /* no stations in PS mode */ - if (!atomic_read(&ps->num_sta_ps)) + /* no stations in PS mode and no buffered packets */ + if (!atomic_read(&ps->num_sta_ps) && skb_queue_empty(&ps->bc_buf)) return TX_CONTINUE; info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; From 7b6d932725ec18c1d84892b73c9da4d624939955 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 3 Dec 2018 21:16:07 +0200 Subject: [PATCH 0973/2608] mac80211: ignore NullFunc frames in the duplicate detection commit 990d71846a0b7281bd933c34d734e6afc7408e7e upstream. NullFunc packets should never be duplicate just like QoS-NullFunc packets. We saw a client that enters / exits power save with NullFunc frames (and not with QoS-NullFunc) despite the fact that the association supports HT. This specific client also re-uses a non-zero sequence number for different NullFunc frames. At some point, the client had to send a retransmission of the NullFunc frame and we dropped it, leading to a misalignment in the power save state. Fix this by never consider a NullFunc frame as duplicate, just like we do for QoS NullFunc frames. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=201449 CC: Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dddd498e1338..9e19ddbcb06e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1254,6 +1254,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (ieee80211_is_ctl(hdr->frame_control) || + ieee80211_is_nullfunc(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; From 1bb538a39cf959009d4e424ea4e590a1f58b2ed6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 13 Dec 2018 09:18:54 +0100 Subject: [PATCH 0974/2608] Linux 4.14.88 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 322484348f3e..3fdee40861a1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 87 +SUBLEVEL = 88 EXTRAVERSION = NAME = Petit Gorille From da4b692991fc0f4ac74cb1546f52e67a8fe54801 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Wed, 5 Dec 2018 16:55:29 +0100 Subject: [PATCH 0975/2608] ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes [ Upstream commit ebaf39e6032faf77218220707fc3fa22487784e0 ] The *_frag_reasm() functions are susceptible to miscalculating the byte count of packet fragments in case the truesize of a head buffer changes. The truesize member may be changed by the call to skb_unclone(), leaving the fragment memory limit counter unbalanced even if all fragments are processed. This miscalculation goes unnoticed as long as the network namespace which holds the counter is not destroyed. Should an attempt be made to destroy a network namespace that holds an unbalanced fragment memory limit counter the cleanup of the namespace never finishes. The thread handling the cleanup gets stuck in inet_frags_exit_net() waiting for the percpu counter to reach zero. The thread is usually in running state with a stacktrace similar to: PID: 1073 TASK: ffff880626711440 CPU: 1 COMMAND: "kworker/u48:4" #5 [ffff880621563d48] _raw_spin_lock at ffffffff815f5480 #6 [ffff880621563d48] inet_evict_bucket at ffffffff8158020b #7 [ffff880621563d80] inet_frags_exit_net at ffffffff8158051c #8 [ffff880621563db0] ops_exit_list at ffffffff814f5856 #9 [ffff880621563dd8] cleanup_net at ffffffff814f67c0 #10 [ffff880621563e38] process_one_work at ffffffff81096f14 It is not possible to create new network namespaces, and processes that call unshare() end up being stuck in uninterruptible sleep state waiting to acquire the net_mutex. The bug was observed in the IPv6 netfilter code by Per Sundstrom. I thank him for his analysis of the problem. The parts of this patch that apply to IPv4 and IPv6 fragment reassembly are preemptive measures. Signed-off-by: Jiri Wiesner Reported-by: Per Sundstrom Acked-by: Peter Oskolkov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 7 +++++++ net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++++++- net/ipv6/reassembly.c | 8 +++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cb8fa5d7afe1..f686d7761acb 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -513,6 +513,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct rb_node *rbn; int len; int ihlen; + int delta; int err; u8 ecn; @@ -554,10 +555,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, if (len > 65535) goto out_oversize; + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; + delta += head->truesize; + if (delta) + add_frag_mem_limit(qp->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 611d406c4656..237fb04c6716 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -349,7 +349,7 @@ static bool nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { struct sk_buff *fp, *head = fq->q.fragments; - int payload_len; + int payload_len, delta; u8 ecn; inet_frag_kill(&fq->q); @@ -371,10 +371,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic return false; } + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) return false; + delta += head->truesize; + if (delta) + add_frag_mem_limit(fq->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ede0061b6f5d..2a8c680b67cd 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -348,7 +348,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, { struct net *net = container_of(fq->q.net, struct net, ipv6.frags); struct sk_buff *fp, *head = fq->q.fragments; - int payload_len; + int payload_len, delta; unsigned int nhoff; int sum_truesize; u8 ecn; @@ -389,10 +389,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, if (payload_len > IPV6_MAXPLEN) goto out_oversize; + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; + delta += head->truesize; + if (delta) + add_frag_mem_limit(fq->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ From ece6a4b7efa8ec7764bda17ad37f46be5e141d16 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 6 Dec 2018 19:30:36 +0100 Subject: [PATCH 0976/2608] ipv6: Check available headroom in ip6_xmit() even without options [ Upstream commit 66033f47ca60294a95fc85ec3a3cc909dab7b765 ] Even if we send an IPv6 packet without options, MAX_HEADER might not be enough to account for the additional headroom required by alignment of hardware headers. On a configuration without HYPERV_NET, WLAN, AX25, and with IPV6_TUNNEL, sending short SCTP packets over IPv4 over L2TP over IPv6, we start with 100 bytes of allocated headroom in sctp_packet_transmit(), end up with 54 bytes after l2tp_xmit_skb(), and 14 bytes in ip6_finish_output2(). Those would be enough to append our 14 bytes header, but we're going to align that to 16 bytes, and write 2 bytes out of the allocated slab in neigh_hh_output(). KASan says: [ 264.967848] ================================================================== [ 264.967861] BUG: KASAN: slab-out-of-bounds in ip6_finish_output2+0x1aec/0x1c70 [ 264.967866] Write of size 16 at addr 000000006af1c7fe by task netperf/6201 [ 264.967870] [ 264.967876] CPU: 0 PID: 6201 Comm: netperf Not tainted 4.20.0-rc4+ #1 [ 264.967881] Hardware name: IBM 2827 H43 400 (z/VM 6.4.0) [ 264.967887] Call Trace: [ 264.967896] ([<00000000001347d6>] show_stack+0x56/0xa0) [ 264.967903] [<00000000017e379c>] dump_stack+0x23c/0x290 [ 264.967912] [<00000000007bc594>] print_address_description+0xf4/0x290 [ 264.967919] [<00000000007bc8fc>] kasan_report+0x13c/0x240 [ 264.967927] [<000000000162f5e4>] ip6_finish_output2+0x1aec/0x1c70 [ 264.967935] [<000000000163f890>] ip6_finish_output+0x430/0x7f0 [ 264.967943] [<000000000163fe44>] ip6_output+0x1f4/0x580 [ 264.967953] [<000000000163882a>] ip6_xmit+0xfea/0x1ce8 [ 264.967963] [<00000000017396e2>] inet6_csk_xmit+0x282/0x3f8 [ 264.968033] [<000003ff805fb0ba>] l2tp_xmit_skb+0xe02/0x13e0 [l2tp_core] [ 264.968037] [<000003ff80631192>] l2tp_eth_dev_xmit+0xda/0x150 [l2tp_eth] [ 264.968041] [<0000000001220020>] dev_hard_start_xmit+0x268/0x928 [ 264.968069] [<0000000001330e8e>] sch_direct_xmit+0x7ae/0x1350 [ 264.968071] [<000000000122359c>] __dev_queue_xmit+0x2b7c/0x3478 [ 264.968075] [<00000000013d2862>] ip_finish_output2+0xce2/0x11a0 [ 264.968078] [<00000000013d9b14>] ip_finish_output+0x56c/0x8c8 [ 264.968081] [<00000000013ddd1e>] ip_output+0x226/0x4c0 [ 264.968083] [<00000000013dbd6c>] __ip_queue_xmit+0x894/0x1938 [ 264.968100] [<000003ff80bc3a5c>] sctp_packet_transmit+0x29d4/0x3648 [sctp] [ 264.968116] [<000003ff80b7bf68>] sctp_outq_flush_ctrl.constprop.5+0x8d0/0xe50 [sctp] [ 264.968131] [<000003ff80b7c716>] sctp_outq_flush+0x22e/0x7d8 [sctp] [ 264.968146] [<000003ff80b35c68>] sctp_cmd_interpreter.isra.16+0x530/0x6800 [sctp] [ 264.968161] [<000003ff80b3410a>] sctp_do_sm+0x222/0x648 [sctp] [ 264.968177] [<000003ff80bbddac>] sctp_primitive_ASSOCIATE+0xbc/0xf8 [sctp] [ 264.968192] [<000003ff80b93328>] __sctp_connect+0x830/0xc20 [sctp] [ 264.968208] [<000003ff80bb11ce>] sctp_inet_connect+0x2e6/0x378 [sctp] [ 264.968212] [<0000000001197942>] __sys_connect+0x21a/0x450 [ 264.968215] [<000000000119aff8>] sys_socketcall+0x3d0/0xb08 [ 264.968218] [<000000000184ea7a>] system_call+0x2a2/0x2c0 [...] Just like ip_finish_output2() does for IPv4, check that we have enough headroom in ip6_xmit(), and reallocate it if we don't. This issue is older than git history. Reported-by: Jianlin Shi Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9ab1e0fcbc13..7ca8264cbdf9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; u32 mtu; - if (opt) { - unsigned int head_room; + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + if (opt) + head_room += opt->opt_nflen + opt->opt_flen; - /* First: exthdrs may take lots of space (~8K for now) - MAX_HEADER is not enough. - */ - head_room = opt->opt_nflen + opt->opt_flen; - seg_len += head_room; - head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); - - if (skb_headroom(skb) < head_room) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); - return -ENOBUFS; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; + if (unlikely(skb_headroom(skb) < head_room)) { + struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); + if (!skb2) { + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -ENOBUFS; } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + consume_skb(skb); + skb = skb2; + } + + if (opt) { + seg_len += opt->opt_nflen + opt->opt_flen; + if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); + if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, &fl6->saddr); From 9380dc7c5101c0ac539c282babd5d3c91394b94a Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 6 Dec 2018 19:30:37 +0100 Subject: [PATCH 0977/2608] neighbour: Avoid writing before skb->head in neigh_hh_output() [ Upstream commit e6ac64d4c4d095085d7dd71cbd05704ac99829b2 ] While skb_push() makes the kernel panic if the skb headroom is less than the unaligned hardware header size, it will proceed normally in case we copy more than that because of alignment, and we'll silently corrupt adjacent slabs. In the case fixed by the previous patch, "ipv6: Check available headroom in ip6_xmit() even without options", we end up in neigh_hh_output() with 14 bytes headroom, 14 bytes hardware header and write 16 bytes, starting 2 bytes before the allocated buffer. Always check we're not writing before skb->head and, if the headroom is not enough, warn and drop the packet. v2: - instead of panicking with BUG_ON(), WARN_ON_ONCE() and drop the packet (Eric Dumazet) - if we avoid the panic, though, we need to explicitly check the headroom before the memcpy(), otherwise we'll have corrupted slabs on a running kernel, after we warn - use __skb_push() instead of skb_push(), as the headroom check is already implemented here explicitly (Eric Dumazet) Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/neighbour.h | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a964366a7ef5..393099b1901a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -452,6 +452,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { + unsigned int hh_alen = 0; unsigned int seq; unsigned int hh_len; @@ -459,16 +460,33 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb seq = read_seqbegin(&hh->hh_lock); hh_len = hh->hh_len; if (likely(hh_len <= HH_DATA_MOD)) { - /* this is inlined by gcc */ - memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); - } else { - unsigned int hh_alen = HH_DATA_ALIGN(hh_len); + hh_alen = HH_DATA_MOD; - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + /* skb_push() would proceed silently if we have room for + * the unaligned size but not for the aligned size: + * check headroom explicitly. + */ + if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { + /* this is inlined by gcc */ + memcpy(skb->data - HH_DATA_MOD, hh->hh_data, + HH_DATA_MOD); + } + } else { + hh_alen = HH_DATA_ALIGN(hh_len); + + if (likely(skb_headroom(skb) >= hh_alen)) { + memcpy(skb->data - hh_alen, hh->hh_data, + hh_alen); + } } } while (read_seqretry(&hh->hh_lock, seq)); - skb_push(skb, hh_len); + if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + __skb_push(skb, hh_len); return dev_queue_xmit(skb); } From 25f111f2432d69a5014503dc27cf729d6adea285 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Fri, 7 Dec 2018 09:50:17 +0200 Subject: [PATCH 0978/2608] ipv6: sr: properly initialize flowi6 prior passing to ip6_route_output [ Upstream commit 1b4e5ad5d6b9f15cd0b5121f86d4719165958417 ] In 'seg6_output', stack variable 'struct flowi6 fl6' was missing initialization. Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/seg6_iptunnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index bf4763fd68c2..cf9342bfe95a 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -327,6 +327,7 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct flowi6 fl6; + memset(&fl6, 0, sizeof(fl6)); fl6.daddr = hdr->daddr; fl6.saddr = hdr->saddr; fl6.flowlabel = ip6_flowinfo(hdr); From 4c65c73cf124dedd9bb64374851f7466d5704750 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Mon, 3 Dec 2018 15:33:07 +0800 Subject: [PATCH 0979/2608] net: 8139cp: fix a BUG triggered by changing mtu with network traffic [ Upstream commit a5d4a89245ead1f37ed135213653c5beebea4237 ] When changing mtu many times with traffic, a bug is triggered: [ 1035.684037] kernel BUG at lib/dynamic_queue_limits.c:26! [ 1035.684042] invalid opcode: 0000 [#1] SMP [ 1035.684049] Modules linked in: loop binfmt_misc 8139cp(OE) macsec tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag tcp_lp fuse uinput xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter devlink ip6_tables iptable_filter sunrpc snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep ppdev snd_seq iosf_mbi crc32_pclmul parport_pc snd_seq_device ghash_clmulni_intel parport snd_pcm aesni_intel joydev lrw snd_timer virtio_balloon sg gf128mul glue_helper ablk_helper cryptd snd soundcore i2c_piix4 pcspkr ip_tables xfs libcrc32c sr_mod sd_mod cdrom crc_t10dif crct10dif_generic ata_generic [ 1035.684102] pata_acpi virtio_console qxl drm_kms_helper syscopyarea sysfillrect sysimgblt floppy fb_sys_fops crct10dif_pclmul crct10dif_common ttm crc32c_intel serio_raw ata_piix drm libata 8139too virtio_pci drm_panel_orientation_quirks virtio_ring virtio mii dm_mirror dm_region_hash dm_log dm_mod [last unloaded: 8139cp] [ 1035.684132] CPU: 9 PID: 25140 Comm: if-mtu-change Kdump: loaded Tainted: G OE ------------ T 3.10.0-957.el7.x86_64 #1 [ 1035.684134] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 1035.684136] task: ffff8f59b1f5a080 ti: ffff8f5a2e32c000 task.ti: ffff8f5a2e32c000 [ 1035.684149] RIP: 0010:[] [] dql_completed+0x180/0x190 [ 1035.684162] RSP: 0000:ffff8f5a75483e50 EFLAGS: 00010093 [ 1035.684162] RAX: 00000000000000c2 RBX: ffff8f5a6f91c000 RCX: 0000000000000000 [ 1035.684162] RDX: 0000000000000000 RSI: 0000000000000184 RDI: ffff8f599fea3ec0 [ 1035.684162] RBP: ffff8f5a75483ea8 R08: 00000000000000c2 R09: 0000000000000000 [ 1035.684162] R10: 00000000000616ef R11: ffff8f5a75483b56 R12: ffff8f599fea3e00 [ 1035.684162] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000184 [ 1035.684162] FS: 00007fa8434de740(0000) GS:ffff8f5a75480000(0000) knlGS:0000000000000000 [ 1035.684162] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1035.684162] CR2: 00000000004305d0 CR3: 000000024eb66000 CR4: 00000000001406e0 [ 1035.684162] Call Trace: [ 1035.684162] [ 1035.684162] [] ? cp_interrupt+0x478/0x580 [8139cp] [ 1035.684162] [] __handle_irq_event_percpu+0x44/0x1c0 [ 1035.684162] [] handle_irq_event_percpu+0x32/0x80 [ 1035.684162] [] handle_irq_event+0x3c/0x60 [ 1035.684162] [] handle_fasteoi_irq+0x59/0x110 [ 1035.684162] [] handle_irq+0xe4/0x1a0 [ 1035.684162] [] do_IRQ+0x4d/0xf0 [ 1035.684162] [] common_interrupt+0x162/0x162 [ 1035.684162] [ 1035.684162] [] ? __wake_up_bit+0x24/0x70 [ 1035.684162] [] ? do_set_pte+0xd5/0x120 [ 1035.684162] [] unlock_page+0x2b/0x30 [ 1035.684162] [] do_read_fault.isra.61+0x139/0x1b0 [ 1035.684162] [] handle_pte_fault+0x2f4/0xd10 [ 1035.684162] [] handle_mm_fault+0x39d/0x9b0 [ 1035.684162] [] __do_page_fault+0x203/0x500 [ 1035.684162] [] trace_do_page_fault+0x56/0x150 [ 1035.684162] [] do_async_page_fault+0x22/0xf0 [ 1035.684162] [] async_page_fault+0x28/0x30 [ 1035.684162] Code: 54 c7 47 54 ff ff ff ff 44 0f 49 ce 48 8b 35 48 2f 9c 00 48 89 77 58 e9 fe fe ff ff 0f 1f 80 00 00 00 00 41 89 d1 e9 ef fe ff ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 55 8d 42 ff 48 [ 1035.684162] RIP [] dql_completed+0x180/0x190 [ 1035.684162] RSP It's not the same as in 7fe0ee09 patch described. As 8139cp uses shared irq mode, other device irq will trigger cp_interrupt to execute. cp_change_mtu -> cp_close -> cp_open In cp_close routine just before free_irq(), some interrupt may occur. In my environment, cp_interrupt exectutes and IntrStatus is 0x4, exactly TxOk. That will cause cp_tx to wake device queue. As device queue is started, cp_start_xmit and cp_open will run at same time which will cause kernel BUG. For example: [#] for tx descriptor At start: [#][#][#] num_queued=3 After cp_init_hw->cp_start_hw->netdev_reset_queue: [#][#][#] num_queued=0 When 8139cp starts to work then cp_tx will check num_queued mismatchs the complete_bytes. The patch will check IntrMask before check IntrStatus in cp_interrupt. When 8139cp interrupt is disabled, just return. Signed-off-by: Su Yanjun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/8139cp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index e7ab23e87de2..d1e88712a275 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -571,6 +571,7 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) struct cp_private *cp; int handled = 0; u16 status; + u16 mask; if (unlikely(dev == NULL)) return IRQ_NONE; @@ -578,6 +579,10 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) spin_lock(&cp->lock); + mask = cpr16(IntrMask); + if (!mask) + goto out_unlock; + status = cpr16(IntrStatus); if (!status || (status == 0xFFFF)) goto out_unlock; From 76f67ede4df921f5e4f0c02129a6407c977cab57 Mon Sep 17 00:00:00 2001 From: Tarick Bedeir Date: Fri, 7 Dec 2018 00:30:26 -0800 Subject: [PATCH 0980/2608] net/mlx4_core: Correctly set PFC param if global pause is turned off. [ Upstream commit bd5122cd1e0644d8bd8dd84517c932773e999766 ] rx_ppp and tx_ppp can be set between 0 and 255, so don't clamp to 1. Fixes: 6e8814ceb7e8 ("net/mlx4_en: Fix mixed PFC and Global pause user control requests") Signed-off-by: Tarick Bedeir Reviewed-by: Eran Ben Elisha Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 5fe56dc4cfae..5363cee88a0a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1070,8 +1070,8 @@ static int mlx4_en_set_pauseparam(struct net_device *dev, tx_pause = !!(pause->tx_pause); rx_pause = !!(pause->rx_pause); - rx_ppp = priv->prof->rx_ppp && !(tx_pause || rx_pause); - tx_ppp = priv->prof->tx_ppp && !(tx_pause || rx_pause); + rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp; + tx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->tx_ppp; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_skb_size + ETH_FCS_LEN, From 583e170ce81654554243790ac2e340ebe2180d14 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 2 Dec 2018 14:34:36 +0200 Subject: [PATCH 0981/2608] net/mlx4_en: Change min MTU size to ETH_MIN_MTU [ Upstream commit 24be19e47779d604d1492c114459dca9a92acf78 ] NIC driver minimal MTU size shall be set to ETH_MIN_MTU, as defined in the RFC791 and in the network stack. Remove old mlx4_en only define for it, which was set to wrong value. Fixes: b80f71f5816f ("ethernet/mellanox: use core min/max MTU checking") Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index faa4bd21f148..0fb85d71c11b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3505,8 +3505,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } - /* MTU range: 46 - hw-specific max */ - dev->min_mtu = MLX4_EN_MIN_MTU; + /* MTU range: 68 - hw-specific max */ + dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = priv->max_mtu; mdev->pndev[port] = dev; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 09f4764a3f39..bdd87438a354 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -157,7 +157,6 @@ #define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) -#define MLX4_EN_MIN_MTU 46 /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). */ From e44c8cbd09da1c3a9f2c752492b8efa3a53487f9 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 3 Dec 2018 08:19:33 +0100 Subject: [PATCH 0982/2608] net: phy: don't allow __set_phy_supported to add unsupported modes [ Upstream commit d2a36971ef595069b7a600d1144c2e0881a930a1 ] Currently __set_phy_supported allows to add modes w/o checking whether the PHY supports them. This is wrong, it should never add modes but only remove modes we don't want to support. The commit marked as fixed didn't do anything wrong, it just copied existing functionality to the helper which is being fixed now. Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index fe76e2c4022a..5b56a86e88ff 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1703,20 +1703,17 @@ EXPORT_SYMBOL(genphy_loopback); static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | - PHY_10BT_FEATURES); - switch (max_speed) { - default: - return -ENOTSUPP; - case SPEED_1000: - phydev->supported |= PHY_1000BT_FEATURES; + case SPEED_10: + phydev->supported &= ~PHY_100BT_FEATURES; /* fall through */ case SPEED_100: - phydev->supported |= PHY_100BT_FEATURES; - /* fall through */ - case SPEED_10: - phydev->supported |= PHY_10BT_FEATURES; + phydev->supported &= ~PHY_1000BT_FEATURES; + break; + case SPEED_1000: + break; + default: + return -ENOTSUPP; } return 0; From 56ed9f33e03f0a4070a8f3aba9d1ccc443d180a8 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Thu, 29 Nov 2018 16:01:04 -0800 Subject: [PATCH 0983/2608] net: Prevent invalid access to skb->prev in __qdisc_drop_all [ Upstream commit 9410d386d0a829ace9558336263086c2fbbe8aed ] __qdisc_drop_all() accesses skb->prev to get to the tail of the segment-list. With commit 68d2f84a1368 ("net: gro: properly remove skb from list") the skb-list handling has been changed to set skb->next to NULL and set the list-poison on skb->prev. With that change, __qdisc_drop_all() will panic when it tries to dereference skb->prev. Since commit 992cba7e276d ("net: Add and use skb_list_del_init().") __list_del_entry is used, leaving skb->prev unchanged (thus, pointing to the list-head if it's the first skb of the list). This will make __qdisc_drop_all modify the next-pointer of the list-head and result in a panic later on: [ 34.501053] general protection fault: 0000 [#1] SMP KASAN PTI [ 34.501968] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.20.0-rc2.mptcp #108 [ 34.502887] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 [ 34.504074] RIP: 0010:dev_gro_receive+0x343/0x1f90 [ 34.504751] Code: e0 48 c1 e8 03 42 80 3c 30 00 0f 85 4a 1c 00 00 4d 8b 24 24 4c 39 65 d0 0f 84 0a 04 00 00 49 8d 7c 24 38 48 89 f8 48 c1 e8 03 <42> 0f b6 04 30 84 c0 74 08 3c 04 [ 34.507060] RSP: 0018:ffff8883af507930 EFLAGS: 00010202 [ 34.507761] RAX: 0000000000000007 RBX: ffff8883970b2c80 RCX: 1ffff11072e165a6 [ 34.508640] RDX: 1ffff11075867008 RSI: ffff8883ac338040 RDI: 0000000000000038 [ 34.509493] RBP: ffff8883af5079d0 R08: ffff8883970b2d40 R09: 0000000000000062 [ 34.510346] R10: 0000000000000034 R11: 0000000000000000 R12: 0000000000000000 [ 34.511215] R13: 0000000000000000 R14: dffffc0000000000 R15: ffff8883ac338008 [ 34.512082] FS: 0000000000000000(0000) GS:ffff8883af500000(0000) knlGS:0000000000000000 [ 34.513036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 34.513741] CR2: 000055ccc3e9d020 CR3: 00000003abf32000 CR4: 00000000000006e0 [ 34.514593] Call Trace: [ 34.514893] [ 34.515157] napi_gro_receive+0x93/0x150 [ 34.515632] receive_buf+0x893/0x3700 [ 34.516094] ? __netif_receive_skb+0x1f/0x1a0 [ 34.516629] ? virtnet_probe+0x1b40/0x1b40 [ 34.517153] ? __stable_node_chain+0x4d0/0x850 [ 34.517684] ? kfree+0x9a/0x180 [ 34.518067] ? __kasan_slab_free+0x171/0x190 [ 34.518582] ? detach_buf+0x1df/0x650 [ 34.519061] ? lapic_next_event+0x5a/0x90 [ 34.519539] ? virtqueue_get_buf_ctx+0x280/0x7f0 [ 34.520093] virtnet_poll+0x2df/0xd60 [ 34.520533] ? receive_buf+0x3700/0x3700 [ 34.521027] ? qdisc_watchdog_schedule_ns+0xd5/0x140 [ 34.521631] ? htb_dequeue+0x1817/0x25f0 [ 34.522107] ? sch_direct_xmit+0x142/0xf30 [ 34.522595] ? virtqueue_napi_schedule+0x26/0x30 [ 34.523155] net_rx_action+0x2f6/0xc50 [ 34.523601] ? napi_complete_done+0x2f0/0x2f0 [ 34.524126] ? kasan_check_read+0x11/0x20 [ 34.524608] ? _raw_spin_lock+0x7d/0xd0 [ 34.525070] ? _raw_spin_lock_bh+0xd0/0xd0 [ 34.525563] ? kvm_guest_apic_eoi_write+0x6b/0x80 [ 34.526130] ? apic_ack_irq+0x9e/0xe0 [ 34.526567] __do_softirq+0x188/0x4b5 [ 34.527015] irq_exit+0x151/0x180 [ 34.527417] do_IRQ+0xdb/0x150 [ 34.527783] common_interrupt+0xf/0xf [ 34.528223] This patch makes sure that skb->prev is set to NULL when entering netem_enqueue. Cc: Prashant Bhole Cc: Tyler Hicks Cc: Eric Dumazet Fixes: 68d2f84a1368 ("net: gro: properly remove skb from list") Suggested-by: Eric Dumazet Signed-off-by: Christoph Paasch Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3d325b840802..3f4f0b946798 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -436,6 +436,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, int count = 1; int rc = NET_XMIT_SUCCESS; + /* Do not fool qdisc_drop_all() */ + skb->prev = NULL; + /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) ++count; From 22fa15e6f5b7b0cdf5ccd6beb5e2bf48aa6ac306 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Dec 2018 09:40:35 -0800 Subject: [PATCH 0984/2608] rtnetlink: ndo_dflt_fdb_dump() only work for ARPHRD_ETHER devices [ Upstream commit 688838934c231bb08f46db687e57f6d8bf82709c ] kmsan was able to trigger a kernel-infoleak using a gre device [1] nlmsg_populate_fdb_fill() has a hard coded assumption that dev->addr_len is ETH_ALEN, as normally guaranteed for ARPHRD_ETHER devices. A similar issue was fixed recently in commit da71577545a5 ("rtnetlink: Disallow FDB configuration for non-Ethernet device") [1] BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:143 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576 CPU: 0 PID: 6697 Comm: syz-executor310 Not tainted 4.20.0-rc3+ #95 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x32d/0x480 lib/dump_stack.c:113 kmsan_report+0x12c/0x290 mm/kmsan/kmsan.c:683 kmsan_internal_check_memory+0x32a/0xa50 mm/kmsan/kmsan.c:743 kmsan_copy_to_user+0x78/0xd0 mm/kmsan/kmsan_hooks.c:634 copyout lib/iov_iter.c:143 [inline] _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576 copy_to_iter include/linux/uio.h:143 [inline] skb_copy_datagram_iter+0x4e2/0x1070 net/core/datagram.c:431 skb_copy_datagram_msg include/linux/skbuff.h:3316 [inline] netlink_recvmsg+0x6f9/0x19d0 net/netlink/af_netlink.c:1975 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg+0x1d1/0x230 net/socket.c:801 ___sys_recvmsg+0x444/0xae0 net/socket.c:2278 __sys_recvmsg net/socket.c:2327 [inline] __do_sys_recvmsg net/socket.c:2337 [inline] __se_sys_recvmsg+0x2fa/0x450 net/socket.c:2334 __x64_sys_recvmsg+0x4a/0x70 net/socket.c:2334 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x441119 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fffc7f008a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002f RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000441119 RDX: 0000000000000040 RSI: 00000000200005c0 RDI: 0000000000000003 RBP: 00000000006cc018 R08: 0000000000000100 R09: 0000000000000100 R10: 0000000000000100 R11: 0000000000000207 R12: 0000000000402080 R13: 0000000000402110 R14: 0000000000000000 R15: 0000000000000000 Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline] kmsan_save_stack mm/kmsan/kmsan.c:261 [inline] kmsan_internal_chain_origin+0x13d/0x240 mm/kmsan/kmsan.c:469 kmsan_memcpy_memmove_metadata+0x1a9/0xf70 mm/kmsan/kmsan.c:344 kmsan_memcpy_metadata+0xb/0x10 mm/kmsan/kmsan.c:362 __msan_memcpy+0x61/0x70 mm/kmsan/kmsan_instr.c:162 __nla_put lib/nlattr.c:744 [inline] nla_put+0x20a/0x2d0 lib/nlattr.c:802 nlmsg_populate_fdb_fill+0x444/0x810 net/core/rtnetlink.c:3466 nlmsg_populate_fdb net/core/rtnetlink.c:3775 [inline] ndo_dflt_fdb_dump+0x73a/0x960 net/core/rtnetlink.c:3807 rtnl_fdb_dump+0x1318/0x1cb0 net/core/rtnetlink.c:3979 netlink_dump+0xc79/0x1c90 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x10c4/0x11d0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] rtnetlink_rcv_msg+0x141b/0x1540 net/core/rtnetlink.c:4910 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline] kmsan_internal_poison_shadow+0x6d/0x130 mm/kmsan/kmsan.c:170 kmsan_kmalloc+0xa1/0x100 mm/kmsan/kmsan_hooks.c:186 __kmalloc+0x14c/0x4d0 mm/slub.c:3825 kmalloc include/linux/slab.h:551 [inline] __hw_addr_create_ex net/core/dev_addr_lists.c:34 [inline] __hw_addr_add_ex net/core/dev_addr_lists.c:80 [inline] __dev_mc_add+0x357/0x8a0 net/core/dev_addr_lists.c:670 dev_mc_add+0x6d/0x80 net/core/dev_addr_lists.c:687 ip_mc_filter_add net/ipv4/igmp.c:1128 [inline] igmp_group_added+0x4d4/0xb80 net/ipv4/igmp.c:1311 __ip_mc_inc_group+0xea9/0xf70 net/ipv4/igmp.c:1444 ip_mc_inc_group net/ipv4/igmp.c:1453 [inline] ip_mc_up+0x1c3/0x400 net/ipv4/igmp.c:1775 inetdev_event+0x1d03/0x1d80 net/ipv4/devinet.c:1522 notifier_call_chain kernel/notifier.c:93 [inline] __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x13d/0x240 kernel/notifier.c:401 __dev_notify_flags+0x3da/0x860 net/core/dev.c:1733 dev_change_flags+0x1ac/0x230 net/core/dev.c:7569 do_setlink+0x165f/0x5ea0 net/core/rtnetlink.c:2492 rtnl_newlink+0x2ad7/0x35a0 net/core/rtnetlink.c:3111 rtnetlink_rcv_msg+0x1148/0x1540 net/core/rtnetlink.c:4947 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Bytes 36-37 of 105 are uninitialized Memory access of size 105 starts at ffff88819686c000 Data copied to user address 0000000020000380 Fixes: d83b06036048 ("net: add fdb generic dump routine") Signed-off-by: Eric Dumazet Cc: John Fastabend Cc: Ido Schimmel Cc: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c392a77ff788..925af6b43017 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3280,6 +3280,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, { int err; + if (dev->type != ARPHRD_ETHER) + return -EINVAL; + netif_addr_lock_bh(dev); err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) From 862b5ab951551cd14ee13c8ab0bde21b63b93bd6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 1 Dec 2018 01:36:59 +0800 Subject: [PATCH 0985/2608] sctp: kfree_rcu asoc [ Upstream commit fb6df5a6234c38a9c551559506a49a677ac6f07a ] In sctp_hash_transport/sctp_epaddr_lookup_transport, it dereferences a transport's asoc under rcu_read_lock while asoc is freed not after a grace period, which leads to a use-after-free panic. This patch fixes it by calling kfree_rcu to make asoc be freed after a grace period. Note that only the asoc's memory is delayed to free in the patch, it won't cause sk to linger longer. Thanks Neil and Marcelo to make this clear. Fixes: 7fda702f9315 ("sctp: use new rhlist interface on sctp transport rhashtable") Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport") Reported-by: syzbot+0b05d8aa7cb185107483@syzkaller.appspotmail.com Reported-by: syzbot+aad231d51b1923158444@syzkaller.appspotmail.com Suggested-by: Neil Horman Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/structs.h | 2 ++ net/sctp/associola.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8e1e1dc490fd..94c775773f58 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1902,6 +1902,8 @@ struct sctp_association { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + + struct rcu_head rcu; }; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 4982b31fec8e..23fec3817e0c 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -432,7 +432,7 @@ static void sctp_association_destroy(struct sctp_association *asoc) WARN_ON(atomic_read(&asoc->rmem_alloc)); - kfree(asoc); + kfree_rcu(asoc, rcu); SCTP_DBG_OBJCNT_DEC(assoc); } From 5567e5fefaa904d051ce03a330183125f92d2d37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Dec 2018 14:24:31 -0800 Subject: [PATCH 0986/2608] tcp: Do not underestimate rwnd_limited [ Upstream commit 41727549de3e7281feb174d568c6e46823db8684 ] If available rwnd is too small, tcp_tso_should_defer() can decide it is worth waiting before splitting a TSO packet. This really means we are rwnd limited. Fixes: 5615f88614a4 ("tcp: instrument how long TCP is limited by receive window") Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b2ead31afcba..12cd64382768 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2328,8 +2328,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } else { if (!push_one && tcp_tso_should_defer(sk, skb, &is_cwnd_limited, - max_segs)) + max_segs)) { + if (!is_cwnd_limited) + is_rwnd_limited = true; break; + } } limit = mss_now; From 6293016fbd9ba1b5ecc7d1afe67fb359d4c05512 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 5 Dec 2018 14:38:38 -0800 Subject: [PATCH 0987/2608] tcp: fix NULL ref in tail loss probe [ Upstream commit b2b7af861122a0c0f6260155c29a1b2e594cd5b5 ] TCP loss probe timer may fire when the retranmission queue is empty but has a non-zero tp->packets_out counter. tcp_send_loss_probe will call tcp_rearm_rto which triggers NULL pointer reference by fetching the retranmission queue head in its sub-routines. Add a more detailed warning to help catch the root cause of the inflight accounting inconsistency. Reported-by: Rafael Tinoco Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 12cd64382768..1b31b0a1c7fa 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2476,14 +2476,18 @@ void tcp_send_loss_probe(struct sock *sk) skb = tcp_write_queue_tail(sk); } + if (unlikely(!skb)) { + WARN_ONCE(tp->packets_out, + "invalid inflight: %u state %u cwnd %u mss %d\n", + tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); + inet_csk(sk)->icsk_pending = 0; + return; + } + /* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) goto rearm_timer; - /* Retransmit last segment. */ - if (WARN_ON(!skb)) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; From 1d2cda4e8ffef837ff0f780633d536eb233a14d5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 29 Nov 2018 14:45:39 +0100 Subject: [PATCH 0988/2608] tun: forbid iface creation with rtnl ops [ Upstream commit 35b827b6d06199841a83839e8bb69c0cd13a28be ] It's not supported right now (the goal of the initial patch was to support 'ip link del' only). Before the patch: $ ip link add foo type tun [ 239.632660] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [snip] [ 239.636410] RIP: 0010:register_netdevice+0x8e/0x3a0 This panic occurs because dev->netdev_ops is not set by tun_setup(). But to have something usable, it will require more than just setting netdev_ops. Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX") CC: Eric W. Biederman Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0a008d136aae..2956bb6cda72 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1818,9 +1818,9 @@ static void tun_setup(struct net_device *dev) static int tun_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - if (!data) - return 0; - return -EINVAL; + NL_SET_ERR_MSG(extack, + "tun/tap creation via rtnetlink is not supported."); + return -EOPNOTSUPP; } static struct rtnl_link_ops tun_link_ops __read_mostly = { From 520e8609ecf23a3a3ac4ba88f084cb11d00f082f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 29 Nov 2018 13:53:16 +0800 Subject: [PATCH 0989/2608] virtio-net: keep vnet header zeroed after processing XDP [ Upstream commit 436c9453a1ac0944b82870ef2e0d9be956b396d9 ] We copy vnet header unconditionally in page_to_skb() this is wrong since XDP may modify the packet data. So let's keep a zeroed vnet header for not confusing the conversion between vnet header and skb metadata. In the future, we should able to detect whether or not the packet was modified and keep using the vnet header when packet was not touched. Fixes: f600b6905015 ("virtio_net: Add XDP support") Reported-by: Pavel Popa Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0e8e3be50332..215696f21d67 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -309,7 +309,8 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, - unsigned int len, unsigned int truesize) + unsigned int len, unsigned int truesize, + bool hdr_valid) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -331,7 +332,8 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - memcpy(hdr, p, hdr_len); + if (hdr_valid) + memcpy(hdr, p, hdr_len); len -= hdr_len; offset += hdr_padded_len; @@ -594,7 +596,8 @@ static struct sk_buff *receive_big(struct net_device *dev, unsigned int len) { struct page *page = buf; - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); + struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, + PAGE_SIZE, true); if (unlikely(!skb)) goto err; @@ -678,7 +681,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_unlock(); put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, - offset, len, PAGE_SIZE); + offset, len, + PAGE_SIZE, false); ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); return head_skb; } @@ -712,7 +716,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_skb; } - head_skb = page_to_skb(vi, rq, page, offset, len, truesize); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); curr_skb = head_skb; if (unlikely(!curr_skb)) From 635e16fc7402e663c03f45e92df5aa6acdf079fc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 17:54:00 -0700 Subject: [PATCH 0990/2608] ARM: OMAP2+: prm44xx: Fix section annotation on omap44xx_prm_enable_io_wakeup [ Upstream commit eef3dc34a1e0b01d53328b88c25237bcc7323777 ] When building the kernel with Clang, the following section mismatch warning appears: WARNING: vmlinux.o(.text+0x38b3c): Section mismatch in reference from the function omap44xx_prm_late_init() to the function .init.text:omap44xx_prm_enable_io_wakeup() The function omap44xx_prm_late_init() references the function __init omap44xx_prm_enable_io_wakeup(). This is often because omap44xx_prm_late_init lacks a __init annotation or the annotation of omap44xx_prm_enable_io_wakeup is wrong. Remove the __init annotation from omap44xx_prm_enable_io_wakeup so there is no more mismatch. Signed-off-by: Nathan Chancellor Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/prm44xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 1c0c1663f078..5affa9f5300b 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -344,7 +344,7 @@ static void omap44xx_prm_reconfigure_io_chain(void) * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and * omap44xx_prm_reconfigure_io_chain() must be called. No return value. */ -static void __init omap44xx_prm_enable_io_wakeup(void) +static void omap44xx_prm_enable_io_wakeup(void) { s32 inst = omap4_prmst_get_prm_dev_inst(); From b34888a95334876d971199f94a0f44e818b4a396 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 31 Oct 2018 00:48:12 +0000 Subject: [PATCH 0991/2608] ASoC: rsnd: fixup clock start checker [ Upstream commit 3ee9a76a8c5a10e1bfb04b81db767c6d562ddaf3 ] commit 4d230d12710646 ("ASoC: rsnd: fixup not to call clk_get/set under non-atomic") fixuped clock start timing. But it exchanged clock start checker from ssi->usrcnt to ssi->rate. Current rsnd_ssi_master_clk_start() is called from .prepare, but some player (for example GStreamer) might calls it many times. In such case, the checker might returns error even though it was not error. It should check ssi->usrcnt instead of ssi->rate. This patch fixup it. Without this patch, GStreamer can't switch 48kHz / 44.1kHz. Reported-by: Yusuke Goda Signed-off-by: Kuninori Morimoto Tested-by: Yusuke Goda Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 34223c8c28a8..0db2791f7035 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -280,7 +280,7 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, if (rsnd_ssi_is_multi_slave(mod, io)) return 0; - if (ssi->rate) { + if (ssi->usrcnt > 1) { if (ssi->rate != rate) { dev_err(dev, "SSI parent/child should use same rate\n"); return -EINVAL; From f55ad8d2a215da47b4f1a265a4b1b850ba7167a1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 17 Oct 2018 10:15:34 +0200 Subject: [PATCH 0992/2608] staging: rtl8723bs: Fix the return value in case of error in 'rtw_wx_read32()' [ Upstream commit c3e43d8b958bd6849817393483e805d8638a8ab7 ] We return 0 unconditionally in 'rtw_wx_read32()'. However, 'ret' is set to some error codes in several error handling paths. Return 'ret' instead to propagate the error code. Fixes: 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver") Signed-off-by: Christophe JAILLET Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index d5e5f830f2a1..1b61da61690b 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -2383,7 +2383,7 @@ static int rtw_wx_read32(struct net_device *dev, exit: kfree(ptmp); - return 0; + return ret; } static int rtw_wx_write32(struct net_device *dev, From 5b2afd6261aeaf17da4953e07b731ca71bcabd17 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:29:27 -0500 Subject: [PATCH 0993/2608] ARM: dts: logicpd-somlv: Fix interrupt on mmc3_dat1 [ Upstream commit 3d8b804bc528d3720ec0c39c212af92dafaf6e84 ] The interrupt on mmc3_dat1 is wrong which prevents this from appearing in /proc/interrupts. Fixes: ab8dd3aed011 ("ARM: DTS: Add minimal Support for Logic PD DM3730 SOM-LV") #Kernel 4.9+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index c335b923753a..a7883676f675 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -123,7 +123,7 @@ }; &mmc3 { - interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; + interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>; pinctrl-0 = <&mmc3_pins &wl127x_gpio>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; From 21ffeda14e83665ccd350005e3d51fdb0793d248 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 7 Nov 2018 22:30:31 +0100 Subject: [PATCH 0994/2608] ARM: OMAP1: ams-delta: Fix possible use of uninitialized field [ Upstream commit cec83ff1241ec98113a19385ea9e9cfa9aa4125b ] While playing with initialization order of modem device, it has been discovered that under some circumstances (early console init, I believe) its .pm() callback may be called before the uart_port->private_data pointer is initialized from plat_serial8250_port->private_data, resulting in NULL pointer dereference. Fix it by checking for uninitialized pointer before using it in modem_pm(). Fixes: aabf31737a6a ("ARM: OMAP1: ams-delta: update the modem to use regulator API") Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap1/board-ams-delta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 6cbc69c92913..4174fa86bfb1 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -512,6 +512,9 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) struct modem_private_data *priv = port->private_data; int ret; + if (!priv) + return; + if (IS_ERR(priv->regulator)) return; From 2ac368e8d504d66c315483b16cb9a3750b4cfb15 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 10 Nov 2018 04:13:24 +0000 Subject: [PATCH 0995/2608] sysv: return 'err' instead of 0 in __sysv_write_inode [ Upstream commit c4b7d1ba7d263b74bb72e9325262a67139605cde ] Fixes gcc '-Wunused-but-set-variable' warning: fs/sysv/inode.c: In function '__sysv_write_inode': fs/sysv/inode.c:239:6: warning: variable 'err' set but not used [-Wunused-but-set-variable] __sysv_write_inode should return 'err' instead of 0 Fixes: 05459ca81ac3 ("repair sysv_write_inode(), switch sysv to simple_fsync()") Signed-off-by: YueHaibing Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/sysv/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3c47b7d5d4cf..9e0874d1524c 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -275,7 +275,7 @@ static int __sysv_write_inode(struct inode *inode, int wait) } } brelse(bh); - return 0; + return err; } int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) From cae5446b33f65162fe21fdb633a63619633e7ea1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 31 Oct 2018 18:26:21 +0100 Subject: [PATCH 0996/2608] selftests: add script to stress-test nft packet path vs. control plane [ Upstream commit 25d8bcedbf4329895dbaf9dd67baa6f18dad918c ] Start flood ping for each cpu while loading/flushing rulesets to make sure we do not access already-free'd rules from nf_tables evaluation loop. Also add this to TARGETS so 'make run_tests' in selftest dir runs it automatically. This would have caught the bug fixed in previous change ("netfilter: nf_tables: do not skip inactive chains during generation update") sooner. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/netfilter/Makefile | 6 ++ tools/testing/selftests/netfilter/config | 2 + .../selftests/netfilter/nft_trans_stress.sh | 78 +++++++++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 tools/testing/selftests/netfilter/Makefile create mode 100644 tools/testing/selftests/netfilter/config create mode 100755 tools/testing/selftests/netfilter/nft_trans_stress.sh diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index ea300e7818a7..10b89f5b9af7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -20,6 +20,7 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += netfilter TARGETS += nsfs TARGETS += powerpc TARGETS += pstore diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile new file mode 100644 index 000000000000..47ed6cef93fb --- /dev/null +++ b/tools/testing/selftests/netfilter/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for netfilter selftests + +TEST_PROGS := nft_trans_stress.sh + +include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config new file mode 100644 index 000000000000..1017313e41a8 --- /dev/null +++ b/tools/testing/selftests/netfilter/config @@ -0,0 +1,2 @@ +CONFIG_NET_NS=y +NF_TABLES_INET=y diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh new file mode 100755 index 000000000000..f1affd12c4b1 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# This test is for stress-testing the nf_tables config plane path vs. +# packet path processing: Make sure we never release rules that are +# still visible to other cpus. +# +# set -e + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +testns=testns1 +tables="foo bar baz quux" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +tmp=$(mktemp) + +for table in $tables; do + echo add table inet "$table" >> "$tmp" + echo flush table inet "$table" >> "$tmp" + + echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp" + echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp" + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + echo "add chain inet $table $chain" >> "$tmp" + done + + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + for BASE in INPUT OUTPUT; do + echo "add rule inet $table $BASE counter jump $chain" >> "$tmp" + done + echo "add rule inet $table $chain counter return" >> "$tmp" + done +done + +ip netns add "$testns" +ip -netns "$testns" link set lo up + +lscpu | grep ^CPU\(s\): | ( read cpu cpunum ; +cpunum=$((cpunum-1)) +for i in $(seq 0 $cpunum);do + mask=$(printf 0x%x $((1<<$i))) + ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null & + ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null & +done) + +sleep 1 + +for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done + +for table in $tables;do + randsleep=$((RANDOM%10)) + sleep $randsleep + ip netns exec "$testns" nft delete table inet $table 2>/dev/null +done + +randsleep=$((RANDOM%10)) +sleep $randsleep + +pkill -9 ping + +wait + +rm -f "$tmp" +ip netns del "$testns" From 38e22947994de227bc1ac57ec82314c24a605605 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 12 Nov 2018 22:43:45 +0100 Subject: [PATCH 0997/2608] netfilter: nf_tables: fix use-after-free when deleting compat expressions [ Upstream commit 29e3880109e357fdc607b4393f8308cef6af9413 ] nft_compat ops do not have static storage duration, unlike all other expressions. When nf_tables_expr_destroy() returns, expr->ops might have been free'd already, so we need to store next address before calling expression destructor. For same reason, we can't deref match pointer after nft_xt_put(). This can be easily reproduced by adding msleep() before nft_match_destroy() returns. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 5 +++-- net/netfilter/nft_compat.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3ae365f92bff..ea1e57daf50e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2252,7 +2252,7 @@ err: static void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { - struct nft_expr *expr; + struct nft_expr *expr, *next; /* * Careful: some expressions might not be initialized in case this @@ -2260,8 +2260,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, */ expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { + next = nft_expr_next(expr); nf_tables_expr_destroy(ctx, expr); - expr = nft_expr_next(expr); + expr = next; } kfree(rule); } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 6da1cec1494a..7533c2fd6b76 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -497,6 +497,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, void *info) { struct xt_match *match = expr->ops->data; + struct module *me = match->me; struct xt_mtdtor_param par; par.net = ctx->net; @@ -507,7 +508,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, par.match->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(match->me); + module_put(me); } static void From de2324a00979257c7fff8cf9f2bdc3023957fef8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 9 Nov 2018 16:42:14 -0800 Subject: [PATCH 0998/2608] hwmon (ina2xx) Fix NULL id pointer in probe() [ Upstream commit 70df9ebbd82c794ddfbb49d45b337f18d5588dc2 ] When using DT configurations, the id pointer might turn out to be NULL. Then the driver encounters NULL pointer access: Unable to handle kernel read from unreadable memory at vaddr 00000018 [...] PC is at ina2xx_probe+0x114/0x200 LR is at ina2xx_probe+0x10c/0x200 [...] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b The reason is that i2c core returns the id pointer by matching id_table with client->name, while the client->name is actually using the name from the first string in the DT compatible list, not the best one. So i2c core would fail to match the id_table if the best matched compatible string isn't the first one, and then would return a NULL id pointer. This probably should be fixed in i2c core. But it doesn't hurt to make the driver robust. So this patch fixes it by using the "chip" that's added to unify both DT and non-DT configurations. Additionally, since id pointer could be null, so as id->name: ina2xx 10-0047: power monitor (null) (Rshunt = 1000 uOhm) ina2xx 10-0048: power monitor (null) (Rshunt = 10000 uOhm) So this patch also fixes NULL name pointer, using client->name to play safe and to align with hwmon->name. Fixes: bd0ddd4d0883 ("hwmon: (ina2xx) Add OF device ID table") Signed-off-by: Nicolin Chen Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ina2xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index 71d3445ba869..c2252cf452f5 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -491,7 +491,7 @@ static int ina2xx_probe(struct i2c_client *client, } data->groups[group++] = &ina2xx_group; - if (id->driver_data == ina226) + if (chip == ina226) data->groups[group++] = &ina226_group; hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, @@ -500,7 +500,7 @@ static int ina2xx_probe(struct i2c_client *client, return PTR_ERR(hwmon_dev); dev_info(dev, "power monitor %s (Rshunt = %li uOhm)\n", - id->name, data->rshunt); + client->name, data->rshunt); return 0; } From 5391e32b9d0692abb1996ba3c3fdf2e0c97b72e4 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 12 Nov 2018 13:36:38 +0000 Subject: [PATCH 0999/2608] ASoC: wm_adsp: Fix dma-unsafe read of scratch registers [ Upstream commit 20e00db2f59bdddf8a8e241473ef8be94631d3ae ] Stack memory isn't DMA-safe so it isn't safe to use either regmap_raw_read or regmap_bulk_read to read into stack memory. The two functions to read the scratch registers were using stack memory and regmap_raw_read. It's not worth allocating memory just for this trivial read, and it isn't time-critical. A simple regmap_read for each register is sufficient. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 989d093abda7..67330b6ab204 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -787,38 +787,41 @@ static unsigned int wm_adsp_region_to_reg(struct wm_adsp_region const *mem, static void wm_adsp2_show_fw_status(struct wm_adsp *dsp) { - u16 scratch[4]; + unsigned int scratch[4]; + unsigned int addr = dsp->base + ADSP2_SCRATCH0; + unsigned int i; int ret; - ret = regmap_raw_read(dsp->regmap, dsp->base + ADSP2_SCRATCH0, - scratch, sizeof(scratch)); - if (ret) { - adsp_err(dsp, "Failed to read SCRATCH regs: %d\n", ret); - return; + for (i = 0; i < ARRAY_SIZE(scratch); ++i) { + ret = regmap_read(dsp->regmap, addr + i, &scratch[i]); + if (ret) { + adsp_err(dsp, "Failed to read SCRATCH%u: %d\n", i, ret); + return; + } } adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n", - be16_to_cpu(scratch[0]), - be16_to_cpu(scratch[1]), - be16_to_cpu(scratch[2]), - be16_to_cpu(scratch[3])); + scratch[0], scratch[1], scratch[2], scratch[3]); } static void wm_adsp2v2_show_fw_status(struct wm_adsp *dsp) { - u32 scratch[2]; + unsigned int scratch[2]; int ret; - ret = regmap_raw_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH0_1, - scratch, sizeof(scratch)); - + ret = regmap_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH0_1, + &scratch[0]); if (ret) { - adsp_err(dsp, "Failed to read SCRATCH regs: %d\n", ret); + adsp_err(dsp, "Failed to read SCRATCH0_1: %d\n", ret); return; } - scratch[0] = be32_to_cpu(scratch[0]); - scratch[1] = be32_to_cpu(scratch[1]); + ret = regmap_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH2_3, + &scratch[1]); + if (ret) { + adsp_err(dsp, "Failed to read SCRATCH2_3: %d\n", ret); + return; + } adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n", scratch[0] & 0xFFFF, From 250dea0dfb12e2803d6de0105fae7177e4bd1b80 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Nov 2018 15:38:22 +0000 Subject: [PATCH 1000/2608] s390/cpum_cf: Reject request for sampling in event initialization [ Upstream commit 613a41b0d16e617f46776a93b975a1eeea96417c ] On s390 command perf top fails [root@s35lp76 perf] # ./perf top -F100000 --stdio Error: cycles: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' [root@s35lp76 perf] # Using event -e rb0000 works as designed. Event rb0000 is the event number of the sampling facility for basic sampling. During system start up the following PMUs are installed in the kernel's PMU list (from head to tail): cpum_cf --> s390 PMU counter facility device driver cpum_sf --> s390 PMU sampling facility device driver uprobe kprobe tracepoint task_clock cpu_clock Perf top executes following functions and calls perf_event_open(2) system call with different parameters many times: cmd_top --> __cmd_top --> perf_evlist__add_default --> __perf_evlist__add_default --> perf_evlist__new_cycles (creates event type:0 (HW) config 0 (CPU_CYCLES) --> perf_event_attr__set_max_precise_ip Uses perf_event_open(2) to detect correct precise_ip level. Fails 3 times on s390 which is ok. Then functions cmd_top --> __cmd_top --> perf_top__start_counters -->perf_evlist__config --> perf_can_comm_exec --> perf_probe_api This functions test support for the following events: "cycles:u", "instructions:u", "cpu-clock:u" using --> perf_do_probe_api --> perf_event_open_cloexec Test the close on exec flag support with perf_event_open(2). perf_do_probe_api returns true if the event is supported. The function returns true because event cpu-clock is supported by the PMU cpu_clock. This is achieved by many calls to perf_event_open(2). Function perf_top__start_counters now calls perf_evsel__open() for every event, which is the default event cpu_cycles (config:0) and type HARDWARE (type:0) which a predfined frequence of 4000. Given the above order of the PMU list, the PMU cpum_cf gets called first and returns 0, which indicates support for this sampling. The event is fully allocated in the function perf_event_open (file kernel/event/core.c near line 10521 and the following check fails: event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_cred; } if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -EOPNOTSUPP; goto err_alloc; } } The check for the interrupt capabilities fails and the system call perf_event_open() returns -EOPNOTSUPP (-95). Add a check to return -ENODEV when sampling is requested in PMU cpum_cf. This allows common kernel code in the perf_event_open() system call to test the next PMU in above list. Fixes: 97b1198fece0 (" "s390, perf: Use common PMU interrupt disabled code") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_cf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 61e91fee8467..edf6a61f0a64 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -349,6 +349,8 @@ static int __hw_perf_event_init(struct perf_event *event) break; case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; ev = attr->config; /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { From fd2c1ba142277bfd6f3488e6105c9eb643246ddc Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 13 Nov 2018 19:48:54 -0800 Subject: [PATCH 1001/2608] hwmon: (ina2xx) Fix current value calculation [ Upstream commit 38cd989ee38c16388cde89db5b734f9d55b905f9 ] The current register (04h) has a sign bit at MSB. The comments for this calculation also mention that it's a signed register. However, the regval is unsigned type so result of calculation turns out to be an incorrect value when current is negative. This patch simply fixes this by adding a casting to s16. Fixes: 5d389b125186c ("hwmon: (ina2xx) Make calibration register value fixed") Signed-off-by: Nicolin Chen Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ina2xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index c2252cf452f5..07ee19573b3f 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -274,7 +274,7 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg, break; case INA2XX_CURRENT: /* signed register, result in mA */ - val = regval * data->current_lsb_uA; + val = (s16)regval * data->current_lsb_uA; val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_CALIBRATION: From b65b4228bdaa8e9d67d10a9206f84df9a20c292d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Nov 2018 14:58:20 +0200 Subject: [PATCH 1002/2608] ASoC: omap-abe-twl6040: Fix missing audio card caused by deferred probing [ Upstream commit 76836fd354922ebe4798a64fda01f8dc6a8b0984 ] The machine driver fails to probe in next-20181113 with: [ 2.539093] omap-abe-twl6040 sound: ASoC: CODEC DAI twl6040-legacy not registered [ 2.546630] omap-abe-twl6040 sound: devm_snd_soc_register_card() failed: -517 ... [ 3.693206] omap-abe-twl6040 sound: ASoC: Both platform name/of_node are set for TWL6040 [ 3.701446] omap-abe-twl6040 sound: ASoC: failed to init link TWL6040 [ 3.708007] omap-abe-twl6040 sound: devm_snd_soc_register_card() failed: -22 [ 3.715148] omap-abe-twl6040: probe of sound failed with error -22 Bisect pointed to a merge commit: first bad commit: [0f688ab20a540aafa984c5dbd68a71debebf4d7f] Merge remote-tracking branch 'net-next/master' and a diff between a working kernel does not reveal anything which would explain the change in behavior. Further investigation showed that on the second try of loading fails because the dai_link->platform is no longer NULL and it might be pointing to uninitialized memory. The fix is to move the snd_soc_dai_link and snd_soc_card inside of the abe_twl6040 struct, which is dynamically allocated every time the driver probes. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/omap/omap-abe-twl6040.c | 67 +++++++++++++------------------ 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/sound/soc/omap/omap-abe-twl6040.c b/sound/soc/omap/omap-abe-twl6040.c index 614b18d2f631..6fd143799534 100644 --- a/sound/soc/omap/omap-abe-twl6040.c +++ b/sound/soc/omap/omap-abe-twl6040.c @@ -36,6 +36,8 @@ #include "../codecs/twl6040.h" struct abe_twl6040 { + struct snd_soc_card card; + struct snd_soc_dai_link dai_links[2]; int jack_detection; /* board can detect jack events */ int mclk_freq; /* MCLK frequency speed for twl6040 */ }; @@ -208,40 +210,10 @@ static int omap_abe_dmic_init(struct snd_soc_pcm_runtime *rtd) ARRAY_SIZE(dmic_audio_map)); } -/* Digital audio interface glue - connects codec <--> CPU */ -static struct snd_soc_dai_link abe_twl6040_dai_links[] = { - { - .name = "TWL6040", - .stream_name = "TWL6040", - .codec_dai_name = "twl6040-legacy", - .codec_name = "twl6040-codec", - .init = omap_abe_twl6040_init, - .ops = &omap_abe_ops, - }, - { - .name = "DMIC", - .stream_name = "DMIC Capture", - .codec_dai_name = "dmic-hifi", - .codec_name = "dmic-codec", - .init = omap_abe_dmic_init, - .ops = &omap_abe_dmic_ops, - }, -}; - -/* Audio machine driver */ -static struct snd_soc_card omap_abe_card = { - .owner = THIS_MODULE, - - .dapm_widgets = twl6040_dapm_widgets, - .num_dapm_widgets = ARRAY_SIZE(twl6040_dapm_widgets), - .dapm_routes = audio_map, - .num_dapm_routes = ARRAY_SIZE(audio_map), -}; - static int omap_abe_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; - struct snd_soc_card *card = &omap_abe_card; + struct snd_soc_card *card; struct device_node *dai_node; struct abe_twl6040 *priv; int num_links = 0; @@ -252,12 +224,18 @@ static int omap_abe_probe(struct platform_device *pdev) return -ENODEV; } - card->dev = &pdev->dev; - priv = devm_kzalloc(&pdev->dev, sizeof(struct abe_twl6040), GFP_KERNEL); if (priv == NULL) return -ENOMEM; + card = &priv->card; + card->dev = &pdev->dev; + card->owner = THIS_MODULE; + card->dapm_widgets = twl6040_dapm_widgets; + card->num_dapm_widgets = ARRAY_SIZE(twl6040_dapm_widgets); + card->dapm_routes = audio_map; + card->num_dapm_routes = ARRAY_SIZE(audio_map); + if (snd_soc_of_parse_card_name(card, "ti,model")) { dev_err(&pdev->dev, "Card name is not provided\n"); return -ENODEV; @@ -274,14 +252,27 @@ static int omap_abe_probe(struct platform_device *pdev) dev_err(&pdev->dev, "McPDM node is not provided\n"); return -EINVAL; } - abe_twl6040_dai_links[0].cpu_of_node = dai_node; - abe_twl6040_dai_links[0].platform_of_node = dai_node; + + priv->dai_links[0].name = "DMIC"; + priv->dai_links[0].stream_name = "TWL6040"; + priv->dai_links[0].cpu_of_node = dai_node; + priv->dai_links[0].platform_of_node = dai_node; + priv->dai_links[0].codec_dai_name = "twl6040-legacy"; + priv->dai_links[0].codec_name = "twl6040-codec"; + priv->dai_links[0].init = omap_abe_twl6040_init; + priv->dai_links[0].ops = &omap_abe_ops; dai_node = of_parse_phandle(node, "ti,dmic", 0); if (dai_node) { num_links = 2; - abe_twl6040_dai_links[1].cpu_of_node = dai_node; - abe_twl6040_dai_links[1].platform_of_node = dai_node; + priv->dai_links[1].name = "TWL6040"; + priv->dai_links[1].stream_name = "DMIC Capture"; + priv->dai_links[1].cpu_of_node = dai_node; + priv->dai_links[1].platform_of_node = dai_node; + priv->dai_links[1].codec_dai_name = "dmic-hifi"; + priv->dai_links[1].codec_name = "dmic-codec"; + priv->dai_links[1].init = omap_abe_dmic_init; + priv->dai_links[1].ops = &omap_abe_dmic_ops; } else { num_links = 1; } @@ -300,7 +291,7 @@ static int omap_abe_probe(struct platform_device *pdev) return -ENODEV; } - card->dai_link = abe_twl6040_dai_links; + card->dai_link = priv->dai_links; card->num_links = num_links; snd_soc_card_set_drvdata(card, priv); From 55b648d1d624b9beced6384c8e2895b35dc69878 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 14 Nov 2018 17:06:13 +0800 Subject: [PATCH 1003/2608] ASoC: dapm: Recalculate audio map forcely when card instantiated [ Upstream commit 882eab6c28d23a970ae73b7eb831b169a672d456 ] Audio map are possible in wrong state before card->instantiated has been set to true. Imaging the following examples: time 1: at the beginning in:-1 in:-1 in:-1 in:-1 out:-1 out:-1 out:-1 out:-1 SIGGEN A B Spk time 2: after someone called snd_soc_dapm_new_widgets() (e.g. create_fill_widget_route_map() in sound/soc/codecs/hdac_hdmi.c) in:1 in:0 in:0 in:0 out:0 out:0 out:0 out:1 SIGGEN A B Spk time 3: routes added in:1 in:0 in:0 in:0 out:0 out:0 out:0 out:1 SIGGEN -----> A -----> B ---> Spk In the end, the path should be powered on but it did not. At time 3, "in" of SIGGEN and "out" of Spk did not propagate to their neighbors because snd_soc_dapm_add_path() will not invalidate the paths if the card has not instantiated (i.e. card->instantiated is false). To correct the state of audio map, recalculate the whole map forcely. Signed-off-by: Tzung-Bi Shih Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index fee4b0ef5566..42c2a3065b77 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2307,6 +2307,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card) } card->instantiated = 1; + dapm_mark_endpoints_dirty(card); snd_soc_dapm_sync(&card->dapm); mutex_unlock(&card->mutex); mutex_unlock(&client_mutex); From aa17c9c00ce006de2dd7b0eabed9e45b48d3d044 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 31 Oct 2018 15:20:05 +0100 Subject: [PATCH 1004/2608] iio/hid-sensors: Fix IIO_CHAN_INFO_RAW returning wrong values for signed numbers [ Upstream commit 0145b50566e7de5637e80ecba96c7f0e6fff1aad ] Before this commit sensor_hub_input_attr_get_raw_value() failed to take the signedness of 16 and 8 bit values into account, returning e.g. 65436 instead of -100 for the z-axis reading of an accelerometer. This commit adds a new is_signed parameter to the function and makes all callers pass the appropriate value for this. While at it, this commit also fixes up some neighboring lines where statements were needlessly split over 2 lines to improve readability. Signed-off-by: Hans de Goede Acked-by: Srinivas Pandruvada Acked-by: Benjamin Tissoires Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/hid/hid-sensor-custom.c | 2 +- drivers/hid/hid-sensor-hub.c | 13 ++++++++++--- drivers/iio/accel/hid-sensor-accel-3d.c | 5 ++++- drivers/iio/gyro/hid-sensor-gyro-3d.c | 5 ++++- drivers/iio/humidity/hid-sensor-humidity.c | 3 ++- drivers/iio/light/hid-sensor-als.c | 8 +++++--- drivers/iio/light/hid-sensor-prox.c | 8 +++++--- drivers/iio/magnetometer/hid-sensor-magn-3d.c | 8 +++++--- drivers/iio/orientation/hid-sensor-incl-3d.c | 8 +++++--- drivers/iio/pressure/hid-sensor-press.c | 8 +++++--- drivers/iio/temperature/hid-sensor-temperature.c | 3 ++- drivers/rtc/rtc-hid-sensor-time.c | 2 +- include/linux/hid-sensor-hub.h | 4 +++- 13 files changed, 52 insertions(+), 25 deletions(-) diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c index 0bcf041368c7..574126b649e9 100644 --- a/drivers/hid/hid-sensor-custom.c +++ b/drivers/hid/hid-sensor-custom.c @@ -358,7 +358,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr, sensor_inst->hsdev, sensor_inst->hsdev->usage, usage, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, false); } else if (!strncmp(name, "units", strlen("units"))) value = sensor_inst->fields[field_index].attribute.units; else if (!strncmp(name, "unit-expo", strlen("unit-expo"))) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index faba542d1b07..b5bd5cb7d532 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(sensor_hub_get_feature); int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, u32 usage_id, u32 attr_usage_id, u32 report_id, - enum sensor_hub_read_flags flag) + enum sensor_hub_read_flags flag, + bool is_signed) { struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev); unsigned long flags; @@ -331,10 +332,16 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, &hsdev->pending.ready, HZ*5); switch (hsdev->pending.raw_size) { case 1: - ret_val = *(u8 *)hsdev->pending.raw_data; + if (is_signed) + ret_val = *(s8 *)hsdev->pending.raw_data; + else + ret_val = *(u8 *)hsdev->pending.raw_data; break; case 2: - ret_val = *(u16 *)hsdev->pending.raw_data; + if (is_signed) + ret_val = *(s16 *)hsdev->pending.raw_data; + else + ret_val = *(u16 *)hsdev->pending.raw_data; break; case 4: ret_val = *(u32 *)hsdev->pending.raw_data; diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c index 2238a26aba63..f573d9c61fc3 100644 --- a/drivers/iio/accel/hid-sensor-accel-3d.c +++ b/drivers/iio/accel/hid-sensor-accel-3d.c @@ -149,6 +149,7 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; struct hid_sensor_hub_device *hsdev = accel_state->common_attributes.hsdev; @@ -158,12 +159,14 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev, case 0: hid_sensor_power_state(&accel_state->common_attributes, true); report_id = accel_state->accel[chan->scan_index].report_id; + min = accel_state->accel[chan->scan_index].logical_minimum; address = accel_3d_addresses[chan->scan_index]; if (report_id >= 0) *val = sensor_hub_input_attr_get_raw_value( accel_state->common_attributes.hsdev, hsdev->usage, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); else { *val = 0; hid_sensor_power_state(&accel_state->common_attributes, diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c index c67ce2ac4715..d9192eb41131 100644 --- a/drivers/iio/gyro/hid-sensor-gyro-3d.c +++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c @@ -111,6 +111,7 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; @@ -118,13 +119,15 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev, case 0: hid_sensor_power_state(&gyro_state->common_attributes, true); report_id = gyro_state->gyro[chan->scan_index].report_id; + min = gyro_state->gyro[chan->scan_index].logical_minimum; address = gyro_3d_addresses[chan->scan_index]; if (report_id >= 0) *val = sensor_hub_input_attr_get_raw_value( gyro_state->common_attributes.hsdev, HID_USAGE_SENSOR_GYRO_3D, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); else { *val = 0; hid_sensor_power_state(&gyro_state->common_attributes, diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c index 6e09c1acfe51..e53914d51ec3 100644 --- a/drivers/iio/humidity/hid-sensor-humidity.c +++ b/drivers/iio/humidity/hid-sensor-humidity.c @@ -75,7 +75,8 @@ static int humidity_read_raw(struct iio_dev *indio_dev, HID_USAGE_SENSOR_HUMIDITY, HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY, humid_st->humidity_attr.report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + humid_st->humidity_attr.logical_minimum < 0); hid_sensor_power_state(&humid_st->common_attributes, false); return IIO_VAL_INT; diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 059d964772c7..95ca86f50434 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -93,6 +93,7 @@ static int als_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; @@ -102,8 +103,8 @@ static int als_read_raw(struct iio_dev *indio_dev, case CHANNEL_SCAN_INDEX_INTENSITY: case CHANNEL_SCAN_INDEX_ILLUM: report_id = als_state->als_illum.report_id; - address = - HID_USAGE_SENSOR_LIGHT_ILLUM; + min = als_state->als_illum.logical_minimum; + address = HID_USAGE_SENSOR_LIGHT_ILLUM; break; default: report_id = -1; @@ -116,7 +117,8 @@ static int als_read_raw(struct iio_dev *indio_dev, als_state->common_attributes.hsdev, HID_USAGE_SENSOR_ALS, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); hid_sensor_power_state(&als_state->common_attributes, false); } else { diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 73fced8a63b7..8c017abc4ee2 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -73,6 +73,7 @@ static int prox_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; @@ -81,8 +82,8 @@ static int prox_read_raw(struct iio_dev *indio_dev, switch (chan->scan_index) { case CHANNEL_SCAN_INDEX_PRESENCE: report_id = prox_state->prox_attr.report_id; - address = - HID_USAGE_SENSOR_HUMAN_PRESENCE; + min = prox_state->prox_attr.logical_minimum; + address = HID_USAGE_SENSOR_HUMAN_PRESENCE; break; default: report_id = -1; @@ -95,7 +96,8 @@ static int prox_read_raw(struct iio_dev *indio_dev, prox_state->common_attributes.hsdev, HID_USAGE_SENSOR_PROX, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); hid_sensor_power_state(&prox_state->common_attributes, false); } else { diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c index 0e791b02ed4a..b495107bd173 100644 --- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c +++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c @@ -163,21 +163,23 @@ static int magn_3d_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; switch (mask) { case 0: hid_sensor_power_state(&magn_state->magn_flux_attributes, true); - report_id = - magn_state->magn[chan->address].report_id; + report_id = magn_state->magn[chan->address].report_id; + min = magn_state->magn[chan->address].logical_minimum; address = magn_3d_addresses[chan->address]; if (report_id >= 0) *val = sensor_hub_input_attr_get_raw_value( magn_state->magn_flux_attributes.hsdev, HID_USAGE_SENSOR_COMPASS_3D, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); else { *val = 0; hid_sensor_power_state( diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c index fd1b3696ee42..16c744bef021 100644 --- a/drivers/iio/orientation/hid-sensor-incl-3d.c +++ b/drivers/iio/orientation/hid-sensor-incl-3d.c @@ -111,21 +111,23 @@ static int incl_3d_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; switch (mask) { case IIO_CHAN_INFO_RAW: hid_sensor_power_state(&incl_state->common_attributes, true); - report_id = - incl_state->incl[chan->scan_index].report_id; + report_id = incl_state->incl[chan->scan_index].report_id; + min = incl_state->incl[chan->scan_index].logical_minimum; address = incl_3d_addresses[chan->scan_index]; if (report_id >= 0) *val = sensor_hub_input_attr_get_raw_value( incl_state->common_attributes.hsdev, HID_USAGE_SENSOR_INCLINOMETER_3D, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); else { hid_sensor_power_state(&incl_state->common_attributes, false); diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c index 6848d8c80eff..1c49ef78f888 100644 --- a/drivers/iio/pressure/hid-sensor-press.c +++ b/drivers/iio/pressure/hid-sensor-press.c @@ -77,6 +77,7 @@ static int press_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; @@ -85,8 +86,8 @@ static int press_read_raw(struct iio_dev *indio_dev, switch (chan->scan_index) { case CHANNEL_SCAN_INDEX_PRESSURE: report_id = press_state->press_attr.report_id; - address = - HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE; + min = press_state->press_attr.logical_minimum; + address = HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE; break; default: report_id = -1; @@ -99,7 +100,8 @@ static int press_read_raw(struct iio_dev *indio_dev, press_state->common_attributes.hsdev, HID_USAGE_SENSOR_PRESSURE, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); hid_sensor_power_state(&press_state->common_attributes, false); } else { diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c index c01efeca4002..6ed5cd5742f1 100644 --- a/drivers/iio/temperature/hid-sensor-temperature.c +++ b/drivers/iio/temperature/hid-sensor-temperature.c @@ -76,7 +76,8 @@ static int temperature_read_raw(struct iio_dev *indio_dev, HID_USAGE_SENSOR_TEMPERATURE, HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE, temp_st->temperature_attr.report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + temp_st->temperature_attr.logical_minimum < 0); hid_sensor_power_state( &temp_st->common_attributes, false); diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 2751dba850c6..3e1abb455472 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -213,7 +213,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm) /* get a report with all values through requesting one value */ sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev, HID_USAGE_SENSOR_TIME, hid_time_addresses[0], - time_state->info[0].report_id, SENSOR_HUB_SYNC); + time_state->info[0].report_id, SENSOR_HUB_SYNC, false); /* wait for all values (event) */ ret = wait_for_completion_killable_timeout( &time_state->comp_last_time, HZ*6); diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index fc7aae64dcde..000de6da3b1b 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -177,6 +177,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev, * @attr_usage_id: Attribute usage id as per spec * @report_id: Report id to look for * @flag: Synchronous or asynchronous read +* @is_signed: If true then fields < 32 bits will be sign-extended * * Issues a synchronous or asynchronous read request for an input attribute. * Returns data upto 32 bits. @@ -190,7 +191,8 @@ enum sensor_hub_read_flags { int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, u32 usage_id, u32 attr_usage_id, u32 report_id, - enum sensor_hub_read_flags flag + enum sensor_hub_read_flags flag, + bool is_signed ); /** From f500a4ef7e5d7feddee380edfb1a1d91fc8fa627 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 16 Nov 2018 21:32:35 +0900 Subject: [PATCH 1005/2608] netfilter: xt_hashlimit: fix a possible memory leak in htable_create() [ Upstream commit b4e955e9f372035361fbc6f07b21fe2cc6a5be4a ] In the htable_create(), hinfo is allocated by vmalloc() So that if error occurred, hinfo should be freed. Fixes: 11d5f15723c9 ("netfilter: xt_hashlimit: Create revision 2 to support higher pps rates") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_hashlimit.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 0c034597b9b8..fe8e8a1622b5 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, /* copy match config into hashtable config */ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3); - - if (ret) + if (ret) { + vfree(hinfo); return ret; + } hinfo->cfg.size = size; if (hinfo->cfg.max == 0) @@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) int ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); - if (ret) return ret; @@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) int ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); - if (ret) return ret; @@ -920,7 +919,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); - if (ret) return ret; @@ -939,7 +937,6 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); - if (ret) return ret; From ca0bcef7c60ba2cb12ca99e706adc3f024a4f8a7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 15 Nov 2018 10:44:57 +0800 Subject: [PATCH 1006/2608] hwmon: (w83795) temp4_type has writable permission [ Upstream commit 09aaf6813cfca4c18034fda7a43e68763f34abb1 ] Both datasheet and comments of store_temp_mode() tell us that temp1~4_type is writable, so fix it. Signed-off-by: Yao Wang Signed-off-by: Huacai Chen Fixes: 39deb6993e7c (" hwmon: (w83795) Simplify temperature sensor type handling") Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/w83795.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 49276bbdac3d..1bb80f992aa8 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -1691,7 +1691,7 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, * somewhere else in the code */ #define SENSOR_ATTR_TEMP(index) { \ - SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 4 ? S_IWUSR : 0), \ + SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 5 ? S_IWUSR : 0), \ show_temp_mode, store_temp_mode, NOT_USED, index - 1), \ SENSOR_ATTR_2(temp##index##_input, S_IRUGO, show_temp, \ NULL, TEMP_READ, index - 1), \ From d36cc6073623904cfc937c649370b56bc0e7dd88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 1 Nov 2018 18:00:01 +0100 Subject: [PATCH 1007/2608] perf tools: Restore proper cwd on return from mnt namespace [ Upstream commit b01c1f69c8660eaeab7d365cd570103c5c073a02 ] When reporting on 'record' server we try to retrieve/use the mnt namespace of the profiled tasks. We use following API with cookie to hold the return namespace, roughly: nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc) setns(newns, 0); ... new ns related open.. ... nsinfo__mountns_exit(struct nscookie *nc) setns(nc->oldns) Once finished we setns to old namespace, which also sets the current working directory (cwd) to "/", trashing the cwd we had. This is mostly fine, because we use absolute paths almost everywhere, but it screws up 'perf diff': # perf diff failed to open perf.data: No such file or directory (try 'perf record' first) ... Adding the current working directory to be part of the cookie and restoring it in the nsinfo__mountns_exit call. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Krister Johansen Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 843ff37bb59e ("perf symbols: Find symbols in different mount namespace") Link: http://lkml.kernel.org/r/20181101170001.30019-1-jolsa@kernel.org [ No need to check for NULL args for free(), use zfree() for struct members ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/namespaces.c | 17 +++++++++++++++-- tools/perf/util/namespaces.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 1ef0049860a8..eadc7ddacbf6 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -17,6 +17,7 @@ #include #include #include +#include struct namespaces *namespaces__new(struct namespaces_event *event) { @@ -185,6 +186,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, char curpath[PATH_MAX]; int oldns = -1; int newns = -1; + char *oldcwd = NULL; if (nc == NULL) return; @@ -198,9 +200,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) return; + oldcwd = get_current_dir_name(); + if (!oldcwd) + return; + oldns = open(curpath, O_RDONLY); if (oldns < 0) - return; + goto errout; newns = open(nsi->mntns_path, O_RDONLY); if (newns < 0) @@ -209,11 +215,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (setns(newns, CLONE_NEWNS) < 0) goto errout; + nc->oldcwd = oldcwd; nc->oldns = oldns; nc->newns = newns; return; errout: + free(oldcwd); if (oldns > -1) close(oldns); if (newns > -1) @@ -222,11 +230,16 @@ errout: void nsinfo__mountns_exit(struct nscookie *nc) { - if (nc == NULL || nc->oldns == -1 || nc->newns == -1) + if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd) return; setns(nc->oldns, CLONE_NEWNS); + if (nc->oldcwd) { + WARN_ON_ONCE(chdir(nc->oldcwd)); + zfree(&nc->oldcwd); + } + if (nc->oldns > -1) { close(nc->oldns); nc->oldns = -1; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 05d82601c9a6..23584a6dd048 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -36,6 +36,7 @@ struct nsinfo { struct nscookie { int oldns; int newns; + char *oldcwd; }; int nsinfo__init(struct nsinfo *nsi); From 7d14117bd218a81f9f174af94b7c4f26aca16ad3 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Mon, 5 Nov 2018 18:11:36 +0000 Subject: [PATCH 1008/2608] PCI: imx6: Fix link training status detection in link up check [ Upstream commit 68bc10bf992180f269816ff3d22eb30383138577 ] This bug was introduced in the interaction for two commits on either branch of the merge commit 562df5c8521e ("Merge branch 'pci/host-designware' into next"). Commit 4d107d3b5a68 ("PCI: imx6: Move link up check into imx6_pcie_wait_for_link()"), changed imx6_pcie_wait_for_link() to poll the link status register directly, checking for link up and not training, and made imx6_pcie_link_up() only check the link up bit (once, not a polling loop). While commit 886bc5ceb5cc ("PCI: designware: Add generic dw_pcie_wait_for_link()"), replaced the loop in imx6_pcie_wait_for_link() with a call to a new dwc core function, which polled imx6_pcie_link_up(), which still checked both link up and not training in a loop. When these two commits were merged, the version of imx6_pcie_wait_for_link() from 886bc5ceb5cc was kept, which eliminated the link training check placed there by 4d107d3b5a68. However, the version of imx6_pcie_link_up() from 4d107d3b5a68 was kept, which eliminated the link training check that had been there and was moved to imx6_pcie_wait_for_link(). The result was the link training check got lost for the imx6 driver. Eliminate imx6_pcie_link_up() so that the default handler, dw_pcie_link_up(), is used instead. The default handler has the correct code, which checks for link up and also that it still is not training, fixing the regression. Fixes: 562df5c8521e ("Merge branch 'pci/host-designware' into next") Signed-off-by: Trent Piepho [lorenzo.pieralisi@arm.com: rewrote the commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Lucas Stach Cc: Bjorn Helgaas Cc: Joao Pinto Cc: Lorenzo Pieralisi Cc: Richard Zhu Signed-off-by: Sasha Levin --- drivers/pci/dwc/pci-imx6.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c index b73483534a5b..1f1069b70e45 100644 --- a/drivers/pci/dwc/pci-imx6.c +++ b/drivers/pci/dwc/pci-imx6.c @@ -83,8 +83,6 @@ struct imx6_pcie { #define PCIE_PL_PFLR_FORCE_LINK (1 << 15) #define PCIE_PHY_DEBUG_R0 (PL_OFFSET + 0x28) #define PCIE_PHY_DEBUG_R1 (PL_OFFSET + 0x2c) -#define PCIE_PHY_DEBUG_R1_XMLH_LINK_IN_TRAINING (1 << 29) -#define PCIE_PHY_DEBUG_R1_XMLH_LINK_UP (1 << 4) #define PCIE_PHY_CTRL (PL_OFFSET + 0x114) #define PCIE_PHY_CTRL_DATA_LOC 0 @@ -653,12 +651,6 @@ static int imx6_pcie_host_init(struct pcie_port *pp) return 0; } -static int imx6_pcie_link_up(struct dw_pcie *pci) -{ - return dw_pcie_readl_dbi(pci, PCIE_PHY_DEBUG_R1) & - PCIE_PHY_DEBUG_R1_XMLH_LINK_UP; -} - static const struct dw_pcie_host_ops imx6_pcie_host_ops = { .host_init = imx6_pcie_host_init, }; @@ -701,7 +693,7 @@ static int imx6_add_pcie_port(struct imx6_pcie *imx6_pcie, } static const struct dw_pcie_ops dw_pcie_ops = { - .link_up = imx6_pcie_link_up, + /* No special ops needed, but pcie-designware still expects this struct */ }; static int imx6_pcie_probe(struct platform_device *pdev) From de21975e71fe3fbb96b42fe6155d8a74fc74e4ca Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 20 Nov 2018 11:52:15 -0600 Subject: [PATCH 1009/2608] objtool: Fix double-free in .cold detection error path [ Upstream commit 0b9301fb632f7111a3293a30cc5b20f1b82ed08d ] If read_symbols() fails during second list traversal (the one dealing with ".cold" subfunctions) it frees the symbol, but never deletes it from the list/hash_table resulting in symbol being freed again in elf_close(). Fix it by just returning an error, leaving cleanup to elf_close(). Signed-off-by: Artem Savkov Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions") Link: http://lkml.kernel.org/r/beac5a9b7da9e8be90223459dcbe07766ae437dd.1542736240.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- tools/objtool/elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0d1acb704f64..3616d626991e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -312,7 +312,7 @@ static int read_symbols(struct elf *elf) if (!pfunc) { WARN("%s(): can't find parent function", sym->name); - goto err; + return -1; } sym->pfunc = pfunc; From 5ce4164fff6735d7f26b3e465deb7133cea558e9 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 20 Nov 2018 11:52:16 -0600 Subject: [PATCH 1010/2608] objtool: Fix segfault in .cold detection with -ffunction-sections [ Upstream commit 22566c1603030f0a036ad564634b064ad1a55db2 ] Because find_symbol_by_name() traverses the same lists as read_symbols(), changing sym->name in place without copying it affects the result of find_symbol_by_name(). In the case where a ".cold" function precedes its parent in sec->symbol_list, it can result in a function being considered a parent of itself. This leads to function length being set to 0 and other consequent side-effects including a segfault in add_switch_table(). The effects of this bug are only visible when building with -ffunction-sections in KCFLAGS. Fix by copying the search string instead of modifying it in place. Signed-off-by: Artem Savkov Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions") Link: http://lkml.kernel.org/r/910abd6b5a4945130fd44f787c24e07b9e07c8da.1542736240.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- tools/objtool/elf.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3616d626991e..dd4ed7c3c062 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -31,6 +31,8 @@ #include "elf.h" #include "warn.h" +#define MAX_NAME_LEN 128 + struct section *find_section_by_name(struct elf *elf, const char *name) { struct section *sec; @@ -298,6 +300,8 @@ static int read_symbols(struct elf *elf) /* Create parent/child links for any cold subfunctions */ list_for_each_entry(sec, &elf->sections, list) { list_for_each_entry(sym, &sec->symbol_list, list) { + char pname[MAX_NAME_LEN + 1]; + size_t pnamelen; if (sym->type != STT_FUNC) continue; sym->pfunc = sym->cfunc = sym; @@ -305,9 +309,16 @@ static int read_symbols(struct elf *elf) if (!coldstr) continue; - coldstr[0] = '\0'; - pfunc = find_symbol_by_name(elf, sym->name); - coldstr[0] = '.'; + pnamelen = coldstr - sym->name; + if (pnamelen > MAX_NAME_LEN) { + WARN("%s(): parent function name exceeds maximum length of %d characters", + sym->name, MAX_NAME_LEN); + return -1; + } + + strncpy(pname, sym->name, pnamelen); + pname[pnamelen] = '\0'; + pfunc = find_symbol_by_name(elf, pname); if (!pfunc) { WARN("%s(): can't find parent function", From b254f11cc9304b757357460643003ea7087e6c5c Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Tue, 20 Nov 2018 17:57:37 +0100 Subject: [PATCH 1011/2608] ARM: dts: at91: sama5d2: use the divided clock for SMC [ Upstream commit 4ab7ca092c3c7ac8b16aa28eba723a8868f82f14 ] The SAMA5D2 is different from SAMA5D3 and SAMA5D4, as there are two different clocks for the peripherals in the SoC. The Static Memory controller is connected to the divided master clock. Unfortunately, the device tree does not correctly show this and uses the master clock directly. This clock is then used by the code for the NAND controller to calculate the timings for the controller, and we end up with slow NAND Flash access. Fix the device tree, and the performance of Flash access is improved. Signed-off-by: Romain Izard Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index b1a26b42d190..a8e4b89097d9 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -308,7 +308,7 @@ 0x1 0x0 0x60000000 0x10000000 0x2 0x0 0x70000000 0x10000000 0x3 0x0 0x80000000 0x10000000>; - clocks = <&mck>; + clocks = <&h32ck>; status = "disabled"; nand_controller: nand-controller { From 67080eb258b4329ff55eefcd11bdee295ce905e5 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Wed, 14 Nov 2018 18:32:37 +0000 Subject: [PATCH 1012/2608] Btrfs: send, fix infinite loop due to directory rename dependencies [ Upstream commit a4390aee72713d9e73f1132bcdeb17d72fbbf974 ] When doing an incremental send, due to the need of delaying directory move (rename) operations we can end up in infinite loop at apply_children_dir_moves(). An example scenario that triggers this problem is described below, where directory names correspond to the numbers of their respective inodes. Parent snapshot: . |--- 261/ |--- 271/ |--- 266/ |--- 259/ |--- 260/ | |--- 267 | |--- 264/ | |--- 258/ | |--- 257/ | |--- 265/ |--- 268/ |--- 269/ | |--- 262/ | |--- 270/ |--- 272/ | |--- 263/ | |--- 275/ | |--- 274/ |--- 273/ Send snapshot: . |-- 275/ |-- 274/ |-- 273/ |-- 262/ |-- 269/ |-- 258/ |-- 271/ |-- 268/ |-- 267/ |-- 270/ |-- 259/ | |-- 265/ | |-- 272/ |-- 257/ |-- 260/ |-- 264/ |-- 263/ |-- 261/ |-- 266/ When processing inode 257 we delay its move (rename) operation because its new parent in the send snapshot, inode 272, was not yet processed. Then when processing inode 272, we delay the move operation for that inode because inode 274 is its ancestor in the send snapshot. Finally we delay the move operation for inode 274 when processing it because inode 275 is its new parent in the send snapshot and was not yet moved. When finishing processing inode 275, we start to do the move operations that were previously delayed (at apply_children_dir_moves()), resulting in the following iterations: 1) We issue the move operation for inode 274; 2) Because inode 262 depended on the move operation of inode 274 (it was delayed because 274 is its ancestor in the send snapshot), we issue the move operation for inode 262; 3) We issue the move operation for inode 272, because it was delayed by inode 274 too (ancestor of 272 in the send snapshot); 4) We issue the move operation for inode 269 (it was delayed by 262); 5) We issue the move operation for inode 257 (it was delayed by 272); 6) We issue the move operation for inode 260 (it was delayed by 272); 7) We issue the move operation for inode 258 (it was delayed by 269); 8) We issue the move operation for inode 264 (it was delayed by 257); 9) We issue the move operation for inode 271 (it was delayed by 258); 10) We issue the move operation for inode 263 (it was delayed by 264); 11) We issue the move operation for inode 268 (it was delayed by 271); 12) We verify if we can issue the move operation for inode 270 (it was delayed by 271). We detect a path loop in the current state, because inode 267 needs to be moved first before we can issue the move operation for inode 270. So we delay again the move operation for inode 270, this time we will attempt to do it after inode 267 is moved; 13) We issue the move operation for inode 261 (it was delayed by 263); 14) We verify if we can issue the move operation for inode 266 (it was delayed by 263). We detect a path loop in the current state, because inode 270 needs to be moved first before we can issue the move operation for inode 266. So we delay again the move operation for inode 266, this time we will attempt to do it after inode 270 is moved (its move operation was delayed in step 12); 15) We issue the move operation for inode 267 (it was delayed by 268); 16) We verify if we can issue the move operation for inode 266 (it was delayed by 270). We detect a path loop in the current state, because inode 270 needs to be moved first before we can issue the move operation for inode 266. So we delay again the move operation for inode 266, this time we will attempt to do it after inode 270 is moved (its move operation was delayed in step 12). So here we added again the same delayed move operation that we added in step 14; 17) We attempt again to see if we can issue the move operation for inode 266, and as in step 16, we realize we can not due to a path loop in the current state due to a dependency on inode 270. Again we delay inode's 266 rename to happen after inode's 270 move operation, adding the same dependency to the empty stack that we did in steps 14 and 16. The next iteration will pick the same move dependency on the stack (the only entry) and realize again there is still a path loop and then again the same dependency to the stack, over and over, resulting in an infinite loop. So fix this by preventing adding the same move dependency entries to the stack by removing each pending move record from the red black tree of pending moves. This way the next call to get_pending_dir_moves() will not return anything for the current parent inode. A test case for fstests, with this reproducer, follows soon. Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana [Wrote changelog with example and more clear explanation] Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/send.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index baf5a4cd7ffc..3f22af96d63b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3354,7 +3354,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) kfree(m); } -static void tail_append_pending_moves(struct pending_dir_move *moves, +static void tail_append_pending_moves(struct send_ctx *sctx, + struct pending_dir_move *moves, struct list_head *stack) { if (list_empty(&moves->list)) { @@ -3365,6 +3366,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves, list_add_tail(&moves->list, stack); list_splice_tail(&list, stack); } + if (!RB_EMPTY_NODE(&moves->node)) { + rb_erase(&moves->node, &sctx->pending_dir_moves); + RB_CLEAR_NODE(&moves->node); + } } static int apply_children_dir_moves(struct send_ctx *sctx) @@ -3379,7 +3384,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) return 0; INIT_LIST_HEAD(&stack); - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); while (!list_empty(&stack)) { pm = list_first_entry(&stack, struct pending_dir_move, list); @@ -3390,7 +3395,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) goto out; pm = get_pending_dir_moves(sctx, parent_ino); if (pm) - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); } return 0; From 9692eefbee777b18e22998f0dc2852cf8f34174e Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 5 Nov 2018 08:07:37 +0200 Subject: [PATCH 1013/2608] RDMA/mlx5: Fix fence type for IB_WR_LOCAL_INV WR [ Upstream commit 074fca3a18e7e1e0d4d7dcc9d7badc43b90232f4 ] Currently, for IB_WR_LOCAL_INV WR, when the next fence is None, the current fence will be SMALL instead of Normal Fence. Without this patch krping doesn't work on CX-5 devices and throws following error: The error messages are from CX5 driver are: (from server side) [ 710.434014] mlx5_0:dump_cqe:278:(pid 2712): dump error cqe [ 710.434016] 00000000 00000000 00000000 00000000 [ 710.434016] 00000000 00000000 00000000 00000000 [ 710.434017] 00000000 00000000 00000000 00000000 [ 710.434018] 00000000 93003204 100000b8 000524d2 [ 710.434019] krping: cq completion failed with wr_id 0 status 4 opcode 128 vender_err 32 Fixed the logic to set the correct fence type. Fixes: 6e8484c5cf07 ("RDMA/mlx5: set UMR wqe fence according to HCA cap") Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/qp.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index dfc190055167..964c3a0bbf16 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3928,17 +3928,18 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - if (wr->opcode == IB_WR_LOCAL_INV || - wr->opcode == IB_WR_REG_MR) { + if (wr->opcode == IB_WR_REG_MR) { fence = dev->umr_fence; next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - } else if (wr->send_flags & IB_SEND_FENCE) { - if (qp->next_fence) - fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - fence = MLX5_FENCE_MODE_FENCE; - } else { - fence = qp->next_fence; + } else { + if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } } switch (ibqp->qp_type) { From a625d3e3ce05d05f8f2e1e5d4546d12c94feec4d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Nov 2018 09:49:38 -0800 Subject: [PATCH 1014/2608] RDMA/rdmavt: Fix rvt_create_ah function signature [ Upstream commit 4f32fb921b153ae9ea280e02a3e91509fffc03d3 ] rdmavt uses a crazy system that looses the type checking when assinging functions to struct ib_device function pointers. Because of this the signature to this function was not changed when the below commit revised things. Fix the signature so we are not calling a function pointer with a mismatched signature. Fixes: 477864c8fcd9 ("IB/core: Let create_ah return extended response to user") Signed-off-by: Kamal Heib Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rdmavt/ah.c | 4 +++- drivers/infiniband/sw/rdmavt/ah.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ba3639a0d77c..48ea5b8207f0 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -91,13 +91,15 @@ EXPORT_SYMBOL(rvt_check_ah); * rvt_create_ah - create an address handle * @pd: the protection domain * @ah_attr: the attributes of the AH + * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. * * Return: newly allocated ah */ struct ib_ah *rvt_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata) { struct rvt_ah *ah; struct rvt_dev_info *dev = ib_to_rvt(pd->device); diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 16105af99189..25271b48a683 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -51,7 +51,8 @@ #include struct ib_ah *rvt_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata); int rvt_destroy_ah(struct ib_ah *ibah); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); From 4a3fa2a5fed133fd2f26e5f37acdb23ad6e0900c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Nov 2018 13:06:21 +0200 Subject: [PATCH 1015/2608] ASoC: omap-mcbsp: Fix latency value calculation for pm_qos [ Upstream commit dd2f52d8991af9fe0928d59ec502ba52be7bc38d ] The latency number is in usec for the pm_qos. Correct the calculation to give us the time in usec Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/omap/omap-mcbsp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 6b40bdbef336..47c2ed5ca492 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -308,9 +308,9 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream, pkt_size = channels; } - latency = ((((buffer_size - pkt_size) / channels) * 1000) - / (params->rate_num / params->rate_den)); - + latency = (buffer_size - pkt_size) / channels; + latency = latency * USEC_PER_SEC / + (params->rate_num / params->rate_den); mcbsp->latency[substream->stream] = latency; omap_mcbsp_set_threshold(substream, pkt_size); From 997a8d6c47fae788131e3b113841169d89ecf803 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Nov 2018 13:06:22 +0200 Subject: [PATCH 1016/2608] ASoC: omap-mcpdm: Add pm_qos handling to avoid under/overruns with CPU_IDLE [ Upstream commit 373a500e34aea97971c9d71e45edad458d3da98f ] We need to block sleep states which would require longer time to leave than the time the DMA must react to the DMA request in order to keep the FIFO serviced without under of overrun. Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/omap/omap-mcpdm.c | 43 ++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c index 64609c77a79d..44ffeb71cd1d 100644 --- a/sound/soc/omap/omap-mcpdm.c +++ b/sound/soc/omap/omap-mcpdm.c @@ -54,6 +54,8 @@ struct omap_mcpdm { unsigned long phys_base; void __iomem *io_base; int irq; + struct pm_qos_request pm_qos_req; + int latency[2]; struct mutex mutex; @@ -277,6 +279,9 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); + int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE; + int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; mutex_lock(&mcpdm->mutex); @@ -289,6 +294,14 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream, } } + if (mcpdm->latency[stream2]) + pm_qos_update_request(&mcpdm->pm_qos_req, + mcpdm->latency[stream2]); + else if (mcpdm->latency[stream1]) + pm_qos_remove_request(&mcpdm->pm_qos_req); + + mcpdm->latency[stream1] = 0; + mutex_unlock(&mcpdm->mutex); } @@ -300,7 +313,7 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream, int stream = substream->stream; struct snd_dmaengine_dai_dma_data *dma_data; u32 threshold; - int channels; + int channels, latency; int link_mask = 0; channels = params_channels(params); @@ -340,14 +353,25 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream, dma_data->maxburst = (MCPDM_DN_THRES_MAX - threshold) * channels; + latency = threshold; } else { /* If playback is not running assume a stereo stream to come */ if (!mcpdm->config[!stream].link_mask) mcpdm->config[!stream].link_mask = (0x3 << 3); dma_data->maxburst = threshold * channels; + latency = (MCPDM_DN_THRES_MAX - threshold); } + /* + * The DMA must act to a DMA request within latency time (usec) to avoid + * under/overflow + */ + mcpdm->latency[stream] = latency * USEC_PER_SEC / params_rate(params); + + if (!mcpdm->latency[stream]) + mcpdm->latency[stream] = 10; + /* Check if we need to restart McPDM with this stream */ if (mcpdm->config[stream].link_mask && mcpdm->config[stream].link_mask != link_mask) @@ -362,6 +386,20 @@ static int omap_mcpdm_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + struct pm_qos_request *pm_qos_req = &mcpdm->pm_qos_req; + int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); + int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE; + int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; + int latency = mcpdm->latency[stream2]; + + /* Prevent omap hardware from hitting off between FIFO fills */ + if (!latency || mcpdm->latency[stream1] < latency) + latency = mcpdm->latency[stream1]; + + if (pm_qos_request_active(pm_qos_req)) + pm_qos_update_request(pm_qos_req, latency); + else if (latency) + pm_qos_add_request(pm_qos_req, PM_QOS_CPU_DMA_LATENCY, latency); if (!omap_mcpdm_active(mcpdm)) { omap_mcpdm_start(mcpdm); @@ -423,6 +461,9 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai) free_irq(mcpdm->irq, (void *)mcpdm); pm_runtime_disable(mcpdm->dev); + if (pm_qos_request_active(&mcpdm->pm_qos_req)) + pm_qos_remove_request(&mcpdm->pm_qos_req); + return 0; } From bdafd18e24c8fcf704950dc48849e271cceee19f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Nov 2018 13:06:23 +0200 Subject: [PATCH 1017/2608] ASoC: omap-dmic: Add pm_qos handling to avoid overruns with CPU_IDLE [ Upstream commit ffdcc3638c58d55a6fa68b6e5dfd4fb4109652eb ] We need to block sleep states which would require longer time to leave than the time the DMA must react to the DMA request in order to keep the FIFO serviced without overrun. Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/omap/omap-dmic.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/omap/omap-dmic.c b/sound/soc/omap/omap-dmic.c index 09db2aec12a3..776e809a8aab 100644 --- a/sound/soc/omap/omap-dmic.c +++ b/sound/soc/omap/omap-dmic.c @@ -48,6 +48,8 @@ struct omap_dmic { struct device *dev; void __iomem *io_base; struct clk *fclk; + struct pm_qos_request pm_qos_req; + int latency; int fclk_freq; int out_freq; int clk_div; @@ -124,6 +126,8 @@ static void omap_dmic_dai_shutdown(struct snd_pcm_substream *substream, mutex_lock(&dmic->mutex); + pm_qos_remove_request(&dmic->pm_qos_req); + if (!dai->active) dmic->active = 0; @@ -226,6 +230,8 @@ static int omap_dmic_dai_hw_params(struct snd_pcm_substream *substream, /* packet size is threshold * channels */ dma_data = snd_soc_dai_get_dma_data(dai, substream); dma_data->maxburst = dmic->threshold * channels; + dmic->latency = (OMAP_DMIC_THRES_MAX - dmic->threshold) * USEC_PER_SEC / + params_rate(params); return 0; } @@ -236,6 +242,9 @@ static int omap_dmic_dai_prepare(struct snd_pcm_substream *substream, struct omap_dmic *dmic = snd_soc_dai_get_drvdata(dai); u32 ctrl; + if (pm_qos_request_active(&dmic->pm_qos_req)) + pm_qos_update_request(&dmic->pm_qos_req, dmic->latency); + /* Configure uplink threshold */ omap_dmic_write(dmic, OMAP_DMIC_FIFO_CTRL_REG, dmic->threshold); From 42cb0b9efc9e18e8b81e9a296082381fc8b70071 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 23 Nov 2018 15:56:33 +0800 Subject: [PATCH 1018/2608] exportfs: do not read dentry after free [ Upstream commit 2084ac6c505a58f7efdec13eba633c6aaa085ca5 ] The function dentry_connected calls dput(dentry) to drop the previously acquired reference to dentry. In this case, dentry can be released. After that, IS_ROOT(dentry) checks the condition (dentry == dentry->d_parent), which may result in a use-after-free bug. This patch directly compares dentry with its parent obtained before dropping the reference. Fixes: a056cc8934c("exportfs: stop retrying once we race with rename/remove") Signed-off-by: Pan Bian Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/exportfs/expfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 329a5d103846..c22cc9d2a5c9 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -77,7 +77,7 @@ static bool dentry_connected(struct dentry *dentry) struct dentry *parent = dget_parent(dentry); dput(dentry); - if (IS_ROOT(dentry)) { + if (dentry == parent) { dput(parent); return false; } From 9209043b29881a7ede5e34610c4feadd29e59e40 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Fri, 23 Nov 2018 17:43:26 +0100 Subject: [PATCH 1019/2608] bpf: fix check of allowed specifiers in bpf_trace_printk [ Upstream commit 1efb6ee3edea57f57f9fb05dba8dcb3f7333f61f ] A format string consisting of "%p" or "%s" followed by an invalid specifier (e.g. "%p%\n" or "%s%") could pass the check which would make format_decode (lib/vsprintf.c) to warn. Fixes: 9c959c863f82 ("tracing: Allow BPF programs to call bpf_trace_printk()") Reported-by: syzbot+1ec5c5ec949c4adaa0c4@syzkaller.appspotmail.com Signed-off-by: Martynas Pumputis Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6350f64d5aa4..f9dd8fd055a6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -161,11 +161,13 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, i++; } else if (fmt[i] == 'p' || fmt[i] == 's') { mod[fmt_cnt]++; - i++; - if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) + /* disallow any further format extensions */ + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) return -EINVAL; fmt_cnt++; - if (fmt[i - 1] == 's') { + if (fmt[i] == 's') { if (str_seen) /* allow only one '%s' per fmt string */ return -EINVAL; From 57dea3802f389e38b4e3f4d439e0540f00359205 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 15 Nov 2018 15:14:30 +0800 Subject: [PATCH 1020/2608] ipvs: call ip_vs_dst_notifier earlier than ipv6_dev_notf [ Upstream commit 2a31e4bd9ad255ee40809b5c798c4b1c2b09703b ] ip_vs_dst_event is supposed to clean up all dst used in ipvs' destinations when a net dev is going down. But it works only when the dst's dev is the same as the dev from the event. Now with the same priority but late registration, ip_vs_dst_notifier is always called later than ipv6_dev_notf where the dst's dev is set to lo for NETDEV_DOWN event. As the dst's dev lo is not the same as the dev from the event in ip_vs_dst_event, ip_vs_dst_notifier doesn't actually work. Also as these dst have to wait for dest_trash_timer to clean them up. It would cause some non-permanent kernel warnings: unregister_netdevice: waiting for br0 to become free. Usage count = 3 To fix it, call ip_vs_dst_notifier earlier than ipv6_dev_notf by increasing its priority to ADDRCONF_NOTIFY_PRIORITY + 5. Note that for ipv4 route fib_netdev_notifier doesn't set dst's dev to lo in NETDEV_DOWN event, so this fix is only needed when IP_VS_IPV6 is defined. Fixes: 7a4f0761fce3 ("IPVS: init and cleanup restructuring") Reported-by: Li Shuang Signed-off-by: Xin Long Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_ctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 327ebe786eeb..2f45c3ce77ef 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4012,6 +4012,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) static struct notifier_block ip_vs_dst_notifier = { .notifier_call = ip_vs_dst_event, +#ifdef CONFIG_IP_VS_IPV6 + .priority = ADDRCONF_NOTIFY_PRIORITY + 5, +#endif }; int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) From 11f8633c5a334850e9171f4808f20f5a2039060e Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:04 +0200 Subject: [PATCH 1021/2608] USB: omap_udc: use devm_request_irq() [ Upstream commit 286afdde1640d8ea8916a0f05e811441fbbf4b9d ] The current code fails to release the third irq on the error path (observed by reading the code), and we get also multiple WARNs with failing gadget drivers due to duplicate IRQ releases. Fix by using devm_request_irq(). Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 37 +++++++++---------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index f05ba6825bfe..e515c85ef0c5 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2886,8 +2886,8 @@ bad_on_1710: udc->clr_halt = UDC_RESET_EP; /* USB general purpose IRQ: ep0, state changes, dma, etc */ - status = request_irq(pdev->resource[1].start, omap_udc_irq, - 0, driver_name, udc); + status = devm_request_irq(&pdev->dev, pdev->resource[1].start, + omap_udc_irq, 0, driver_name, udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[1].start, status); @@ -2895,20 +2895,20 @@ bad_on_1710: } /* USB "non-iso" IRQ (PIO for all but ep0) */ - status = request_irq(pdev->resource[2].start, omap_udc_pio_irq, - 0, "omap_udc pio", udc); + status = devm_request_irq(&pdev->dev, pdev->resource[2].start, + omap_udc_pio_irq, 0, "omap_udc pio", udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[2].start, status); - goto cleanup2; + goto cleanup1; } #ifdef USE_ISO - status = request_irq(pdev->resource[3].start, omap_udc_iso_irq, - 0, "omap_udc iso", udc); + status = devm_request_irq(&pdev->dev, pdev->resource[3].start, + omap_udc_iso_irq, 0, "omap_udc iso", udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[3].start, status); - goto cleanup3; + goto cleanup1; } #endif if (cpu_is_omap16xx() || cpu_is_omap7xx()) { @@ -2921,22 +2921,11 @@ bad_on_1710: create_proc_file(); status = usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, omap_udc_release); - if (status) - goto cleanup4; + if (!status) + return 0; - return 0; - -cleanup4: remove_proc_file(); -#ifdef USE_ISO -cleanup3: - free_irq(pdev->resource[2].start, udc); -#endif - -cleanup2: - free_irq(pdev->resource[1].start, udc); - cleanup1: kfree(udc); udc = NULL; @@ -2980,12 +2969,6 @@ static int omap_udc_remove(struct platform_device *pdev) remove_proc_file(); -#ifdef USE_ISO - free_irq(pdev->resource[3].start, udc); -#endif - free_irq(pdev->resource[2].start, udc); - free_irq(pdev->resource[1].start, udc); - if (udc->dc_clk) { if (udc->clk_requested) omap_udc_enable_clock(0); From 09eb2b90d9e00acf0aa314f59b69de3b0037bb21 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:05 +0200 Subject: [PATCH 1022/2608] USB: omap_udc: fix crashes on probe error and module removal [ Upstream commit 99f700366fcea1aa2fa3c49c99f371670c3c62f8 ] We currently crash if usb_add_gadget_udc_release() fails, since the udc->done is not initialized until in the remove function. Furthermore, on module removal the udc data is accessed although the release function is already triggered by usb_del_gadget_udc() early in the function. Fix by rewriting the release and remove functions, basically moving all the cleanup into the release function, and doing the completion only in the module removal case. The patch fixes omap_udc module probe with a failing gadged, and also allows the removal of omap_udc. Tested by running "modprobe omap_udc; modprobe -r omap_udc" in a loop. Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 50 ++++++++++++------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index e515c85ef0c5..d45dc14ef0a2 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2612,9 +2612,22 @@ omap_ep_setup(char *name, u8 addr, u8 type, static void omap_udc_release(struct device *dev) { - complete(udc->done); + pullup_disable(udc); + if (!IS_ERR_OR_NULL(udc->transceiver)) { + usb_put_phy(udc->transceiver); + udc->transceiver = NULL; + } + omap_writew(0, UDC_SYSCON1); + remove_proc_file(); + if (udc->dc_clk) { + if (udc->clk_requested) + omap_udc_enable_clock(0); + clk_put(udc->hhc_clk); + clk_put(udc->dc_clk); + } + if (udc->done) + complete(udc->done); kfree(udc); - udc = NULL; } static int @@ -2919,12 +2932,8 @@ bad_on_1710: } create_proc_file(); - status = usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, - omap_udc_release); - if (!status) - return 0; - - remove_proc_file(); + return usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, + omap_udc_release); cleanup1: kfree(udc); @@ -2951,36 +2960,15 @@ static int omap_udc_remove(struct platform_device *pdev) { DECLARE_COMPLETION_ONSTACK(done); - if (!udc) - return -ENODEV; - - usb_del_gadget_udc(&udc->gadget); - if (udc->driver) - return -EBUSY; - udc->done = &done; - pullup_disable(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) { - usb_put_phy(udc->transceiver); - udc->transceiver = NULL; - } - omap_writew(0, UDC_SYSCON1); + usb_del_gadget_udc(&udc->gadget); - remove_proc_file(); - - if (udc->dc_clk) { - if (udc->clk_requested) - omap_udc_enable_clock(0); - clk_put(udc->hhc_clk); - clk_put(udc->dc_clk); - } + wait_for_completion(&done); release_mem_region(pdev->resource[0].start, pdev->resource[0].end - pdev->resource[0].start + 1); - wait_for_completion(&done); - return 0; } From e68f6cb901194b9aa662dbe65b69486c32e58615 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:06 +0200 Subject: [PATCH 1023/2608] USB: omap_udc: fix omap_udc_start() on 15xx machines [ Upstream commit 6ca6695f576b8453fe68865e84d25946d63b10ad ] On OMAP 15xx machines there are no transceivers, and omap_udc_start() always fails as it forgot to adjust the default return value. Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index d45dc14ef0a2..9060b3af27ff 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2045,7 +2045,7 @@ static inline int machine_without_vbus_sense(void) static int omap_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { - int status = -ENODEV; + int status; struct omap_ep *ep; unsigned long flags; @@ -2083,6 +2083,7 @@ static int omap_udc_start(struct usb_gadget *g, goto done; } } else { + status = 0; if (can_pullup(udc)) pullup_enable(udc); else From 72c982a3b1dddf68c3d5e259447994fdbf23b75a Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:07 +0200 Subject: [PATCH 1024/2608] USB: omap_udc: fix USB gadget functionality on Palm Tungsten E [ Upstream commit 2c2322fbcab8102b8cadc09d66714700a2da42c2 ] On Palm TE nothing happens when you try to use gadget drivers and plug the USB cable. Fix by adding the board to the vbus sense quirk list. Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 9060b3af27ff..c8facc8aa87e 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2037,6 +2037,7 @@ static inline int machine_without_vbus_sense(void) { return machine_is_omap_innovator() || machine_is_omap_osk() + || machine_is_omap_palmte() || machine_is_sx1() /* No known omap7xx boards with vbus sense */ || cpu_is_omap7xx(); From ca993dead6b5ca2181185064502ea6b22797a857 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:08 +0200 Subject: [PATCH 1025/2608] USB: omap_udc: fix rejection of out transfers when DMA is used [ Upstream commit 069caf5950dfa75d0526cd89c439ff9d9d3136d8 ] Commit 387f869d2579 ("usb: gadget: u_ether: conditionally align transfer size") started aligning transfer size only if requested, breaking omap_udc DMA mode. Set quirk_ep_out_aligned_size to restore the old behaviour. Fixes: 387f869d2579 ("usb: gadget: u_ether: conditionally align transfer size") Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index c8facc8aa87e..ee0b87a0773c 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2661,6 +2661,7 @@ omap_udc_setup(struct platform_device *odev, struct usb_phy *xceiv) udc->gadget.speed = USB_SPEED_UNKNOWN; udc->gadget.max_speed = USB_SPEED_FULL; udc->gadget.name = driver_name; + udc->gadget.quirk_ep_out_aligned_size = 1; udc->transceiver = xceiv; /* ep0 is special; put it right after the SETUP buffer */ From f47b8824a83057e9f63d39df86332ffeb703311f Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Wed, 21 Nov 2018 13:39:29 +0400 Subject: [PATCH 1026/2608] drm/meson: add support for 1080p25 mode [ Upstream commit 31e1ab494559fb46de304cc6c2aed1528f94b298 ] This essential mode for PAL users is missing, so add it. Fixes: 335e3713afb87 ("drm/meson: Add support for HDMI venc modes and settings") Signed-off-by: Christian Hewitt Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/1542793169-13008-1-git-send-email-christianshewitt@gmail.com Signed-off-by: Sean Paul Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_venc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c index 9509017dbded..d5dfe7045cc6 100644 --- a/drivers/gpu/drm/meson/meson_venc.c +++ b/drivers/gpu/drm/meson/meson_venc.c @@ -714,6 +714,7 @@ struct meson_hdmi_venc_vic_mode { { 5, &meson_hdmi_encp_mode_1080i60 }, { 20, &meson_hdmi_encp_mode_1080i50 }, { 32, &meson_hdmi_encp_mode_1080p24 }, + { 33, &meson_hdmi_encp_mode_1080p50 }, { 34, &meson_hdmi_encp_mode_1080p30 }, { 31, &meson_hdmi_encp_mode_1080p50 }, { 16, &meson_hdmi_encp_mode_1080p60 }, From 97f048033a07b199b98d5e8f10f1022f10d42ed4 Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Wed, 21 Nov 2018 14:00:30 +0100 Subject: [PATCH 1027/2608] netfilter: ipv6: Preserve link scope traffic original oif [ Upstream commit 508b09046c0f21678652fb66fd1e9959d55591d2 ] When ip6_route_me_harder is invoked, it resets outgoing interface of: - link-local scoped packets sent by neighbor discovery - multicast packets sent by MLD host - multicast packets send by MLD proxy daemon that sets outgoing interface through IPV6_PKTINFO ipi6_ifindex Link-local and multicast packets must keep their original oif after ip6_route_me_harder is called. Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/ipv6/netfilter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 9bf260459f83..1f8b1a433b5d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -25,7 +25,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) unsigned int hh_len; struct dst_entry *dst; struct flowi6 fl6 = { - .flowi6_oif = sk ? sk->sk_bound_dev_if : 0, + .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : + rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, From 8f859b2729e980d511f29a68c3474aa27ead72b7 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Sun, 25 Nov 2018 20:34:26 +0200 Subject: [PATCH 1028/2608] IB/mlx5: Fix page fault handling for MW [ Upstream commit 75b7b86bdb0df37e08e44b6c1f99010967f81944 ] Memory windows are implemented with an indirect MKey, when a page fault event comes for a MW Mkey we need to find the MR at the end of the list of the indirect MKeys by iterating on all items from the first to the last. The offset calculated during this process has to be zeroed after the first iteration or the next iteration will start from a wrong address, resulting incorrect ODP faulting behavior. Fixes: db570d7deafb ("IB/mlx5: Add ODP support to MW") Signed-off-by: Artemy Kovalyov Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/odp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3d701c7a4c91..1ed94b6c0b0a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -723,6 +723,7 @@ next_mr: head = frame; bcnt -= frame->bcnt; + offset = 0; } break; From bb3f86913bdad19cc9a0d0165ddd373ff8c930f7 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 8 Nov 2018 16:48:36 +0800 Subject: [PATCH 1029/2608] KVM: x86: fix empty-body warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 354cb410d87314e2eda344feea84809e4261570a ] We get the following warnings about empty statements when building with 'W=1': arch/x86/kvm/lapic.c:632:53: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1907:42: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1936:65: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1975:44: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] Rework the debug helper macro to get rid of these warnings. Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 13dfb55b84db..f7c34184342a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -55,7 +55,7 @@ #define PRIo64 "o" /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) +#define apic_debug(fmt, arg...) do {} while (0) /* 14 is the version for Xeon and Pentium 8.4.8*/ #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) From 7fdd58deeae47d51ce528ef3bfd1f638f3c399be Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 8 Nov 2018 11:22:21 +0800 Subject: [PATCH 1030/2608] x86/kvm/vmx: fix old-style function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1e4329ee2c52692ea42cc677fb2133519718b34a ] The inline keyword which is not at the beginning of the function declaration may trigger the following build warnings, so let's fix it: arch/x86/kvm/vmx.c:1309:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:5947:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:5985:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:6023:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ec588cf4fe95..4353580b659a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1089,7 +1089,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); @@ -5227,7 +5227,7 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); @@ -5262,7 +5262,7 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bit } } -static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); @@ -5297,7 +5297,7 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitm } } -static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type, bool value) { if (value) From ff791c9ec159ce294f949505bd0fe77881e67e02 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 26 Nov 2018 15:07:16 +0100 Subject: [PATCH 1031/2608] net: thunderx: fix NULL pointer dereference in nic_remove [ Upstream commit 24a6d2dd263bc910de018c78d1148b3e33b94512 ] Fix a possible NULL pointer dereference in nic_remove routine removing the nicpf module if nic_probe fails. The issue can be triggered with the following reproducer: $rmmod nicvf $rmmod nicpf [ 521.412008] Unable to handle kernel access to user memory outside uaccess routines at virtual address 0000000000000014 [ 521.422777] Mem abort info: [ 521.425561] ESR = 0x96000004 [ 521.428624] Exception class = DABT (current EL), IL = 32 bits [ 521.434535] SET = 0, FnV = 0 [ 521.437579] EA = 0, S1PTW = 0 [ 521.440730] Data abort info: [ 521.443603] ISV = 0, ISS = 0x00000004 [ 521.447431] CM = 0, WnR = 0 [ 521.450417] user pgtable: 4k pages, 48-bit VAs, pgdp = 0000000072a3da42 [ 521.457022] [0000000000000014] pgd=0000000000000000 [ 521.461916] Internal error: Oops: 96000004 [#1] SMP [ 521.511801] Hardware name: GIGABYTE H270-T70/MT70-HD0, BIOS T49 02/02/2018 [ 521.518664] pstate: 80400005 (Nzcv daif +PAN -UAO) [ 521.523451] pc : nic_remove+0x24/0x88 [nicpf] [ 521.527808] lr : pci_device_remove+0x48/0xd8 [ 521.532066] sp : ffff000013433cc0 [ 521.535370] x29: ffff000013433cc0 x28: ffff810f6ac50000 [ 521.540672] x27: 0000000000000000 x26: 0000000000000000 [ 521.545974] x25: 0000000056000000 x24: 0000000000000015 [ 521.551274] x23: ffff8007ff89a110 x22: ffff000001667070 [ 521.556576] x21: ffff8007ffb170b0 x20: ffff8007ffb17000 [ 521.561877] x19: 0000000000000000 x18: 0000000000000025 [ 521.567178] x17: 0000000000000000 x16: 000000000000010ffc33ff98 x8 : 0000000000000000 [ 521.593683] x7 : 0000000000000000 x6 : 0000000000000001 [ 521.598983] x5 : 0000000000000002 x4 : 0000000000000003 [ 521.604284] x3 : ffff8007ffb17184 x2 : ffff8007ffb17184 [ 521.609585] x1 : ffff000001662118 x0 : ffff000008557be0 [ 521.614887] Process rmmod (pid: 1897, stack limit = 0x00000000859535c3) [ 521.621490] Call trace: [ 521.623928] nic_remove+0x24/0x88 [nicpf] [ 521.627927] pci_device_remove+0x48/0xd8 [ 521.631847] device_release_driver_internal+0x1b0/0x248 [ 521.637062] driver_detach+0x50/0xc0 [ 521.640628] bus_remove_driver+0x60/0x100 [ 521.644627] driver_unregister+0x34/0x60 [ 521.648538] pci_unregister_driver+0x24/0xd8 [ 521.652798] nic_cleanup_module+0x14/0x111c [nicpf] [ 521.657672] __arm64_sys_delete_module+0x150/0x218 [ 521.662460] el0_svc_handler+0x94/0x110 [ 521.666287] el0_svc+0x8/0xc [ 521.669160] Code: aa1e03e0 9102c295 d503201f f9404eb3 (b9401660) Fixes: 4863dea3fab0 ("net: Adding support for Cavium ThunderX network controller") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cavium/thunder/nic_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index fb770b0182d3..d89ec4724efd 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1376,6 +1376,9 @@ static void nic_remove(struct pci_dev *pdev) { struct nicpf *nic = pci_get_drvdata(pdev); + if (!nic) + return; + if (nic->flags & NIC_SRIOV_ENABLED) pci_disable_sriov(pdev); From b641ee14cfc1fe4d665e8bca98de1d00ed043ba9 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 19 Nov 2018 16:49:05 +0100 Subject: [PATCH 1032/2608] usb: gadget: u_ether: fix unsafe list iteration [ Upstream commit c9287fa657b3328b4549c0ab39ea7f197a3d6a50 ] list_for_each_entry_safe() is not safe for deleting entries from the list if the spin lock, which protects it, is released and reacquired during the list iteration. Fix this issue by replacing this construction with a simple check if list is empty and removing the first entry in each iteration. This is almost equivalent to a revert of the commit mentioned in the Fixes: tag. This patch fixes following issue: --->8--- Unable to handle kernel NULL pointer dereference at virtual address 00000104 pgd = (ptrval) [00000104] *pgd=00000000 Internal error: Oops: 817 [#1] PREEMPT SMP ARM Modules linked in: CPU: 1 PID: 84 Comm: kworker/1:1 Not tainted 4.20.0-rc2-next-20181114-00009-g8266b35ec404 #1061 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) Workqueue: events eth_work PC is at rx_fill+0x60/0xac LR is at _raw_spin_lock_irqsave+0x50/0x5c pc : [] lr : [] psr: 80000093 sp : ee7fbee8 ip : 00000100 fp : 00000000 r10: 006000c0 r9 : c10b0ab0 r8 : ee7eb5c0 r7 : ee7eb614 r6 : ee7eb5ec r5 : 000000dc r4 : ee12ac00 r3 : ee12ac24 r2 : 00000200 r1 : 60000013 r0 : ee7eb5ec Flags: Nzcv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 6d5dc04a DAC: 00000051 Process kworker/1:1 (pid: 84, stack limit = 0x(ptrval)) Stack: (0xee7fbee8 to 0xee7fc000) ... [] (rx_fill) from [] (process_one_work+0x200/0x738) [] (process_one_work) from [] (worker_thread+0x2c/0x4c8) [] (worker_thread) from [] (kthread+0x128/0x164) [] (kthread) from [] (ret_from_fork+0x14/0x20) Exception stack(0xee7fbfb0 to 0xee7fbff8) ... ---[ end trace 64480bc835eba7d6 ]--- Fixes: fea14e68ff5e ("usb: gadget: u_ether: use better list accessors") Signed-off-by: Marek Szyprowski Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/u_ether.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index bdbc3fdc7c4f..3a0e4f5d7b83 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -405,12 +405,12 @@ done: static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags) { struct usb_request *req; - struct usb_request *tmp; unsigned long flags; /* fill unused rxq slots with some skb */ spin_lock_irqsave(&dev->req_lock, flags); - list_for_each_entry_safe(req, tmp, &dev->rx_reqs, list) { + while (!list_empty(&dev->rx_reqs)) { + req = list_first_entry(&dev->rx_reqs, struct usb_request, list); list_del_init(&req->list); spin_unlock_irqrestore(&dev->req_lock, flags); @@ -1125,7 +1125,6 @@ void gether_disconnect(struct gether *link) { struct eth_dev *dev = link->ioport; struct usb_request *req; - struct usb_request *tmp; WARN_ON(!dev); if (!dev) @@ -1142,7 +1141,8 @@ void gether_disconnect(struct gether *link) */ usb_ep_disable(link->in_ep); spin_lock(&dev->req_lock); - list_for_each_entry_safe(req, tmp, &dev->tx_reqs, list) { + while (!list_empty(&dev->tx_reqs)) { + req = list_first_entry(&dev->tx_reqs, struct usb_request, list); list_del(&req->list); spin_unlock(&dev->req_lock); @@ -1154,7 +1154,8 @@ void gether_disconnect(struct gether *link) usb_ep_disable(link->out_ep); spin_lock(&dev->req_lock); - list_for_each_entry_safe(req, tmp, &dev->rx_reqs, list) { + while (!list_empty(&dev->rx_reqs)) { + req = list_first_entry(&dev->rx_reqs, struct usb_request, list); list_del(&req->list); spin_unlock(&dev->req_lock); From 397727e77f2f32af322f850b457c1503acdff220 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 28 Nov 2018 11:27:28 +0900 Subject: [PATCH 1033/2608] netfilter: nf_tables: deactivate expressions in rule replecement routine [ Upstream commit ca08987885a147643817d02bf260bc4756ce8cd4 ] There is no expression deactivation call from the rule replacement path, hence, chain counter is not decremented. A few steps to reproduce the problem: %nft add table ip filter %nft add chain ip filter c1 %nft add chain ip filter c1 %nft add rule ip filter c1 jump c2 %nft replace rule ip filter c1 handle 3 accept %nft flush ruleset expression means immediate NFT_JUMP to chain c2. Reference count of chain c2 is increased when the rule is added. When rule is deleted or replaced, the reference counter of c2 should be decreased via nft_rule_expr_deactivate() which calls nft_immediate_deactivate(). Splat looks like: [ 214.396453] WARNING: CPU: 1 PID: 21 at net/netfilter/nf_tables_api.c:1432 nf_tables_chain_destroy.isra.38+0x2f9/0x3a0 [nf_tables] [ 214.398983] Modules linked in: nf_tables nfnetlink [ 214.398983] CPU: 1 PID: 21 Comm: kworker/1:1 Not tainted 4.20.0-rc2+ #44 [ 214.398983] Workqueue: events nf_tables_trans_destroy_work [nf_tables] [ 214.398983] RIP: 0010:nf_tables_chain_destroy.isra.38+0x2f9/0x3a0 [nf_tables] [ 214.398983] Code: 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 8e 00 00 00 48 8b 7b 58 e8 e1 2c 4e c6 48 89 df e8 d9 2c 4e c6 eb 9a <0f> 0b eb 96 0f 0b e9 7e fe ff ff e8 a7 7e 4e c6 e9 a4 fe ff ff e8 [ 214.398983] RSP: 0018:ffff8881152874e8 EFLAGS: 00010202 [ 214.398983] RAX: 0000000000000001 RBX: ffff88810ef9fc28 RCX: ffff8881152876f0 [ 214.398983] RDX: dffffc0000000000 RSI: 1ffff11022a50ede RDI: ffff88810ef9fc78 [ 214.398983] RBP: 1ffff11022a50e9d R08: 0000000080000000 R09: 0000000000000000 [ 214.398983] R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff11022a50eba [ 214.398983] R13: ffff888114446e08 R14: ffff8881152876f0 R15: ffffed1022a50ed6 [ 214.398983] FS: 0000000000000000(0000) GS:ffff888116400000(0000) knlGS:0000000000000000 [ 214.398983] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 214.398983] CR2: 00007fab9bb5f868 CR3: 000000012aa16000 CR4: 00000000001006e0 [ 214.398983] Call Trace: [ 214.398983] ? nf_tables_table_destroy.isra.37+0x100/0x100 [nf_tables] [ 214.398983] ? __kasan_slab_free+0x145/0x180 [ 214.398983] ? nf_tables_trans_destroy_work+0x439/0x830 [nf_tables] [ 214.398983] ? kfree+0xdb/0x280 [ 214.398983] nf_tables_trans_destroy_work+0x5f5/0x830 [nf_tables] [ ... ] Fixes: bb7b40aecbf7 ("netfilter: nf_tables: bogus EBUSY in chain deletions") Reported by: Christoph Anton Mitterer Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=914505 Link: https://bugzilla.kernel.org/show_bug.cgi?id=201791 Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ea1e57daf50e..623ec29ade26 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2400,21 +2400,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (!nft_is_active_next(net, old_rule)) { - err = -ENOENT; - goto err2; - } - trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, - old_rule); + trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; goto err2; } - nft_deactivate_next(net, old_rule); - chain->use--; - - if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { - err = -ENOMEM; + err = nft_delrule(&ctx, old_rule); + if (err < 0) { + nft_trans_destroy(trans); goto err2; } From 7b99a0d73be5fdbbdb0a0eef7de211f6359fc210 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Mon, 24 Sep 2018 12:02:39 +1000 Subject: [PATCH 1034/2608] cachefiles: Fix page leak in cachefiles_read_backing_file while vmscan is active [ Upstream commit 9a24ce5b66f9c8190d63b15f4473600db4935f1f ] [Description] In a heavily loaded system where the system pagecache is nearing memory limits and fscache is enabled, pages can be leaked by fscache while trying read pages from cachefiles backend. This can happen because two applications can be reading same page from a single mount, two threads can be trying to read the backing page at same time. This results in one of the threads finding that a page for the backing file or netfs file is already in the radix tree. During the error handling cachefiles does not clean up the reference on backing page, leading to page leak. [Fix] The fix is straightforward, to decrement the reference when error is encountered. [dhowells: Note that I've removed the clearance and put of newpage as they aren't attested in the commit message and don't appear to actually achieve anything since a new page is only allocated is newpage!=NULL and any residual new page is cleared before returning.] [Testing] I have tested the fix using following method for 12+ hrs. 1) mkdir -p /mnt/nfs ; mount -o vers=3,fsc :/export /mnt/nfs 2) create 10000 files of 2.8MB in a NFS mount. 3) start a thread to simulate heavy VM presssure (while true ; do echo 3 > /proc/sys/vm/drop_caches ; sleep 1 ; done)& 4) start multiple parallel reader for data set at same time find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & .. .. find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & 5) finally check using cat /proc/fs/fscache/stats | grep -i pages ; free -h , cat /proc/meminfo and page-types -r -b lru to ensure all pages are freed. Reviewed-by: Daniel Axtens Signed-off-by: Shantanu Goel Signed-off-by: Kiran Kumar Modukuri [dja: forward ported to current upstream] Signed-off-by: Daniel Axtens Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/cachefiles/rdwr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 199eb396a1bb..54379cf7db7f 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -537,7 +537,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, netpage->index, cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { + put_page(backpage); + backpage = NULL; put_page(netpage); + netpage = NULL; fscache_retrieval_complete(op, 1); continue; } @@ -610,7 +613,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, netpage->index, cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { + put_page(backpage); + backpage = NULL; put_page(netpage); + netpage = NULL; fscache_retrieval_complete(op, 1); continue; } From 9966f78ab603ee58909f411c8b034cef4d1b76ae Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Tue, 6 Nov 2018 16:27:12 +0800 Subject: [PATCH 1035/2608] igb: fix uninitialized variables [ Upstream commit e4c39f7926b4de355f7df75651d75003806aae09 ] This patch fixes the variable 'phy_word' may be used uninitialized. Signed-off-by: Yunjian Wang Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/e1000_i210.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 07d48f2e3369..6766081f5ab9 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -862,6 +862,7 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw) nvm_word = E1000_INVM_DEFAULT_AL; tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, E1000_PHY_PLL_FREQ_PAGE); + phy_word = E1000_PHY_PLL_UNCONF; for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { /* check current state directly from internal PHY */ igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, &phy_word); From 689dee4b72ff02c5e96afee94bac903b7bab5ed0 Mon Sep 17 00:00:00 2001 From: Josh Elsasser Date: Sat, 24 Nov 2018 12:57:33 -0800 Subject: [PATCH 1036/2608] ixgbe: recognize 1000BaseLX SFP modules as 1Gbps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a8bf879af7b1999eba36303ce9cc60e0e7dd816c ] Add the two 1000BaseLX enum values to the X550's check for 1Gbps modules, allowing the core driver code to establish a link over this SFP type. This is done by the out-of-tree driver but the fix wasn't in mainline. Fixes: e23f33367882 ("ixgbe: Fix 1G and 10G link stability for X550EM_x SFP+”) Fixes: 6a14ee0cfb19 ("ixgbe: Add X550 support function pointers") Signed-off-by: Josh Elsasser Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index cf6a245db6d5..a37c951b0753 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2257,7 +2257,9 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, *autoneg = false; if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) { *speed = IXGBE_LINK_SPEED_1GB_FULL; return 0; } From 8a69edb8f7db6908910334399a9829f98163252e Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 28 Nov 2018 15:30:24 +0800 Subject: [PATCH 1037/2608] net: hisilicon: remove unexpected free_netdev [ Upstream commit c758940158bf29fe14e9d0f89d5848f227b48134 ] The net device ndev is freed via free_netdev when failing to register the device. The control flow then jumps to the error handling code block. ndev is used and freed again. Resulting in a use-after-free bug. Signed-off-by: Pan Bian Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hip04_eth.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 0cec06bec63e..c27054b8ce81 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -914,10 +914,8 @@ static int hip04_mac_probe(struct platform_device *pdev) } ret = register_netdev(ndev); - if (ret) { - free_netdev(ndev); + if (ret) goto alloc_fail; - } return 0; From cc92ade5f498bb56f2316588c954d7371849b27b Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Thu, 22 Nov 2018 11:45:24 -0500 Subject: [PATCH 1038/2608] drm/amdgpu: Add delay after enable RLC ucode [ Upstream commit ad97d9de45835b6a0f71983b0ae0cffd7306730a ] Driver shouldn't try to access any GFX registers until RLC is idle. During the test, it took 12 seconds for RLC to clear the BUSY bit in RLC_GPM_STAT register which is un-acceptable for driver. As per RLC engineer, it would take RLC Ucode less than 10,000 GFXCLK cycles to finish its critical section. In a lowest 300M enginer clock setting(default from vbios), 50 us delay is enough. This commit fix the hang when RLC introduce the work around for XGMI which requires more cycles to setup more registers than normal Signed-off-by: shaoyunl Acked-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3981915e2311..b2eecfc9042e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1992,12 +1992,13 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) #endif WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); /* carrizo do enable cp interrupt after cp inited */ - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { gfx_v9_0_enable_gui_idle_interrupt(adev, true); - - udelay(50); + udelay(50); + } #ifdef AMDGPU_RLC_DEBUG_RETRY /* RLC_GPM_GENERAL_6 : RLC Ucode version */ From 6ee9b4deecb18d75a533821864beb6fe598be4fa Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Thu, 22 Nov 2018 11:56:28 +0800 Subject: [PATCH 1039/2608] drm/ast: fixed reading monitor EDID not stable issue [ Upstream commit 300625620314194d9e6d4f6dda71f2dc9cf62d9f ] v1: over-sample data to increase the stability with some specific monitors v2: refine to avoid infinite loop v3: remove un-necessary "volatile" declaration [airlied: fix two checkpatch warnings] Signed-off-by: Y.C. Chen Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/1542858988-1127-1-git-send-email-yc_chen@aspeedtech.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/ast/ast_mode.c | 36 ++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index fae1176b2472..343867b182dd 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -973,9 +973,21 @@ static int get_clock(void *i2c_priv) { struct ast_i2c_chan *i2c = i2c_priv; struct ast_private *ast = i2c->dev->dev_private; - uint32_t val; + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); - val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4; return val & 1 ? 1 : 0; } @@ -983,9 +995,21 @@ static int get_data(void *i2c_priv) { struct ast_i2c_chan *i2c = i2c_priv; struct ast_private *ast = i2c->dev->dev_private; - uint32_t val; + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); - val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5; return val & 1 ? 1 : 0; } @@ -998,7 +1022,7 @@ static void set_clock(void *i2c_priv, int clock) for (i = 0; i < 0x10000; i++) { ujcrb7 = ((clock & 0x01) ? 0 : 1); - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfe, ujcrb7); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf4, ujcrb7); jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x01); if (ujcrb7 == jtemp) break; @@ -1014,7 +1038,7 @@ static void set_data(void *i2c_priv, int data) for (i = 0; i < 0x10000; i++) { ujcrb7 = ((data & 0x01) ? 0 : 1) << 2; - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfb, ujcrb7); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf1, ujcrb7); jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x04); if (ujcrb7 == jtemp) break; From f02e0d5fb6695f69c51c38ef5567a5940791521f Mon Sep 17 00:00:00 2001 From: Srikanth Boddepalli Date: Tue, 27 Nov 2018 19:53:27 +0530 Subject: [PATCH 1040/2608] xen: xlate_mmu: add missing header to fix 'W=1' warning [ Upstream commit 72791ac854fea36034fa7976b748fde585008e78 ] Add a missing header otherwise compiler warns about missed prototype: drivers/xen/xlate_mmu.c:183:5: warning: no previous prototype for 'xen_xlate_unmap_gfn_range?' [-Wmissing-prototypes] int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, ^~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Srikanth Boddepalli Reviewed-by: Boris Ostrovsky Reviewed-by: Joey Pabalinas Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/xlate_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 23f1387b3ef7..e7df65d32c91 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include From b9c242b4fad864505f679db31531903d1b3779e9 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 27 Nov 2018 20:58:21 +0000 Subject: [PATCH 1041/2608] Revert "xen/balloon: Mark unallocated host memory as UNUSABLE" [ Upstream commit 123664101aa2156d05251704fc63f9bcbf77741a ] This reverts commit b3cf8528bb21febb650a7ecbf080d0647be40b9f. That commit unintentionally broke Xen balloon memory hotplug with "hotplug_unpopulated" set to 1. As long as "System RAM" resource got assigned under a new "Unusable memory" resource in IO/Mem tree any attempt to online this memory would fail due to general kernel restrictions on having "System RAM" resources as 1st level only. The original issue that commit has tried to workaround fa564ad96366 ("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f, 60-7f)") also got amended by the following 03a551734 ("x86/PCI: Move and shrink AMD 64-bit window to avoid conflict") which made the original fix to Xen ballooning unnecessary. Signed-off-by: Igor Druzhinin Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/enlighten.c | 78 ---------------------------------------- arch/x86/xen/setup.c | 6 ++-- drivers/xen/balloon.c | 65 +++++---------------------------- include/xen/balloon.h | 5 --- 4 files changed, 13 insertions(+), 141 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index df208af3cd74..515d5e4414c2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -7,7 +7,6 @@ #include #include -#include #include #include @@ -336,80 +335,3 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -void __init arch_xen_balloon_init(struct resource *hostmem_resource) -{ - struct xen_memory_map memmap; - int rc; - unsigned int i, last_guest_ram; - phys_addr_t max_addr = PFN_PHYS(max_pfn); - struct e820_table *xen_e820_table; - const struct e820_entry *entry; - struct resource *res; - - if (!xen_initial_domain()) - return; - - xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); - if (!xen_e820_table) - return; - - memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); - set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); - rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); - if (rc) { - pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); - goto out; - } - - last_guest_ram = 0; - for (i = 0; i < memmap.nr_entries; i++) { - if (xen_e820_table->entries[i].addr >= max_addr) - break; - if (xen_e820_table->entries[i].type == E820_TYPE_RAM) - last_guest_ram = i; - } - - entry = &xen_e820_table->entries[last_guest_ram]; - if (max_addr >= entry->addr + entry->size) - goto out; /* No unallocated host RAM. */ - - hostmem_resource->start = max_addr; - hostmem_resource->end = entry->addr + entry->size; - - /* - * Mark non-RAM regions between the end of dom0 RAM and end of host RAM - * as unavailable. The rest of that region can be used for hotplug-based - * ballooning. - */ - for (; i < memmap.nr_entries; i++) { - entry = &xen_e820_table->entries[i]; - - if (entry->type == E820_TYPE_RAM) - continue; - - if (entry->addr >= hostmem_resource->end) - break; - - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto out; - - res->name = "Unavailable host RAM"; - res->start = entry->addr; - res->end = (entry->addr + entry->size < hostmem_resource->end) ? - entry->addr + entry->size : hostmem_resource->end; - rc = insert_resource(hostmem_resource, res); - if (rc) { - pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", - __func__, res->start, res->end, rc); - kfree(res); - goto out; - } - } - - out: - kfree(xen_e820_table); -} -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 6e0d2086eacb..c114ca767b3b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -808,6 +808,7 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { + bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -823,10 +824,11 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - type = E820_TYPE_UNUSABLE; + discard = true; } - xen_align_and_add_e820_region(addr, chunk_size, type); + if (!discard) + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 065f0b607373..f77e499afddd 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -257,25 +257,10 @@ static void release_memory_resource(struct resource *resource) kfree(resource); } -/* - * Host memory not allocated to dom0. We can use this range for hotplug-based - * ballooning. - * - * It's a type-less resource. Setting IORESOURCE_MEM will make resource - * management algorithms (arch_remove_reservations()) look into guest e820, - * which we don't want. - */ -static struct resource hostmem_resource = { - .name = "Host RAM", -}; - -void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res) -{} - static struct resource *additional_memory_resource(phys_addr_t size) { - struct resource *res, *res_hostmem; - int ret = -ENOMEM; + struct resource *res; + int ret; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -284,42 +269,13 @@ static struct resource *additional_memory_resource(phys_addr_t size) res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL); - if (res_hostmem) { - /* Try to grab a range from hostmem */ - res_hostmem->name = "Host memory"; - ret = allocate_resource(&hostmem_resource, res_hostmem, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - } - - if (!ret) { - /* - * Insert this resource into iomem. Because hostmem_resource - * tracks portion of guest e820 marked as UNUSABLE noone else - * should try to use it. - */ - res->start = res_hostmem->start; - res->end = res_hostmem->end; - ret = insert_resource(&iomem_resource, res); - if (ret < 0) { - pr_err("Can't insert iomem_resource [%llx - %llx]\n", - res->start, res->end); - release_memory_resource(res_hostmem); - res_hostmem = NULL; - res->start = res->end = 0; - } - } - - if (ret) { - ret = allocate_resource(&iomem_resource, res, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - if (ret < 0) { - pr_err("Cannot allocate new System RAM resource\n"); - kfree(res); - return NULL; - } + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; } #ifdef CONFIG_SPARSEMEM @@ -331,7 +287,6 @@ static struct resource *additional_memory_resource(phys_addr_t size) pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", pfn, limit); release_memory_resource(res); - release_memory_resource(res_hostmem); return NULL; } } @@ -810,8 +765,6 @@ static int __init balloon_init(void) set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); register_sysctl_table(xen_root); - - arch_xen_balloon_init(&hostmem_resource); #endif #ifdef CONFIG_XEN_PV diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 61f410fd74e4..4914b93a23f2 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -44,8 +44,3 @@ static inline void xen_balloon_init(void) { } #endif - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -struct resource; -void arch_xen_balloon_init(struct resource *hostmem_resource); -#endif From b718d6bede4bd6c8c6faac1369567f3d7587d4c1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Nov 2018 16:17:22 -0700 Subject: [PATCH 1042/2608] pstore/ram: Correctly calculate usable PRZ bytes [ Upstream commit 89d328f637b9904b6d4c9af73c8a608b8dd4d6f8 ] The actual number of bytes stored in a PRZ is smaller than the bytes requested by platform data, since there is a header on each PRZ. Additionally, if ECC is enabled, there are trailing bytes used as well. Normally this mismatch doesn't matter since PRZs are circular buffers and the leading "overflow" bytes are just thrown away. However, in the case of a compressed record, this rather badly corrupts the results. This corruption was visible with "ramoops.mem_size=204800 ramoops.ecc=1". Any stored crashes would not be uncompressable (producing a pstorefs "dmesg-*.enc.z" file), and triggering errors at boot: [ 2.790759] pstore: crypto_comp_decompress failed, ret = -22! Backporting this depends on commit 70ad35db3321 ("pstore: Convert console write to use ->write_buf") Reported-by: Joel Fernandes Fixes: b0aad7a99c1d ("pstore: Add compression support to pstore") Signed-off-by: Kees Cook Reviewed-by: Joel Fernandes (Google) Signed-off-by: Sasha Levin --- fs/pstore/ram.c | 15 ++++++--------- include/linux/pstore.h | 5 ++++- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 7125b398d312..9f7e546d7050 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -804,17 +804,14 @@ static int ramoops_probe(struct platform_device *pdev) cxt->pstore.data = cxt; /* - * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we - * have to handle dumps, we must have at least record_size buffer. And - * for ftrace, bufsize is irrelevant (if bufsize is 0, buf will be - * ZERO_SIZE_PTR). + * Since bufsize is only used for dmesg crash dumps, it + * must match the size of the dprz record (after PRZ header + * and ECC bytes have been accounted for). */ - if (cxt->console_size) - cxt->pstore.bufsize = 1024; /* LOG_LINE_MAX */ - cxt->pstore.bufsize = max(cxt->record_size, cxt->pstore.bufsize); - cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL); + cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size; + cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL); if (!cxt->pstore.buf) { - pr_err("cannot allocate pstore buffer\n"); + pr_err("cannot allocate pstore crash dump buffer\n"); err = -ENOMEM; goto fail_clear; } diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 61f806a7fe29..170bb981d2fd 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -90,7 +90,10 @@ struct pstore_record { * * @buf_lock: spinlock to serialize access to @buf * @buf: preallocated crash dump buffer - * @bufsize: size of @buf available for crash dump writes + * @bufsize: size of @buf available for crash dump bytes (must match + * smallest number of bytes available for writing to a + * backend entry, since compressed bytes don't take kindly + * to being truncated) * * @read_mutex: serializes @open, @read, @close, and @erase callbacks * @flags: bitfield of frontends the backend can accept writes for From 38026d1afcc1153aa6efebaeac7fc2a31c2779d0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 26 Oct 2018 17:16:29 +1100 Subject: [PATCH 1043/2608] fscache: fix race between enablement and dropping of object [ Upstream commit c5a94f434c82529afda290df3235e4d85873c5b4 ] It was observed that a process blocked indefintely in __fscache_read_or_alloc_page(), waiting for FSCACHE_COOKIE_LOOKING_UP to be cleared via fscache_wait_for_deferred_lookup(). At this time, ->backing_objects was empty, which would normaly prevent __fscache_read_or_alloc_page() from getting to the point of waiting. This implies that ->backing_objects was cleared *after* __fscache_read_or_alloc_page was was entered. When an object is "killed" and then "dropped", FSCACHE_COOKIE_LOOKING_UP is cleared in fscache_lookup_failure(), then KILL_OBJECT and DROP_OBJECT are "called" and only in DROP_OBJECT is ->backing_objects cleared. This leaves a window where something else can set FSCACHE_COOKIE_LOOKING_UP and __fscache_read_or_alloc_page() can start waiting, before ->backing_objects is cleared There is some uncertainty in this analysis, but it seems to be fit the observations. Adding the wake in this patch will be handled correctly by __fscache_read_or_alloc_page(), as it checks if ->backing_objects is empty again, after waiting. Customer which reported the hang, also report that the hang cannot be reproduced with this fix. The backtrace for the blocked process looked like: PID: 29360 TASK: ffff881ff2ac0f80 CPU: 3 COMMAND: "zsh" #0 [ffff881ff43efbf8] schedule at ffffffff815e56f1 #1 [ffff881ff43efc58] bit_wait at ffffffff815e64ed #2 [ffff881ff43efc68] __wait_on_bit at ffffffff815e61b8 #3 [ffff881ff43efca0] out_of_line_wait_on_bit at ffffffff815e625e #4 [ffff881ff43efd08] fscache_wait_for_deferred_lookup at ffffffffa04f2e8f [fscache] #5 [ffff881ff43efd18] __fscache_read_or_alloc_page at ffffffffa04f2ffe [fscache] #6 [ffff881ff43efd58] __nfs_readpage_from_fscache at ffffffffa0679668 [nfs] #7 [ffff881ff43efd78] nfs_readpage at ffffffffa067092b [nfs] #8 [ffff881ff43efda0] generic_file_read_iter at ffffffff81187a73 #9 [ffff881ff43efe50] nfs_file_read at ffffffffa066544b [nfs] #10 [ffff881ff43efe70] __vfs_read at ffffffff811fc756 #11 [ffff881ff43efee8] vfs_read at ffffffff811fccfa #12 [ffff881ff43eff18] sys_read at ffffffff811fda62 #13 [ffff881ff43eff50] entry_SYSCALL_64_fastpath at ffffffff815e986e Signed-off-by: NeilBrown Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/fscache/object.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 7a182c87f378..ab1d7f35f6c2 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -715,6 +715,9 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob if (awaken) wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); + if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); + /* Prevent a race with our last child, which has to signal EV_CLEARED * before dropping our spinlock. From 0859bb2526fb3725d2b53142e67be38e12fca72b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 17 Jul 2018 09:53:42 +0100 Subject: [PATCH 1044/2608] fscache, cachefiles: remove redundant variable 'cache' [ Upstream commit 31ffa563833576bd49a8bf53120568312755e6e2 ] Variable 'cache' is being assigned but is never used hence it is redundant and can be removed. Cleans up clang warning: warning: variable 'cache' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/cachefiles/rdwr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 54379cf7db7f..5e9176ec0d3a 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -969,11 +969,8 @@ error: void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) { struct cachefiles_object *object; - struct cachefiles_cache *cache; object = container_of(_object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); _enter("%p,{%lu}", object, page->index); From 9407fd14d0ee929ea21571055ccf4361b6e2323a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 21 Nov 2018 15:17:37 -0800 Subject: [PATCH 1045/2608] nvme: flush namespace scanning work just before removing namespaces [ Upstream commit f6c8e432cb0479255322c5d0335b9f1699a0270c ] nvme_stop_ctrl can be called also for reset flow and there is no need to flush the scan_work as namespaces are not being removed. This can cause deadlock in rdma, fc and loop drivers since nvme_stop_ctrl barriers before controller teardown (and specifically I/O cancellation of the scan_work itself) takes place, but the scan_work will be blocked anyways so there is no need to flush it. Instead, move scan_work flush to nvme_remove_namespaces() where it really needs to flush. Reported-by: Ming Lei Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed by: James Smart Tested-by: Ewan D. Milne Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3a63d58d2ca9..65f3f1a34b6b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2572,6 +2572,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) { struct nvme_ns *ns, *next; + /* prevent racing with ns scanning */ + flush_work(&ctrl->scan_work); + /* * The dead states indicates the controller was not gracefully * disconnected. In that case, we won't be able to flush any data while @@ -2743,7 +2746,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); - flush_work(&ctrl->scan_work); cancel_work_sync(&ctrl->fw_act_work); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); From 8e3f3c06be02aaf0b09f11eafc3e2f567bdbd6d2 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 29 Nov 2018 09:55:59 +0000 Subject: [PATCH 1046/2608] ACPI/IORT: Fix iort_get_platform_device_domain() uninitialized pointer value [ Upstream commit ea2412dc21cc790335d319181dddc43682aef164 ] Running the Clang static analyzer on IORT code detected the following error: Logic error: Branch condition evaluates to a garbage value in iort_get_platform_device_domain() If the named component associated with a given device has no IORT mappings, iort_get_platform_device_domain() exits its MSI mapping loop with msi_parent pointer containing garbage, which can lead to erroneous code path execution. Initialize the msi_parent pointer, fixing the bug. Fixes: d4f54a186667 ("ACPI: platform: setup MSI domain for ACPI based platform device") Reported-by: Patrick Bellasi Reviewed-by: Hanjun Guo Acked-by: Will Deacon Cc: Sudeep Holla Cc: "Rafael J. Wysocki" Signed-off-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- drivers/acpi/arm64/iort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index de56394dd161..ca414910710e 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -547,7 +547,7 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id) */ static struct irq_domain *iort_get_platform_device_domain(struct device *dev) { - struct acpi_iort_node *node, *msi_parent; + struct acpi_iort_node *node, *msi_parent = NULL; struct fwnode_handle *iort_fwnode; struct acpi_iort_its_group *its; int i; From df2055c0eb8af72790060d6797cac9be56622412 Mon Sep 17 00:00:00 2001 From: Larry Chen Date: Fri, 30 Nov 2018 14:08:56 -0800 Subject: [PATCH 1047/2608] ocfs2: fix deadlock caused by ocfs2_defrag_extent() [ Upstream commit e21e57445a64598b29a6f629688f9b9a39e7242a ] ocfs2_defrag_extent may fall into deadlock. ocfs2_ioctl_move_extents ocfs2_ioctl_move_extents ocfs2_move_extents ocfs2_defrag_extent ocfs2_lock_allocators_move_extents ocfs2_reserve_clusters inode_lock GLOBAL_BITMAP_SYSTEM_INODE __ocfs2_flush_truncate_log inode_lock GLOBAL_BITMAP_SYSTEM_INODE As backtrace shows above, ocfs2_reserve_clusters() will call inode_lock against the global bitmap if local allocator has not sufficient cluters. Once global bitmap could meet the demand, ocfs2_reserve_cluster will return success with global bitmap locked. After ocfs2_reserve_cluster(), if truncate log is full, __ocfs2_flush_truncate_log() will definitely fall into deadlock because it needs to inode_lock global bitmap, which has already been locked. To fix this bug, we could remove from ocfs2_lock_allocators_move_extents() the code which intends to lock global allocator, and put the removed code after __ocfs2_flush_truncate_log(). ocfs2_lock_allocators_move_extents() is referred by 2 places, one is here, the other does not need the data allocator context, which means this patch does not affect the caller so far. Link: http://lkml.kernel.org/r/20181101071422.14470-1-lchen@suse.com Signed-off-by: Larry Chen Reviewed-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/move_extents.c | 47 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 7eb3b0a6347e..f55f82ca3425 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -156,18 +156,14 @@ out: } /* - * lock allocators, and reserving appropriate number of bits for - * meta blocks and data clusters. - * - * in some cases, we don't need to reserve clusters, just let data_ac - * be NULL. + * lock allocator, and reserve appropriate number of bits for + * meta blocks. */ -static int ocfs2_lock_allocators_move_extents(struct inode *inode, +static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_move, u32 extents_to_split, struct ocfs2_alloc_context **meta_ac, - struct ocfs2_alloc_context **data_ac, int extra_blocks, int *credits) { @@ -192,13 +188,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, goto out; } - if (data_ac) { - ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); - if (ret) { - mlog_errno(ret); - goto out; - } - } *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); @@ -257,10 +246,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, - &context->meta_ac, - &context->data_ac, - extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + *len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; @@ -283,6 +272,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } + /* + * Make sure ocfs2_reserve_cluster is called after + * __ocfs2_flush_truncate_log, otherwise, dead lock may happen. + * + * If ocfs2_reserve_cluster is called + * before __ocfs2_flush_truncate_log, dead lock on global bitmap + * may happen. + * + */ + ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac); + if (ret) { + mlog_errno(ret); + goto out_unlock_mutex; + } + handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -600,9 +604,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, - &context->meta_ac, - NULL, extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; From c7aafad098b066ceee0d6a3b2ba63573ef09dc38 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 30 Nov 2018 14:09:07 -0800 Subject: [PATCH 1048/2608] mm/page_alloc.c: fix calculation of pgdat->nr_zones [ Upstream commit 8f416836c0d50b198cad1225132e5abebf8980dc ] init_currently_empty_zone() will adjust pgdat->nr_zones and set it to 'zone_idx(zone) + 1' unconditionally. This is correct in the normal case, while not exact in hot-plug situation. This function is used in two places: * free_area_init_core() * move_pfn_range_to_zone() In the first case, we are sure zone index increase monotonically. While in the second one, this is under users control. One way to reproduce this is: ---------------------------- 1. create a virtual machine with empty node1 -m 4G,slots=32,maxmem=32G \ -smp 4,maxcpus=8 \ -numa node,nodeid=0,mem=4G,cpus=0-3 \ -numa node,nodeid=1,mem=0G,cpus=4-7 2. hot-add cpu 3-7 cpu-add [3-7] 2. hot-add memory to nod1 object_add memory-backend-ram,id=ram0,size=1G device_add pc-dimm,id=dimm0,memdev=ram0,node=1 3. online memory with following order echo online_movable > memory47/state echo online > memory40/state After this, node1 will have its nr_zones equals to (ZONE_NORMAL + 1) instead of (ZONE_MOVABLE + 1). Michal said: "Having an incorrect nr_zones might result in all sorts of problems which would be quite hard to debug (e.g. reclaim not considering the movable zone). I do not expect many users would suffer from this it but still this is trivial and obviously right thing to do so backporting to the stable tree shouldn't be harmful (last famous words)" Link: http://lkml.kernel.org/r/20181117022022.9956-1-richard.weiyang@gmail.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") Signed-off-by: Wei Yang Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Cc: Anshuman Khandual Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page_alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6be91a1a00d9..a2f365f40433 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5544,8 +5544,10 @@ void __meminit init_currently_empty_zone(struct zone *zone, unsigned long size) { struct pglist_data *pgdat = zone->zone_pgdat; + int zone_idx = zone_idx(zone) + 1; - pgdat->nr_zones = zone_idx(zone) + 1; + if (zone_idx > pgdat->nr_zones) + pgdat->nr_zones = zone_idx; zone->zone_start_pfn = zone_start_pfn; From 5ee5fa61d04e2bd8ab004f307bd6ad0808482746 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 30 Nov 2018 14:09:14 -0800 Subject: [PATCH 1049/2608] hfs: do not free node before using [ Upstream commit ce96a407adef126870b3f4a1b73529dd8aa80f49 ] hfs_bmap_free() frees the node via hfs_bnode_put(node). However, it then reads node->this when dumping error message on an error path, which may result in a use-after-free bug. This patch frees the node only when it is never again used. Link: http://lkml.kernel.org/r/1542963889-128825-1-git-send-email-bianpan2016@163.com Fixes: a1185ffa2fc ("HFS rewrite") Signed-off-by: Pan Bian Reviewed-by: Andrew Morton Cc: Joe Perches Cc: Ernesto A. Fernandez Cc: Viacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 374b5688e29e..9bdff5e40626 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -329,13 +329,14 @@ void hfs_bmap_free(struct hfs_bnode *node) nidx -= len * 8; i = node->next; - hfs_bnode_put(node); if (!i) { /* panic */; pr_crit("unable to free bnode %u. bmap not found!\n", node->this); + hfs_bnode_put(node); return; } + hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; From 95c8714e3fa0bee316600c46fd007fec2750206b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 30 Nov 2018 14:09:18 -0800 Subject: [PATCH 1050/2608] hfsplus: do not free node before using [ Upstream commit c7d7d620dcbd2a1c595092280ca943f2fced7bbd ] hfs_bmap_free() frees node via hfs_bnode_put(node). However it then reads node->this when dumping error message on an error path, which may result in a use-after-free bug. This patch frees node only when it is never used. Link: http://lkml.kernel.org/r/1543053441-66942-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Reviewed-by: Andrew Morton Cc: Ernesto A. Fernandez Cc: Joe Perches Cc: Viacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfsplus/btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index de14b2b6881b..3de3bc4918b5 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -454,14 +454,15 @@ void hfs_bmap_free(struct hfs_bnode *node) nidx -= len * 8; i = node->next; - hfs_bnode_put(node); if (!i) { /* panic */; pr_crit("unable to free bnode %u. " "bmap not found!\n", node->this); + hfs_bnode_put(node); return; } + hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; From 225b11374625aa4827fc1ee0b810c27dfe851809 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 30 Nov 2018 14:09:48 -0800 Subject: [PATCH 1051/2608] debugobjects: avoid recursive calls with kmemleak [ Upstream commit 8de456cf87ba863e028c4dd01bae44255ce3d835 ] CONFIG_DEBUG_OBJECTS_RCU_HEAD does not play well with kmemleak due to recursive calls. fill_pool kmemleak_ignore make_black_object put_object __call_rcu (kernel/rcu/tree.c) debug_rcu_head_queue debug_object_activate debug_object_init fill_pool kmemleak_ignore make_black_object ... So add SLAB_NOLEAKTRACE to kmem_cache_create() to not register newly allocated debug objects at all. Link: http://lkml.kernel.org/r/20181126165343.2339-1-cai@gmx.us Signed-off-by: Qian Cai Suggested-by: Catalin Marinas Acked-by: Waiman Long Acked-by: Catalin Marinas Cc: Thomas Gleixner Cc: Yang Shi Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/debugobjects.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 99308479b1c8..bacb00a9cd9f 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -111,7 +111,6 @@ static void fill_pool(void) if (!new) return; - kmemleak_ignore(new); raw_spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); debug_objects_allocated++; @@ -1085,7 +1084,6 @@ static int __init debug_objects_replace_static_objects(void) obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); if (!obj) goto free; - kmemleak_ignore(obj); hlist_add_head(&obj->node, &objects); } @@ -1141,7 +1139,8 @@ void __init debug_objects_mem_init(void) obj_cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0, - SLAB_DEBUG_OBJECTS, NULL); + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, + NULL); if (!obj_cache || debug_objects_replace_static_objects()) { debug_objects_enabled = 0; From 56926f913bf8284f0d33d79cdf0421f0caec8a1f Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 30 Nov 2018 14:10:54 -0800 Subject: [PATCH 1052/2608] ocfs2: fix potential use after free [ Upstream commit 164f7e586739d07eb56af6f6d66acebb11f315c8 ] ocfs2_get_dentry() calls iput(inode) to drop the reference count of inode, and if the reference count hits 0, inode is freed. However, in this function, it then reads inode->i_generation, which may result in a use after free bug. Move the put operation later. Link: http://lkml.kernel.org/r/1543109237-110227-1-git-send-email-bianpan2016@163.com Fixes: 781f200cb7a("ocfs2: Remove masklog ML_EXPORT.") Signed-off-by: Pan Bian Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 9f88188060db..4bf8d5854b27 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -125,10 +125,10 @@ check_err: check_gen: if (handle->ih_generation != inode->i_generation) { - iput(inode); trace_ocfs2_get_dentry_generation((unsigned long long)blkno, handle->ih_generation, inode->i_generation); + iput(inode); result = ERR_PTR(-ESTALE); goto bail; } From 62582f67ff7617313b27b897ed02b569b712dcac Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Dec 2018 09:24:35 -0500 Subject: [PATCH 1053/2608] Revert "printk: Never set console_may_schedule in console_trylock()" This reverts commit c9b8d580b3fb0ab65d37c372aef19a318fda3199. This is just a technical revert to make the printk fix apply cleanly, this patch will be re-picked in about 3 commits. --- kernel/printk/printk.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a9cf2e15f6a3..7161312593dd 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1762,12 +1762,6 @@ asmlinkage int vprintk_emit(int facility, int level, /* If called from the scheduler, we can not call up(). */ if (!in_sched) { - /* - * Disable preemption to avoid being preempted while holding - * console_sem which would prevent anyone from printing to - * console - */ - preempt_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1775,7 +1769,6 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock()) console_unlock(); - preempt_enable(); } return printed_len; @@ -2090,7 +2083,20 @@ int console_trylock(void) return 0; } console_locked = 1; - console_may_schedule = 0; + /* + * When PREEMPT_COUNT disabled we can't reliably detect if it's + * safe to schedule (e.g. calling printk while holding a spin_lock), + * because preempt_disable()/preempt_enable() are just barriers there + * and preempt_count() is always 0. + * + * RCU read sections have a separate preemption counter when + * PREEMPT_RCU enabled thus we must take extra care and check + * rcu_preempt_depth(), otherwise RCU read sections modify + * preempt_count(). + */ + console_may_schedule = !oops_in_progress && + preemptible() && + !rcu_preempt_depth(); return 1; } EXPORT_SYMBOL(console_trylock); From 594231141b003939444a1202a02992b3d0adb23e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 10 Jan 2018 14:24:17 +0100 Subject: [PATCH 1054/2608] printk: Add console owner and waiter logic to load balance console writes [ Upstream commit dbdda842fe96f8932bae554f0adf463c27c42bc7 ] This patch implements what I discussed in Kernel Summit. I added lockdep annotation (hopefully correctly), and it hasn't had any splats (since I fixed some bugs in the first iterations). It did catch problems when I had the owner covering too much. But now that the owner is only set when actively calling the consoles, lockdep has stayed quiet. Here's the design again: I added a "console_owner" which is set to a task that is actively writing to the consoles. It is *not* the same as the owner of the console_lock. It is only set when doing the calls to the console functions. It is protected by a console_owner_lock which is a raw spin lock. There is a console_waiter. This is set when there is an active console owner that is not current, and waiter is not set. This too is protected by console_owner_lock. In printk() when it tries to write to the consoles, we have: if (console_trylock()) console_unlock(); Now I added an else, which will check if there is an active owner, and no current waiter. If that is the case, then console_waiter is set, and the task goes into a spin until it is no longer set. When the active console owner finishes writing the current message to the consoles, it grabs the console_owner_lock and sees if there is a waiter, and clears console_owner. If there is a waiter, then it breaks out of the loop, clears the waiter flag (because that will release the waiter from its spin), and exits. Note, it does *not* release the console semaphore. Because it is a semaphore, there is no owner. Another task may release it. This means that the waiter is guaranteed to be the new console owner! Which it becomes. Then the waiter calls console_unlock() and continues to write to the consoles. If another task comes along and does a printk() it too can become the new waiter, and we wash rinse and repeat! By Petr Mladek about possible new deadlocks: The thing is that we move console_sem only to printk() call that normally calls console_unlock() as well. It means that the transferred owner should not bring new type of dependencies. As Steven said somewhere: "If there is a deadlock, it was there even before." We could look at it from this side. The possible deadlock would look like: CPU0 CPU1 console_unlock() console_owner = current; spin_lockA() printk() spin = true; while (...) call_console_drivers() spin_lockA() This would be a deadlock. CPU0 would wait for the lock A. While CPU1 would own the lockA and would wait for CPU0 to finish calling the console drivers and pass the console_sem owner. But if the above is true than the following scenario was already possible before: CPU0 spin_lockA() printk() console_unlock() call_console_drivers() spin_lockA() By other words, this deadlock was there even before. Such deadlocks are prevented by using printk_deferred() in the sections guarded by the lock A. By Steven Rostedt: To demonstrate the issue, this module has been shown to lock up a system with 4 CPUs and a slow console (like a serial console). It is also able to lock up a 8 CPU system with only a fast (VGA) console, by passing in "loops=100". The changes in this commit prevent this module from locking up the system. #include #include #include #include #include #include static bool stop_testing; static unsigned int loops = 1; static void preempt_printk_workfn(struct work_struct *work) { int i; while (!READ_ONCE(stop_testing)) { for (i = 0; i < loops && !READ_ONCE(stop_testing); i++) { preempt_disable(); pr_emerg("%5d%-75s\n", smp_processor_id(), " XXX NOPREEMPT"); preempt_enable(); } msleep(1); } } static struct work_struct __percpu *works; static void finish(void) { int cpu; WRITE_ONCE(stop_testing, true); for_each_online_cpu(cpu) flush_work(per_cpu_ptr(works, cpu)); free_percpu(works); } static int __init test_init(void) { int cpu; works = alloc_percpu(struct work_struct); if (!works) return -ENOMEM; /* * This is just a test module. This will break if you * do any CPU hot plugging between loading and * unloading the module. */ for_each_online_cpu(cpu) { struct work_struct *work = per_cpu_ptr(works, cpu); INIT_WORK(work, &preempt_printk_workfn); schedule_work_on(cpu, work); } return 0; } static void __exit test_exit(void) { finish(); } module_param(loops, uint, 0); module_init(test_init); module_exit(test_exit); MODULE_LICENSE("GPL"); Link: http://lkml.kernel.org/r/20180110132418.7080-2-pmladek@suse.com Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Cc: Cong Wang Cc: Dave Hansen Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jan Kara Cc: Mathieu Desnoyers Cc: Tetsuo Handa Cc: Byungchul Park Cc: Tejun Heo Cc: Pavel Machek Cc: linux-kernel@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) [pmladek@suse.com: Commit message about possible deadlocks] Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 108 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7161312593dd..b88b402444d6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -86,8 +86,15 @@ EXPORT_SYMBOL_GPL(console_drivers); static struct lockdep_map console_lock_dep_map = { .name = "console_lock" }; +static struct lockdep_map console_owner_dep_map = { + .name = "console_owner" +}; #endif +static DEFINE_RAW_SPINLOCK(console_owner_lock); +static struct task_struct *console_owner; +static bool console_waiter; + enum devkmsg_log_bits { __DEVKMSG_LOG_BIT_ON = 0, __DEVKMSG_LOG_BIT_OFF, @@ -1767,8 +1774,56 @@ asmlinkage int vprintk_emit(int facility, int level, * semaphore. The release will print out buffers and wake up * /dev/kmsg and syslog() users. */ - if (console_trylock()) + if (console_trylock()) { console_unlock(); + } else { + struct task_struct *owner = NULL; + bool waiter; + bool spin = false; + + printk_safe_enter_irqsave(flags); + + raw_spin_lock(&console_owner_lock); + owner = READ_ONCE(console_owner); + waiter = READ_ONCE(console_waiter); + if (!waiter && owner && owner != current) { + WRITE_ONCE(console_waiter, true); + spin = true; + } + raw_spin_unlock(&console_owner_lock); + + /* + * If there is an active printk() writing to the + * consoles, instead of having it write our data too, + * see if we can offload that load from the active + * printer, and do some printing ourselves. + * Go into a spin only if there isn't already a waiter + * spinning, and there is an active printer, and + * that active printer isn't us (recursive printk?). + */ + if (spin) { + /* We spin waiting for the owner to release us */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + /* Owner will clear console_waiter on hand off */ + while (READ_ONCE(console_waiter)) + cpu_relax(); + + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + printk_safe_exit_irqrestore(flags); + + /* + * The owner passed the console lock to us. + * Since we did not spin on console lock, annotate + * this as a trylock. Otherwise lockdep will + * complain. + */ + mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + console_unlock(); + printk_safe_enter_irqsave(flags); + } + printk_safe_exit_irqrestore(flags); + + } } return printed_len; @@ -2155,6 +2210,7 @@ void console_unlock(void) static u64 seen_seq; unsigned long flags; bool wake_klogd = false; + bool waiter = false; bool do_cond_resched, retry; if (console_suspended) { @@ -2243,14 +2299,64 @@ skip: console_seq++; raw_spin_unlock(&logbuf_lock); + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + */ + raw_spin_lock(&console_owner_lock); + console_owner = current; + raw_spin_unlock(&console_owner_lock); + + /* The waiter may spin on us after setting console_owner */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + stop_critical_timings(); /* don't trace print latency */ call_console_drivers(ext_text, ext_len, text, len); start_critical_timings(); + + raw_spin_lock(&console_owner_lock); + waiter = READ_ONCE(console_waiter); + console_owner = NULL; + raw_spin_unlock(&console_owner_lock); + + /* + * If there is a waiter waiting for us, then pass the + * rest of the work load over to that waiter. + */ + if (waiter) + break; + + /* There was no waiter, and nothing will spin on us here */ + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + printk_safe_exit_irqrestore(flags); if (do_cond_resched) cond_resched(); } + + /* + * If there is an active waiter waiting on the console_lock. + * Pass off the printing to the waiter, and the waiter + * will continue printing on its CPU, and when all writing + * has finished, the last printer will wake up klogd. + */ + if (waiter) { + WRITE_ONCE(console_waiter, false); + /* The waiter is now free to continue */ + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + /* + * Hand off console_lock to waiter. The waiter will perform + * the up(). After this, the waiter is the console_lock owner. + */ + mutex_release(&console_lock_dep_map, 1, _THIS_IP_); + printk_safe_exit_irqrestore(flags); + /* Note, if waiter is set, logbuf_lock is not held */ + return; + } + console_locked = 0; /* Release the exclusive_console once it is used */ From ef433725f5f054e44c11d40f2d80f35824351697 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 12 Jan 2018 17:08:37 +0100 Subject: [PATCH 1055/2608] printk: Hide console waiter logic into helpers [ Upstream commit c162d5b4338d72deed61aa65ed0f2f4ba2bbc8ab ] The commit ("printk: Add console owner and waiter logic to load balance console writes") made vprintk_emit() and console_unlock() even more complicated. This patch extracts the new code into 3 helper functions. They should help to keep it rather self-contained. It will be easier to use and maintain. This patch just shuffles the existing code. It does not change the functionality. Link: http://lkml.kernel.org/r/20180112160837.GD24497@linux.suse Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Cc: Cong Wang Cc: Dave Hansen Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jan Kara Cc: Mathieu Desnoyers Cc: Tetsuo Handa Cc: rostedt@home.goodmis.org Cc: Byungchul Park Cc: Tejun Heo Cc: Pavel Machek Cc: linux-kernel@vger.kernel.org Reviewed-by: Steven Rostedt (VMware) Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 245 +++++++++++++++++++++++++---------------- 1 file changed, 148 insertions(+), 97 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b88b402444d6..2d1c2700bd85 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -86,15 +86,8 @@ EXPORT_SYMBOL_GPL(console_drivers); static struct lockdep_map console_lock_dep_map = { .name = "console_lock" }; -static struct lockdep_map console_owner_dep_map = { - .name = "console_owner" -}; #endif -static DEFINE_RAW_SPINLOCK(console_owner_lock); -static struct task_struct *console_owner; -static bool console_waiter; - enum devkmsg_log_bits { __DEVKMSG_LOG_BIT_ON = 0, __DEVKMSG_LOG_BIT_OFF, @@ -1555,6 +1548,146 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) return do_syslog(type, buf, len, SYSLOG_FROM_READER); } +/* + * Special console_lock variants that help to reduce the risk of soft-lockups. + * They allow to pass console_lock to another printk() call using a busy wait. + */ + +#ifdef CONFIG_LOCKDEP +static struct lockdep_map console_owner_dep_map = { + .name = "console_owner" +}; +#endif + +static DEFINE_RAW_SPINLOCK(console_owner_lock); +static struct task_struct *console_owner; +static bool console_waiter; + +/** + * console_lock_spinning_enable - mark beginning of code where another + * thread might safely busy wait + * + * This basically converts console_lock into a spinlock. This marks + * the section where the console_lock owner can not sleep, because + * there may be a waiter spinning (like a spinlock). Also it must be + * ready to hand over the lock at the end of the section. + */ +static void console_lock_spinning_enable(void) +{ + raw_spin_lock(&console_owner_lock); + console_owner = current; + raw_spin_unlock(&console_owner_lock); + + /* The waiter may spin on us after setting console_owner */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); +} + +/** + * console_lock_spinning_disable_and_check - mark end of code where another + * thread was able to busy wait and check if there is a waiter + * + * This is called at the end of the section where spinning is allowed. + * It has two functions. First, it is a signal that it is no longer + * safe to start busy waiting for the lock. Second, it checks if + * there is a busy waiter and passes the lock rights to her. + * + * Important: Callers lose the lock if there was a busy waiter. + * They must not touch items synchronized by console_lock + * in this case. + * + * Return: 1 if the lock rights were passed, 0 otherwise. + */ +static int console_lock_spinning_disable_and_check(void) +{ + int waiter; + + raw_spin_lock(&console_owner_lock); + waiter = READ_ONCE(console_waiter); + console_owner = NULL; + raw_spin_unlock(&console_owner_lock); + + if (!waiter) { + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + return 0; + } + + /* The waiter is now free to continue */ + WRITE_ONCE(console_waiter, false); + + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + + /* + * Hand off console_lock to waiter. The waiter will perform + * the up(). After this, the waiter is the console_lock owner. + */ + mutex_release(&console_lock_dep_map, 1, _THIS_IP_); + return 1; +} + +/** + * console_trylock_spinning - try to get console_lock by busy waiting + * + * This allows to busy wait for the console_lock when the current + * owner is running in specially marked sections. It means that + * the current owner is running and cannot reschedule until it + * is ready to lose the lock. + * + * Return: 1 if we got the lock, 0 othrewise + */ +static int console_trylock_spinning(void) +{ + struct task_struct *owner = NULL; + bool waiter; + bool spin = false; + unsigned long flags; + + if (console_trylock()) + return 1; + + printk_safe_enter_irqsave(flags); + + raw_spin_lock(&console_owner_lock); + owner = READ_ONCE(console_owner); + waiter = READ_ONCE(console_waiter); + if (!waiter && owner && owner != current) { + WRITE_ONCE(console_waiter, true); + spin = true; + } + raw_spin_unlock(&console_owner_lock); + + /* + * If there is an active printk() writing to the + * consoles, instead of having it write our data too, + * see if we can offload that load from the active + * printer, and do some printing ourselves. + * Go into a spin only if there isn't already a waiter + * spinning, and there is an active printer, and + * that active printer isn't us (recursive printk?). + */ + if (!spin) { + printk_safe_exit_irqrestore(flags); + return 0; + } + + /* We spin waiting for the owner to release us */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + /* Owner will clear console_waiter on hand off */ + while (READ_ONCE(console_waiter)) + cpu_relax(); + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + + printk_safe_exit_irqrestore(flags); + /* + * The owner passed the console lock to us. + * Since we did not spin on console lock, annotate + * this as a trylock. Otherwise lockdep will + * complain. + */ + mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + + return 1; +} + /* * Call the console drivers, asking them to write out * log_buf[start] to log_buf[end - 1]. @@ -1774,56 +1907,8 @@ asmlinkage int vprintk_emit(int facility, int level, * semaphore. The release will print out buffers and wake up * /dev/kmsg and syslog() users. */ - if (console_trylock()) { + if (console_trylock_spinning()) console_unlock(); - } else { - struct task_struct *owner = NULL; - bool waiter; - bool spin = false; - - printk_safe_enter_irqsave(flags); - - raw_spin_lock(&console_owner_lock); - owner = READ_ONCE(console_owner); - waiter = READ_ONCE(console_waiter); - if (!waiter && owner && owner != current) { - WRITE_ONCE(console_waiter, true); - spin = true; - } - raw_spin_unlock(&console_owner_lock); - - /* - * If there is an active printk() writing to the - * consoles, instead of having it write our data too, - * see if we can offload that load from the active - * printer, and do some printing ourselves. - * Go into a spin only if there isn't already a waiter - * spinning, and there is an active printer, and - * that active printer isn't us (recursive printk?). - */ - if (spin) { - /* We spin waiting for the owner to release us */ - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); - /* Owner will clear console_waiter on hand off */ - while (READ_ONCE(console_waiter)) - cpu_relax(); - - spin_release(&console_owner_dep_map, 1, _THIS_IP_); - printk_safe_exit_irqrestore(flags); - - /* - * The owner passed the console lock to us. - * Since we did not spin on console lock, annotate - * this as a trylock. Otherwise lockdep will - * complain. - */ - mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); - console_unlock(); - printk_safe_enter_irqsave(flags); - } - printk_safe_exit_irqrestore(flags); - - } } return printed_len; @@ -1924,6 +2009,8 @@ static ssize_t msg_print_ext_header(char *buf, size_t size, static ssize_t msg_print_ext_body(char *buf, size_t size, char *dict, size_t dict_len, char *text, size_t text_len) { return 0; } +static void console_lock_spinning_enable(void) { } +static int console_lock_spinning_disable_and_check(void) { return 0; } static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, @@ -2210,7 +2297,6 @@ void console_unlock(void) static u64 seen_seq; unsigned long flags; bool wake_klogd = false; - bool waiter = false; bool do_cond_resched, retry; if (console_suspended) { @@ -2305,31 +2391,16 @@ skip: * finish. This task can not be preempted if there is a * waiter waiting to take over. */ - raw_spin_lock(&console_owner_lock); - console_owner = current; - raw_spin_unlock(&console_owner_lock); - - /* The waiter may spin on us after setting console_owner */ - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + console_lock_spinning_enable(); stop_critical_timings(); /* don't trace print latency */ call_console_drivers(ext_text, ext_len, text, len); start_critical_timings(); - raw_spin_lock(&console_owner_lock); - waiter = READ_ONCE(console_waiter); - console_owner = NULL; - raw_spin_unlock(&console_owner_lock); - - /* - * If there is a waiter waiting for us, then pass the - * rest of the work load over to that waiter. - */ - if (waiter) - break; - - /* There was no waiter, and nothing will spin on us here */ - spin_release(&console_owner_dep_map, 1, _THIS_IP_); + if (console_lock_spinning_disable_and_check()) { + printk_safe_exit_irqrestore(flags); + return; + } printk_safe_exit_irqrestore(flags); @@ -2337,26 +2408,6 @@ skip: cond_resched(); } - /* - * If there is an active waiter waiting on the console_lock. - * Pass off the printing to the waiter, and the waiter - * will continue printing on its CPU, and when all writing - * has finished, the last printer will wake up klogd. - */ - if (waiter) { - WRITE_ONCE(console_waiter, false); - /* The waiter is now free to continue */ - spin_release(&console_owner_dep_map, 1, _THIS_IP_); - /* - * Hand off console_lock to waiter. The waiter will perform - * the up(). After this, the waiter is the console_lock owner. - */ - mutex_release(&console_lock_dep_map, 1, _THIS_IP_); - printk_safe_exit_irqrestore(flags); - /* Note, if waiter is set, logbuf_lock is not held */ - return; - } - console_locked = 0; /* Release the exclusive_console once it is used */ From 08b7a8f880a27631e25e23ac2569f7c2d166469e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 16 Jan 2018 13:47:16 +0900 Subject: [PATCH 1056/2608] printk: Never set console_may_schedule in console_trylock() [ Upstream commit fd5f7cde1b85d4c8e09ca46ce948e008a2377f64 ] This patch, basically, reverts commit 6b97a20d3a79 ("printk: set may_schedule for some of console_trylock() callers"). That commit was a mistake, it introduced a big dependency on the scheduler, by enabling preemption under console_sem in printk()->console_unlock() path, which is rather too critical. The patch did not significantly reduce the possibilities of printk() lockups, but made it possible to stall printk(), as has been reported by Tetsuo Handa [1]. Another issues is that preemption under console_sem also messes up with Steven Rostedt's hand off scheme, by making it possible to sleep with console_sem both in console_unlock() and in vprintk_emit(), after acquiring the console_sem ownership (anywhere between printk_safe_exit_irqrestore() in console_trylock_spinning() and printk_safe_enter_irqsave() in console_unlock()). This makes hand off less likely and, at the same time, may result in a significant amount of pending logbuf messages. Preempted console_sem owner makes it impossible for other CPUs to emit logbuf messages, but does not make it impossible for other CPUs to append new messages to the logbuf. Reinstate the old behavior and make printk() non-preemptible. Should any printk() lockup reports arrive they must be handled in a different way. [1] http://lkml.kernel.org/r/201603022101.CAH73907.OVOOMFHFFtQJSL%20()%20I-love%20!%20SAKURA%20!%20ne%20!%20jp Fixes: 6b97a20d3a79 ("printk: set may_schedule for some of console_trylock() callers") Link: http://lkml.kernel.org/r/20180116044716.GE6607@jagdpanzerIV To: Tetsuo Handa Cc: Sergey Senozhatsky Cc: Tejun Heo Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Cc: Cong Wang Cc: Dave Hansen Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jan Kara Cc: Mathieu Desnoyers Cc: Byungchul Park Cc: Pavel Machek Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Reported-by: Tetsuo Handa Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2d1c2700bd85..2f654a79f80b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1902,6 +1902,12 @@ asmlinkage int vprintk_emit(int facility, int level, /* If called from the scheduler, we can not call up(). */ if (!in_sched) { + /* + * Disable preemption to avoid being preempted while holding + * console_sem which would prevent anyone from printing to + * console + */ + preempt_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1909,6 +1915,7 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock_spinning()) console_unlock(); + preempt_enable(); } return printed_len; @@ -2225,20 +2232,7 @@ int console_trylock(void) return 0; } console_locked = 1; - /* - * When PREEMPT_COUNT disabled we can't reliably detect if it's - * safe to schedule (e.g. calling printk while holding a spin_lock), - * because preempt_disable()/preempt_enable() are just barriers there - * and preempt_count() is always 0. - * - * RCU read sections have a separate preemption counter when - * PREEMPT_RCU enabled thus we must take extra care and check - * rcu_preempt_depth(), otherwise RCU read sections modify - * preempt_count(). - */ - console_may_schedule = !oops_in_progress && - preemptible() && - !rcu_preempt_depth(); + console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); From 16c9a316f21f1bcde5ce0633848d61699d63bad3 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 26 Feb 2018 15:44:20 +0100 Subject: [PATCH 1057/2608] printk: Wake klogd when passing console_lock owner [ Upstream commit c14376de3a1befa70d9811ca2872d47367b48767 ] wake_klogd is a local variable in console_unlock(). The information is lost when the console_lock owner using the busy wait added by the commit dbdda842fe96f8932 ("printk: Add console owner and waiter logic to load balance console writes"). The following race is possible: CPU0 CPU1 console_unlock() for (;;) /* calling console for last message */ printk() log_store() log_next_seq++; /* see new message */ if (seen_seq != log_next_seq) { wake_klogd = true; seen_seq = log_next_seq; } console_lock_spinning_enable(); if (console_trylock_spinning()) /* spinning */ if (console_lock_spinning_disable_and_check()) { printk_safe_exit_irqrestore(flags); return; console_unlock() if (seen_seq != log_next_seq) { /* already seen */ /* nothing to do */ Result: Nobody would wakeup klogd. One solution would be to make a global variable from wake_klogd. But then we would need to manipulate it under a lock or so. This patch wakes klogd also when console_lock is passed to the spinning waiter. It looks like the right way to go. Also userspace should have a chance to see and store any "flood" of messages. Note that the very late klogd wake up was a historic solution. It made sense on single CPU systems or when sys_syslog() operations were synchronized using the big kernel lock like in v2.1.113. But it is questionable these days. Fixes: dbdda842fe96f8932 ("printk: Add console owner and waiter logic to load balance console writes") Link: http://lkml.kernel.org/r/20180226155734.dzwg3aovqnwtvkoy@pathway.suse.cz Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: Tejun Heo Suggested-by: Sergey Senozhatsky Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2f654a79f80b..2e2c86dd226f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2393,7 +2393,7 @@ skip: if (console_lock_spinning_disable_and_check()) { printk_safe_exit_irqrestore(flags); - return; + goto out; } printk_safe_exit_irqrestore(flags); @@ -2426,6 +2426,7 @@ skip: if (retry && console_trylock()) goto again; +out: if (wake_klogd) wake_up_klogd(); } From 891e5a89f0faf42766eefd476285d4779fd31a21 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 17 Nov 2017 15:28:27 -0800 Subject: [PATCH 1058/2608] lib/rbtree-test: lower default params commit 0b548e33e6cb2bff240fdaf1783783be15c29080 upstream. Fengguang reported soft lockups while running the rbtree and interval tree test modules. The logic for these tests all occur in init phase, and we currently are pounding with the default values for number of nodes and number of iterations of each test. Reduce the latter by two orders of magnitude. This does not influence the value of the tests in that one thousand times by default is enough to get the picture. Link: http://lkml.kernel.org/r/20171109161715.xai2dtwqw2frhkcm@linux-n805 Signed-off-by: Davidlohr Bueso Reported-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- lib/interval_tree_test.c | 4 ++-- lib/rbtree_test.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 0e343fd29570..835242e74aaa 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -11,10 +11,10 @@ MODULE_PARM_DESC(name, msg); __param(int, nnodes, 100, "Number of nodes in the interval tree"); -__param(int, perf_loops, 100000, "Number of iterations modifying the tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the tree"); __param(int, nsearches, 100, "Number of searches to the interval tree"); -__param(int, search_loops, 10000, "Number of iterations searching the tree"); +__param(int, search_loops, 1000, "Number of iterations searching the tree"); __param(bool, search_all, false, "Searches will iterate all nodes in the tree"); __param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint"); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 191a238e5a9d..7d36c1e27ff6 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -11,7 +11,7 @@ MODULE_PARM_DESC(name, msg); __param(int, nnodes, 100, "Number of nodes in the rb-tree"); -__param(int, perf_loops, 100000, "Number of iterations modifying the rb-tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the rb-tree"); __param(int, check_loops, 100, "Number of iterations modifying and verifying the rb-tree"); struct test_node { From 5d2cc520d1f1ed929ef1f6b7ad57ddde9c42b651 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Mon, 26 Nov 2018 18:35:14 +0100 Subject: [PATCH 1059/2608] flexfiles: enforce per-mirror stateid only for v4 DSes commit 320f35b7bf8cccf1997ca3126843535e1b95e9c4 upstream. Since commit bb21ce0ad227 we always enforce per-mirror stateid. However, this makes sense only for v4+ servers. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 13612a848378..8dbde5ded042 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1725,7 +1725,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) if (fh) hdr->args.fh = fh; - if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + if (vers == 4 && + !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) goto out_failed; /* @@ -1790,7 +1791,8 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (fh) hdr->args.fh = fh; - if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + if (vers == 4 && + !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) goto out_failed; /* From 16906e5ad4cc0439d03dc294c0251f4c875ccef2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:57:24 -0700 Subject: [PATCH 1060/2608] staging: speakup: Replace strncpy with memcpy commit fd29edc7232bc19f969e8f463138afc5472b3d5f upstream. gcc 8.1.0 generates the following warnings. drivers/staging/speakup/kobjects.c: In function 'punc_store': drivers/staging/speakup/kobjects.c:522:2: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length drivers/staging/speakup/kobjects.c:504:6: note: length computed here drivers/staging/speakup/kobjects.c: In function 'synth_store': drivers/staging/speakup/kobjects.c:391:2: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length drivers/staging/speakup/kobjects.c:388:8: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Reviewed-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/kobjects.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/speakup/kobjects.c b/drivers/staging/speakup/kobjects.c index ca85476e3ff7..23256bbdba51 100644 --- a/drivers/staging/speakup/kobjects.c +++ b/drivers/staging/speakup/kobjects.c @@ -387,7 +387,7 @@ static ssize_t synth_store(struct kobject *kobj, struct kobj_attribute *attr, len = strlen(buf); if (len < 2 || len > 9) return -EINVAL; - strncpy(new_synth_name, buf, len); + memcpy(new_synth_name, buf, len); if (new_synth_name[len - 1] == '\n') len--; new_synth_name[len] = '\0'; @@ -518,7 +518,7 @@ static ssize_t punc_store(struct kobject *kobj, struct kobj_attribute *attr, return -EINVAL; } - strncpy(punc_buf, buf, x); + memcpy(punc_buf, buf, x); while (x && punc_buf[x - 1] == '\n') x--; From 62711dc6898b3186a8a3501e4a3829add8624b3f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 9 Dec 2018 17:04:19 +0900 Subject: [PATCH 1061/2608] ALSA: fireface: fix reference to wrong register for clock configuration commit fa9c98e4b975bb3192ed6af09d9fa282ed3cd8a0 upstream. In an initial commit, 'SYNC_STATUS' register is referred to get clock configuration, however this is wrong, according to my local note at hand for reverse-engineering about packet dump. It should be 'CLOCK_CONFIG' register. Actually, ff400_dump_clock_config() is correctly programmed. This commit fixes the bug. Cc: # v4.12+ Fixes: 76fdb3a9e13a ('ALSA: fireface: add support for Fireface 400') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/fireface/ff-protocol-ff400.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/fireface/ff-protocol-ff400.c b/sound/firewire/fireface/ff-protocol-ff400.c index 9f5036442ab9..b47954a6b8ab 100644 --- a/sound/firewire/fireface/ff-protocol-ff400.c +++ b/sound/firewire/fireface/ff-protocol-ff400.c @@ -30,7 +30,7 @@ static int ff400_get_clock(struct snd_ff *ff, unsigned int *rate, int err; err = snd_fw_transaction(ff->unit, TCODE_READ_QUADLET_REQUEST, - FF400_SYNC_STATUS, ®, sizeof(reg), 0); + FF400_CLOCK_CONFIG, ®, sizeof(reg), 0); if (err < 0) return err; data = le32_to_cpu(reg); From d655a1a6684e0d6cc2429caeb4c5dd0fed4efb79 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 7 Dec 2018 15:14:59 +0800 Subject: [PATCH 1062/2608] ALSA: hda/realtek - Fixed headphone issue for ALC700 commit bde1a7459623a66c2abec4d0a841e4b06cc88d9a upstream. If it plugged headphone or headset into the jack, then do the reboot, it will have a chance to cause headphone no sound. It just need to run the headphone mode procedure after boot time. The issue will be fixed. It also suitable for ALC234 ALC274 and ALC294. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f6136f041a81..31c91e0a815e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6965,6 +6965,37 @@ static void alc269_fill_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x4, 0, 1<<11); } +static void alc294_hp_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + int i, val; + + if (!hp_pin) + return; + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + msleep(100); + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */ + alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */ + + /* Wait for depop procedure finish */ + val = alc_read_coefex_idx(codec, 0x58, 0x01); + for (i = 0; i < 20 && val & 0x0080; i++) { + msleep(50); + val = alc_read_coefex_idx(codec, 0x58, 0x01); + } + /* Set HP depop to auto mode */ + alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b); + msleep(50); +} + /* */ static int patch_alc269(struct hda_codec *codec) @@ -7101,6 +7132,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC294; spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */ + alc294_hp_init(codec); break; case 0x10ec0300: spec->codec_variant = ALC269_TYPE_ALC300; @@ -7112,6 +7144,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ + alc294_hp_init(codec); break; } From 01a166012cd16b9ce147744c514f228df6cf495e Mon Sep 17 00:00:00 2001 From: Piotr Stankiewicz Date: Wed, 28 Nov 2018 06:44:46 -0800 Subject: [PATCH 1063/2608] IB/hfi1: Fix an out-of-bounds access in get_hw_stats commit 36d842194a57f1b21fbc6a6875f2fa2f9a7f8679 upstream. When running with KASAN, the following trace is produced: [ 62.535888] ================================================================== [ 62.544930] BUG: KASAN: slab-out-of-bounds in gut_hw_stats+0x122/0x230 [hfi1] [ 62.553856] Write of size 8 at addr ffff88080e8d6330 by task kworker/0:1/14 [ 62.565333] CPU: 0 PID: 14 Comm: kworker/0:1 Not tainted 4.19.0-test-build-kasan+ #8 [ 62.575087] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0019.101220160604 10/12/2016 [ 62.587951] Workqueue: events work_for_cpu_fn [ 62.594050] Call Trace: [ 62.598023] dump_stack+0xc6/0x14c [ 62.603089] ? dump_stack_print_info.cold.1+0x2f/0x2f [ 62.610041] ? kmsg_dump_rewind_nolock+0x59/0x59 [ 62.616615] ? get_hw_stats+0x122/0x230 [hfi1] [ 62.622985] print_address_description+0x6c/0x23c [ 62.629744] ? get_hw_stats+0x122/0x230 [hfi1] [ 62.636108] kasan_report.cold.6+0x241/0x308 [ 62.642365] get_hw_stats+0x122/0x230 [hfi1] [ 62.648703] ? hfi1_alloc_rn+0x40/0x40 [hfi1] [ 62.655088] ? __kmalloc+0x110/0x240 [ 62.660695] ? hfi1_alloc_rn+0x40/0x40 [hfi1] [ 62.667142] setup_hw_stats+0xd8/0x430 [ib_core] [ 62.673972] ? show_hfi+0x50/0x50 [hfi1] [ 62.680026] ib_device_register_sysfs+0x165/0x180 [ib_core] [ 62.687995] ib_register_device+0x5a2/0xa10 [ib_core] [ 62.695340] ? show_hfi+0x50/0x50 [hfi1] [ 62.701421] ? ib_unregister_device+0x2e0/0x2e0 [ib_core] [ 62.709222] ? __vmalloc_node_range+0x2d0/0x380 [ 62.716131] ? rvt_driver_mr_init+0x11f/0x2d0 [rdmavt] [ 62.723735] ? vmalloc_node+0x5c/0x70 [ 62.729697] ? rvt_driver_mr_init+0x11f/0x2d0 [rdmavt] [ 62.737347] ? rvt_driver_mr_init+0x1f5/0x2d0 [rdmavt] [ 62.744998] ? __rvt_alloc_mr+0x110/0x110 [rdmavt] [ 62.752315] ? rvt_rc_error+0x140/0x140 [rdmavt] [ 62.759434] ? rvt_vma_open+0x30/0x30 [rdmavt] [ 62.766364] ? mutex_unlock+0x1d/0x40 [ 62.772445] ? kmem_cache_create_usercopy+0x15d/0x230 [ 62.780115] rvt_register_device+0x1f6/0x360 [rdmavt] [ 62.787823] ? rvt_get_port_immutable+0x180/0x180 [rdmavt] [ 62.796058] ? __get_txreq+0x400/0x400 [hfi1] [ 62.802969] ? memcpy+0x34/0x50 [ 62.808611] hfi1_register_ib_device+0xde6/0xeb0 [hfi1] [ 62.816601] ? hfi1_get_npkeys+0x10/0x10 [hfi1] [ 62.823760] ? hfi1_init+0x89f/0x9a0 [hfi1] [ 62.830469] ? hfi1_setup_eagerbufs+0xad0/0xad0 [hfi1] [ 62.838204] ? pcie_capability_clear_and_set_word+0xcd/0xe0 [ 62.846429] ? pcie_capability_read_word+0xd0/0xd0 [ 62.853791] ? hfi1_pcie_init+0x187/0x4b0 [hfi1] [ 62.860958] init_one+0x67f/0xae0 [hfi1] [ 62.867301] ? hfi1_init+0x9a0/0x9a0 [hfi1] [ 62.873876] ? wait_woken+0x130/0x130 [ 62.879860] ? read_word_at_a_time+0xe/0x20 [ 62.886329] ? strscpy+0x14b/0x280 [ 62.891998] ? hfi1_init+0x9a0/0x9a0 [hfi1] [ 62.898405] local_pci_probe+0x70/0xd0 [ 62.904295] ? pci_device_shutdown+0x90/0x90 [ 62.910833] work_for_cpu_fn+0x29/0x40 [ 62.916750] process_one_work+0x584/0x960 [ 62.922974] ? rcu_work_rcufn+0x40/0x40 [ 62.928991] ? __schedule+0x396/0xdc0 [ 62.934806] ? __sched_text_start+0x8/0x8 [ 62.941020] ? pick_next_task_fair+0x68b/0xc60 [ 62.947674] ? run_rebalance_domains+0x260/0x260 [ 62.954471] ? __list_add_valid+0x29/0xa0 [ 62.960607] ? move_linked_works+0x1c7/0x230 [ 62.967077] ? trace_event_raw_event_workqueue_execute_start+0x140/0x140 [ 62.976248] ? mutex_lock+0xa6/0x100 [ 62.982029] ? __mutex_lock_slowpath+0x10/0x10 [ 62.988795] ? __switch_to+0x37a/0x710 [ 62.994731] worker_thread+0x62e/0x9d0 [ 63.000602] ? max_active_store+0xf0/0xf0 [ 63.006828] ? __switch_to_asm+0x40/0x70 [ 63.012932] ? __switch_to_asm+0x34/0x70 [ 63.019013] ? __switch_to_asm+0x40/0x70 [ 63.025042] ? __switch_to_asm+0x34/0x70 [ 63.031030] ? __switch_to_asm+0x40/0x70 [ 63.037006] ? __schedule+0x396/0xdc0 [ 63.042660] ? kmem_cache_alloc_trace+0xf3/0x1f0 [ 63.049323] ? kthread+0x59/0x1d0 [ 63.054594] ? ret_from_fork+0x35/0x40 [ 63.060257] ? __sched_text_start+0x8/0x8 [ 63.066212] ? schedule+0xcf/0x250 [ 63.071529] ? __wake_up_common+0x110/0x350 [ 63.077794] ? __schedule+0xdc0/0xdc0 [ 63.083348] ? wait_woken+0x130/0x130 [ 63.088963] ? finish_task_switch+0x1f1/0x520 [ 63.095258] ? kasan_unpoison_shadow+0x30/0x40 [ 63.101792] ? __init_waitqueue_head+0xa0/0xd0 [ 63.108183] ? replenish_dl_entity.cold.60+0x18/0x18 [ 63.115151] ? _raw_spin_lock_irqsave+0x25/0x50 [ 63.121754] ? max_active_store+0xf0/0xf0 [ 63.127753] kthread+0x1ae/0x1d0 [ 63.132894] ? kthread_bind+0x30/0x30 [ 63.138422] ret_from_fork+0x35/0x40 [ 63.146973] Allocated by task 14: [ 63.152077] kasan_kmalloc+0xbf/0xe0 [ 63.157471] __kmalloc+0x110/0x240 [ 63.162804] init_cntrs+0x34d/0xdf0 [hfi1] [ 63.168883] hfi1_init_dd+0x29a3/0x2f90 [hfi1] [ 63.175244] init_one+0x551/0xae0 [hfi1] [ 63.181065] local_pci_probe+0x70/0xd0 [ 63.186759] work_for_cpu_fn+0x29/0x40 [ 63.192310] process_one_work+0x584/0x960 [ 63.198163] worker_thread+0x62e/0x9d0 [ 63.203843] kthread+0x1ae/0x1d0 [ 63.208874] ret_from_fork+0x35/0x40 [ 63.217203] Freed by task 1: [ 63.221844] __kasan_slab_free+0x12e/0x180 [ 63.227844] kfree+0x92/0x1a0 [ 63.232570] single_release+0x3a/0x60 [ 63.238024] __fput+0x1d9/0x480 [ 63.242911] task_work_run+0x139/0x190 [ 63.248440] exit_to_usermode_loop+0x191/0x1a0 [ 63.254814] do_syscall_64+0x301/0x330 [ 63.260283] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 63.270199] The buggy address belongs to the object at ffff88080e8d5500 which belongs to the cache kmalloc-4096 of size 4096 [ 63.287247] The buggy address is located 3632 bytes inside of 4096-byte region [ffff88080e8d5500, ffff88080e8d6500) [ 63.303564] The buggy address belongs to the page: [ 63.310447] page:ffffea00203a3400 count:1 mapcount:0 mapping:ffff88081380e840 index:0x0 compound_mapcount: 0 [ 63.323102] flags: 0x2fffff80008100(slab|head) [ 63.329775] raw: 002fffff80008100 0000000000000000 0000000100000001 ffff88081380e840 [ 63.340175] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 63.350564] page dumped because: kasan: bad access detected [ 63.361974] Memory state around the buggy address: [ 63.369137] ffff88080e8d6200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 63.379082] ffff88080e8d6280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 63.389032] >ffff88080e8d6300: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc [ 63.398944] ^ [ 63.406141] ffff88080e8d6380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 63.416109] ffff88080e8d6400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 63.426099] ================================================================== The trace happens because get_hw_stats() assumes there is room in the memory allocated in init_cntrs() to accommodate the driver counters. Unfortunately, that routine only allocated space for the device counters. Fix by insuring the allocation has room for the additional driver counters. Cc: # v4.14+ Fixes: b7481944b06e9 ("IB/hfi1: Show statistics counters under IB stats interface") Reviewed-by: Mike Marciniczyn Reviewed-by: Mike Ruhl Signed-off-by: Piotr Stankiewicz Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/chip.c | 3 ++- drivers/infiniband/hw/hfi1/hfi.h | 2 ++ drivers/infiniband/hw/hfi1/verbs.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index f9faacce9250..db33ad985a12 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12449,7 +12449,8 @@ static int init_cntrs(struct hfi1_devdata *dd) } /* allocate space for the counter values */ - dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL); + dd->cntrs = kcalloc(dd->ndevcntrs + num_driver_cntrs, sizeof(u64), + GFP_KERNEL); if (!dd->cntrs) goto bail; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 13a7bcaa58e6..ee2859dcceab 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -152,6 +152,8 @@ struct hfi1_ib_stats { extern struct hfi1_ib_stats hfi1_stats; extern const struct pci_error_handlers hfi1_pci_err_handler; +extern int num_driver_cntrs; + /* * First-cut criterion for "device is active" is * two thousand dwords combined Tx, Rx traffic per diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 63d404a6752a..12cf0f7ca7bb 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1693,7 +1693,7 @@ static const char * const driver_cntr_names[] = { static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ static const char **dev_cntr_names; static const char **port_cntr_names; -static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); +int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); static int num_dev_cntrs; static int num_port_cntrs; static int cntr_names_initialized; From 4465b31b75490c038777cfccfbdf2ba8e9e5286a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Dec 2018 09:58:24 -0800 Subject: [PATCH 1064/2608] tcp: lack of available data can also cause TSO defer commit f9bfe4e6a9d08d405fe7b081ee9a13e649c97ecf upstream. tcp_tso_should_defer() can return true in three different cases : 1) We are cwnd-limited 2) We are rwnd-limited 3) We are application limited. Neal pointed out that my recent fix went too far, since it assumed that if we were not in 1) case, we must be rwnd-limited Fix this by properly populating the is_cwnd_limited and is_rwnd_limited booleans. After this change, we can finally move the silly check for FIN flag only for the application-limited case. The same move for EOR bit will be handled in net-next, since commit 1c09f7d073b1 ("tcp: do not try to defer skbs with eor mark (MSG_EOR)") is scheduled for linux-4.21 Tested by running 200 concurrent netperf -t TCP_RR -- -r 60000,100 and checking none of them was rwnd_limited in the chrono_stat output from "ss -ti" command. Fixes: 41727549de3e ("tcp: Do not underestimate rwnd_limited") Signed-off-by: Eric Dumazet Suggested-by: Neal Cardwell Reviewed-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Reviewed-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1b31b0a1c7fa..24bad638c2ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1885,7 +1885,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, * This algorithm is from John Heffner. */ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, - bool *is_cwnd_limited, u32 max_segs) + bool *is_cwnd_limited, + bool *is_rwnd_limited, + u32 max_segs) { const struct inet_connection_sock *icsk = inet_csk(sk); u32 age, send_win, cong_win, limit, in_flight; @@ -1893,9 +1895,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, struct sk_buff *head; int win_divisor; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - goto send_now; - if (icsk->icsk_ca_state >= TCP_CA_Recovery) goto send_now; @@ -1951,10 +1950,27 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if (age < (tp->srtt_us >> 4)) goto send_now; - /* Ok, it looks like it is advisable to defer. */ + /* Ok, it looks like it is advisable to defer. + * Three cases are tracked : + * 1) We are cwnd-limited + * 2) We are rwnd-limited + * 3) We are application limited. + */ + if (cong_win < send_win) { + if (cong_win <= skb->len) { + *is_cwnd_limited = true; + return true; + } + } else { + if (send_win <= skb->len) { + *is_rwnd_limited = true; + return true; + } + } - if (cong_win < send_win && cong_win <= skb->len) - *is_cwnd_limited = true; + /* If this packet won't get more data, do not wait. */ + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + goto send_now; return true; @@ -2328,11 +2344,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } else { if (!push_one && tcp_tso_should_defer(sk, skb, &is_cwnd_limited, - max_segs)) { - if (!is_cwnd_limited) - is_rwnd_limited = true; + &is_rwnd_limited, max_segs)) break; - } } limit = mss_now; From 3beeb2615681fe87f4e6291ade669d50c27ce59a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 17 Dec 2018 09:28:56 +0100 Subject: [PATCH 1065/2608] Linux 4.14.89 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3fdee40861a1..b83477be8d0c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 88 +SUBLEVEL = 89 EXTRAVERSION = NAME = Petit Gorille From 30c64b5a4f2b88de1c69fcc556eb4a1238ad5ac3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 13 Nov 2017 07:15:41 +0100 Subject: [PATCH 1066/2608] timer/debug: Change /proc/timer_list from 0444 to 0400 [ Upstream commit 8e7df2b5b7f245c9bd11064712db5cb69044a362 ] While it uses %pK, there's still few reasons to read this file as non-root. Suggested-by: Linus Torvalds Acked-by: Thomas Gleixner Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/time/timer_list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 0e7f5428a148..0ed768b56c60 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -389,7 +389,7 @@ static int __init init_timer_list_procfs(void) { struct proc_dir_entry *pe; - pe = proc_create("timer_list", 0444, NULL, &timer_list_fops); + pe = proc_create("timer_list", 0400, NULL, &timer_list_fops); if (!pe) return -ENOMEM; return 0; From 7ff0bcb2cb31602dd347b216f5f70851102d5067 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 4 Dec 2018 17:04:57 +0800 Subject: [PATCH 1067/2608] pinctrl: sunxi: a83t: Fix IRQ offset typo for PH11 commit 478b6767ad26ab86d9ecc341027dd09a87b1f997 upstream. Pin PH11 is used on various A83T board to detect a change in the OTG port's ID pin, as in when an OTG host cable is plugged in. The incorrect offset meant the gpiochip/irqchip was activating the wrong pin for interrupts. Fixes: 4730f33f0d82 ("pinctrl: sunxi: add allwinner A83T PIO controller support") Cc: Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index 6624499eae72..4ada80317a3b 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -568,7 +568,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 11), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION_IRQ_BANK(0x6, 2, 1)), /* PH_EINT11 */ + SUNXI_FUNCTION_IRQ_BANK(0x6, 2, 11)), /* PH_EINT11 */ }; static const struct sunxi_pinctrl_desc sun8i_a83t_pinctrl_data = { From 5f4610fe2ed4351b45c0d82b3246f18b886d26f9 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 11 Dec 2018 12:37:49 -0500 Subject: [PATCH 1068/2608] aio: fix spectre gadget in lookup_ioctx commit a538e3ff9dabcdf6c3f477a373c629213d1c3066 upstream. Matthew pointed out that the ioctx_table is susceptible to spectre v1, because the index can be controlled by an attacker. The below patch should mitigate the attack for all of the aio system calls. Cc: stable@vger.kernel.org Reported-by: Matthew Wilcox Reported-by: Dan Carpenter Signed-off-by: Jeff Moyer Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 3a749c3a92e3..a2de58f77338 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -43,6 +43,7 @@ #include #include +#include #include "internal.h" @@ -1084,6 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) if (!table || id >= table->nr) goto out; + id = array_index_nospec(id, table->nr); ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { if (percpu_ref_tryget_live(&ctx->users)) From b99eaefb6850b38abdd8a2029bbfde08170ec6c9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 14 Dec 2018 14:17:17 -0800 Subject: [PATCH 1069/2608] userfaultfd: check VM_MAYWRITE was set after verifying the uffd is registered commit 01e881f5a1fca4677e82733061868c6d6ea05ca7 upstream. Calling UFFDIO_UNREGISTER on virtual ranges not yet registered in uffd could trigger an harmless false positive WARN_ON. Check the vma is already registered before checking VM_MAYWRITE to shut off the false positive warning. Link: http://lkml.kernel.org/r/20181206212028.18726-2-aarcange@redhat.com Cc: Fixes: 29ec90660d68 ("userfaultfd: shmem/hugetlbfs: only allow to register VM_MAYWRITE vmas") Signed-off-by: Andrea Arcangeli Reported-by: syzbot+06c7092e7d71218a2c16@syzkaller.appspotmail.com Acked-by: Mike Rapoport Acked-by: Hugh Dickins Acked-by: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f92e1f2fc846..5f10052d2671 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1567,7 +1567,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, cond_resched(); BUG_ON(!vma_can_userfault(vma)); - WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this @@ -1576,6 +1575,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, if (!vma->vm_userfaultfd_ctx.ctx) goto skip; + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); + if (vma->vm_start > start) start = vma->vm_start; vma_end = min(end, vma->vm_end); From 87d143de94e24bc209b10750b1937d7b5b9ee648 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 19:33:31 +0000 Subject: [PATCH 1070/2608] arm64: dma-mapping: Fix FORCE_CONTIGUOUS buffer clearing commit 3238c359acee4ab57f15abb5a82b8ab38a661ee7 upstream. We need to invalidate the caches *before* clearing the buffer via the non-cacheable alias, else in the worst case __dma_flush_area() may write back dirty lines over the top of our nice new zeros. Fixes: dd65a941f6ba ("arm64: dma-mapping: clear buffers allocated with FORCE_CONTIGUOUS flag") Cc: # 4.18.x- Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 58470b151bc3..ba88b5b68db6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -633,9 +633,9 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, prot, __builtin_return_address(0)); if (addr) { - memset(addr, 0, size); if (!coherent) __dma_flush_area(page_to_virt(page), iosize); + memset(addr, 0, size); } else { iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); dma_release_from_contiguous(dev, page, From 30d358d804dacc04c095a95ba8584fabca155e83 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 20 Nov 2018 01:14:00 +0200 Subject: [PATCH 1071/2608] MMC: OMAP: fix broken MMC on OMAP15XX/OMAP5910/OMAP310 commit e8cde625bfe8a714a856e1366bcbb259d7346095 upstream. Since v2.6.22 or so there has been reports [1] about OMAP MMC being broken on OMAP15XX based hardware (OMAP5910 and OMAP310). The breakage seems to have been caused by commit 46a6730e3ff9 ("mmc-omap: Fix omap to use MMC_POWER_ON") that changed clock enabling to be done on MMC_POWER_ON. This can happen multiple times in a row, and on 15XX the hardware doesn't seem to like it and the MMC just stops responding. Fix by memorizing the power mode and do the init only when necessary. Before the patch (on Palm TE): mmc0: new SD card at address b368 mmcblk0: mmc0:b368 SDC 977 MiB mmci-omap mmci-omap.0: command timeout (CMD18) mmci-omap mmci-omap.0: command timeout (CMD13) mmci-omap mmci-omap.0: command timeout (CMD13) mmci-omap mmci-omap.0: command timeout (CMD12) [x 6] mmci-omap mmci-omap.0: command timeout (CMD13) [x 6] mmcblk0: error -110 requesting status mmci-omap mmci-omap.0: command timeout (CMD8) mmci-omap mmci-omap.0: command timeout (CMD18) mmci-omap mmci-omap.0: command timeout (CMD13) mmci-omap mmci-omap.0: command timeout (CMD13) mmci-omap mmci-omap.0: command timeout (CMD12) [x 6] mmci-omap mmci-omap.0: command timeout (CMD13) [x 6] mmcblk0: error -110 requesting status mmcblk0: recovery failed! print_req_error: I/O error, dev mmcblk0, sector 0 Buffer I/O error on dev mmcblk0, logical block 0, async page read mmcblk0: unable to read partition table After the patch: mmc0: new SD card at address b368 mmcblk0: mmc0:b368 SDC 977 MiB mmcblk0: p1 The patch is based on a fix and analysis done by Ladislav Michl. Tested on OMAP15XX/OMAP310 (Palm TE), OMAP1710 (Nokia 770) and OMAP2420 (Nokia N810). [1] https://marc.info/?t=123175197000003&r=1&w=2 Fixes: 46a6730e3ff9 ("mmc-omap: Fix omap to use MMC_POWER_ON") Reported-by: Ladislav Michl Reported-by: Andrzej Zaborowski Tested-by: Ladislav Michl Acked-by: Tony Lindgren Signed-off-by: Aaro Koskinen Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/omap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index bd49f34d7654..c28c51ad650f 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -104,6 +104,7 @@ struct mmc_omap_slot { unsigned int vdd; u16 saved_con; u16 bus_mode; + u16 power_mode; unsigned int fclk_freq; struct tasklet_struct cover_tasklet; @@ -1157,7 +1158,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct mmc_omap_slot *slot = mmc_priv(mmc); struct mmc_omap_host *host = slot->host; int i, dsor; - int clk_enabled; + int clk_enabled, init_stream; mmc_omap_select_slot(slot, 0); @@ -1167,6 +1168,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) slot->vdd = ios->vdd; clk_enabled = 0; + init_stream = 0; switch (ios->power_mode) { case MMC_POWER_OFF: mmc_omap_set_power(slot, 0, ios->vdd); @@ -1174,13 +1176,17 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) case MMC_POWER_UP: /* Cannot touch dsor yet, just power up MMC */ mmc_omap_set_power(slot, 1, ios->vdd); + slot->power_mode = ios->power_mode; goto exit; case MMC_POWER_ON: mmc_omap_fclk_enable(host, 1); clk_enabled = 1; dsor |= 1 << 11; + if (slot->power_mode != MMC_POWER_ON) + init_stream = 1; break; } + slot->power_mode = ios->power_mode; if (slot->bus_mode != ios->bus_mode) { if (slot->pdata->set_bus_mode != NULL) @@ -1196,7 +1202,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) for (i = 0; i < 2; i++) OMAP_MMC_WRITE(host, CON, dsor); slot->saved_con = dsor; - if (ios->power_mode == MMC_POWER_ON) { + if (init_stream) { /* worst case at 400kHz, 80 cycles makes 200 microsecs */ int usecs = 250; @@ -1234,6 +1240,7 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id) slot->host = host; slot->mmc = mmc; slot->id = id; + slot->power_mode = MMC_POWER_UNDEFINED; slot->pdata = &host->pdata->slots[id]; host->slots[id] = slot; From 38ef9c5a931163eb8abf8f1c654d1b13de26f792 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Thu, 6 Dec 2018 17:24:59 +0800 Subject: [PATCH 1072/2608] mmc: sdhci: fix the timeout check window for clock and reset commit b704441e38f645dcfba1348ca3cc1ba43d1a9f31 upstream. We observed some premature timeouts on a virtualization platform, the log is like this: case 1: [159525.255629] mmc1: Internal clock never stabilised. [159525.255818] mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== [159525.256049] mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00001002 ... [159525.257205] mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x0000fa03 From the clock control register dump, we are pretty sure the clock was stablized. case 2: [ 914.550127] mmc1: Reset 0x2 never completed. [ 914.550321] mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== [ 914.550608] mmc1: sdhci: Sys addr: 0x00000010 | Version: 0x00001002 After checking the sdhci code, we found the timeout check actually has a little window that the CPU can be scheduled out and when it comes back, the original time set or check is not valid. Fixes: 5a436cc0af62 ("mmc: sdhci: Optimize delay loops") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alek Du Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index f063fe569339..0edcc2763f3c 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -193,8 +193,12 @@ void sdhci_reset(struct sdhci_host *host, u8 mask) timeout = ktime_add_ms(ktime_get(), 100); /* hw clears the bit when it's done */ - while (sdhci_readb(host, SDHCI_SOFTWARE_RESET) & mask) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (!(sdhci_readb(host, SDHCI_SOFTWARE_RESET) & mask)) + break; + if (timedout) { pr_err("%s: Reset 0x%x never completed.\n", mmc_hostname(host->mmc), (int)mask); sdhci_dumpregs(host); @@ -1417,9 +1421,13 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk) /* Wait max 20 ms */ timeout = ktime_add_ms(ktime_get(), 20); - while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL)) - & SDHCI_CLOCK_INT_STABLE)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + if (clk & SDHCI_CLOCK_INT_STABLE) + break; + if (timedout) { pr_err("%s: Internal clock never stabilised.\n", mmc_hostname(host->mmc)); sdhci_dumpregs(host); From c1149b873482a1f2a85659692376103e7d052dde Mon Sep 17 00:00:00 2001 From: Chad Austin Date: Mon, 10 Dec 2018 10:54:52 -0800 Subject: [PATCH 1073/2608] fuse: continue to send FUSE_RELEASEDIR when FUSE_OPEN returns ENOSYS commit 2e64ff154ce6ce9a8dc0f9556463916efa6ff460 upstream. When FUSE_OPEN returns ENOSYS, the no_open bit is set on the connection. Because the FUSE_RELEASE and FUSE_RELEASEDIR paths share code, this incorrectly caused the FUSE_RELEASEDIR request to be dropped and never sent to userspace. Pass an isdir bool to distinguish between FUSE_RELEASE and FUSE_RELEASEDIR inside of fuse_file_put. Fixes: 7678ac50615d ("fuse: support clients that don't implement 'open'") Cc: # v3.14 Signed-off-by: Chad Austin Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 2 +- fs/fuse/file.c | 21 +++++++++++---------- fs/fuse/fuse_i.h | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 29868c35c19a..d933ecb7a08c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1424,7 +1424,7 @@ static int fuse_dir_open(struct inode *inode, struct file *file) static int fuse_dir_release(struct inode *inode, struct file *file) { - fuse_release_common(file, FUSE_RELEASEDIR); + fuse_release_common(file, true); return 0; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 47d7a510be5b..52514a64dcd6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -86,12 +86,12 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) iput(req->misc.release.inode); } -static void fuse_file_put(struct fuse_file *ff, bool sync) +static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) { if (refcount_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - if (ff->fc->no_open) { + if (ff->fc->no_open && !isdir) { /* * Drop the release request when client does not * implement 'open' @@ -244,10 +244,11 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) req->in.args[0].value = inarg; } -void fuse_release_common(struct file *file, int opcode) +void fuse_release_common(struct file *file, bool isdir) { struct fuse_file *ff = file->private_data; struct fuse_req *req = ff->reserved_req; + int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; fuse_prepare_release(ff, file->f_flags, opcode); @@ -269,7 +270,7 @@ void fuse_release_common(struct file *file, int opcode) * synchronous RELEASE is allowed (and desirable) in this case * because the server can be trusted not to screw up. */ - fuse_file_put(ff, ff->fc->destroy_req != NULL); + fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir); } static int fuse_open(struct inode *inode, struct file *file) @@ -285,7 +286,7 @@ static int fuse_release(struct inode *inode, struct file *file) if (fc->writeback_cache) write_inode_now(inode, 1); - fuse_release_common(file, FUSE_RELEASE); + fuse_release_common(file, false); /* return value is ignored by VFS */ return 0; @@ -299,7 +300,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags) * iput(NULL) is a no-op and since the refcount is 1 and everything's * synchronous, we are fine with not doing igrab() here" */ - fuse_file_put(ff, true); + fuse_file_put(ff, true, false); } EXPORT_SYMBOL_GPL(fuse_sync_release); @@ -804,7 +805,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) put_page(page); } if (req->ff) - fuse_file_put(req->ff, false); + fuse_file_put(req->ff, false, false); } static void fuse_send_readpages(struct fuse_req *req, struct file *file) @@ -1458,7 +1459,7 @@ static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) __free_page(req->pages[i]); if (req->ff) - fuse_file_put(req->ff, false); + fuse_file_put(req->ff, false, false); } static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) @@ -1615,7 +1616,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) ff = __fuse_write_file_get(fc, fi); err = fuse_flush_times(inode, ff); if (ff) - fuse_file_put(ff, 0); + fuse_file_put(ff, false, false); return err; } @@ -1929,7 +1930,7 @@ static int fuse_writepages(struct address_space *mapping, err = 0; } if (data.ff) - fuse_file_put(data.ff, false); + fuse_file_put(data.ff, false, false); kfree(data.orig_pages); out: diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e105640153ce..e682f2eff6c0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -739,7 +739,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags); /** * Send RELEASE or RELEASEDIR request */ -void fuse_release_common(struct file *file, int opcode); +void fuse_release_common(struct file *file, bool isdir); /** * Send FSYNC or FSYNCDIR request From 529571392b7a3efb91d12d70551ece5c0e60ece6 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sun, 2 Dec 2018 12:12:24 +0100 Subject: [PATCH 1074/2608] ARM: mmp/mmp2: fix cpu_is_mmp2() on mmp2-dt commit 76f4e2c3b6a560cdd7a75b87df543e04d05a9e5f upstream. cpu_is_mmp2() was equivalent to cpu_is_pj4(), wouldn't be correct for multiplatform kernels. Fix it by also considering mmp_chip_id, as is done for cpu_is_pxa168() and cpu_is_pxa910() above. Moreover, it is only available with CONFIG_CPU_MMP2 and thus doesn't work on DT-based MMP2 machines. Enable it on CONFIG_MACH_MMP2_DT too. Note: CONFIG_CPU_MMP2 is only used for machines that use board files instead of DT. It should perhaps be renamed. I'm not doing it now, because I don't have a better idea. Signed-off-by: Lubomir Rintel Acked-by: Arnd Bergmann Cc: stable@vger.kernel.org Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-mmp/cputype.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-mmp/cputype.h b/arch/arm/mach-mmp/cputype.h index 446edaeb78a7..a96abcf521b4 100644 --- a/arch/arm/mach-mmp/cputype.h +++ b/arch/arm/mach-mmp/cputype.h @@ -44,10 +44,12 @@ static inline int cpu_is_pxa910(void) #define cpu_is_pxa910() (0) #endif -#ifdef CONFIG_CPU_MMP2 +#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT) static inline int cpu_is_mmp2(void) { - return (((read_cpuid_id() >> 8) & 0xff) == 0x58); + return (((read_cpuid_id() >> 8) & 0xff) == 0x58) && + (((mmp_chip_id & 0xfff) == 0x410) || + ((mmp_chip_id & 0xfff) == 0x610)); } #else #define cpu_is_mmp2() (0) From cd5d8a9203ed16bfb16a03c570fb4cf7e7899125 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 11 Dec 2018 13:31:40 -0500 Subject: [PATCH 1075/2608] dm thin: send event about thin-pool state change _after_ making it commit f6c367585d0d851349d3a9e607c43e5bea993fa1 upstream. Sending a DM event before a thin-pool state change is about to happen is a bug. It wasn't realized until it became clear that userspace response to the event raced with the actual state change that the event was meant to notify about. Fix this by first updating internal thin-pool state to reflect what the DM event is being issued about. This fixes a long-standing racey/buggy userspace device-mapper-test-suite 'resize_io' test that would get an event but not find the state it was looking for -- so it would just go on to hang because no other events caused the test to reevaluate the thin-pool's state. Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 68 +++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 699c40c7fe60..da98fc7b995c 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t) struct dm_thin_new_mapping; /* - * The pool runs in 4 modes. Ordered in degraded order for comparisons. + * The pool runs in various modes. Ordered in degraded order for comparisons. */ enum pool_mode { PM_WRITE, /* metadata may be changed */ @@ -281,9 +281,38 @@ struct pool { struct dm_bio_prison_cell **cell_sort_array; }; -static enum pool_mode get_pool_mode(struct pool *pool); static void metadata_operation_failed(struct pool *pool, const char *op, int r); +static enum pool_mode get_pool_mode(struct pool *pool) +{ + return pool->pf.mode; +} + +static void notify_of_pool_mode_change(struct pool *pool) +{ + const char *descs[] = { + "write", + "out-of-data-space", + "read-only", + "read-only", + "fail" + }; + const char *extra_desc = NULL; + enum pool_mode mode = get_pool_mode(pool); + + if (mode == PM_OUT_OF_DATA_SPACE) { + if (!pool->pf.error_if_no_space) + extra_desc = " (queue IO)"; + else + extra_desc = " (error IO)"; + } + + dm_table_event(pool->ti->table); + DMINFO("%s: switching pool to %s%s mode", + dm_device_name(pool->pool_md), + descs[(int)mode], extra_desc ? : ""); +} + /* * Target context for a pool. */ @@ -2362,8 +2391,6 @@ static void do_waker(struct work_struct *ws) queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); } -static void notify_of_pool_mode_change_to_oods(struct pool *pool); - /* * We're holding onto IO to allow userland time to react. After the * timeout either the pool will have been resized (and thus back in @@ -2376,7 +2403,7 @@ static void do_no_space_timeout(struct work_struct *ws) if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { pool->pf.error_if_no_space = true; - notify_of_pool_mode_change_to_oods(pool); + notify_of_pool_mode_change(pool); error_retry_list_with_code(pool, BLK_STS_NOSPC); } } @@ -2444,26 +2471,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) /*----------------------------------------------------------------*/ -static enum pool_mode get_pool_mode(struct pool *pool) -{ - return pool->pf.mode; -} - -static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode) -{ - dm_table_event(pool->ti->table); - DMINFO("%s: switching pool to %s mode", - dm_device_name(pool->pool_md), new_mode); -} - -static void notify_of_pool_mode_change_to_oods(struct pool *pool) -{ - if (!pool->pf.error_if_no_space) - notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)"); - else - notify_of_pool_mode_change(pool, "out-of-data-space (error IO)"); -} - static bool passdown_enabled(struct pool_c *pt) { return pt->adjusted_pf.discard_passdown; @@ -2512,8 +2519,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) switch (new_mode) { case PM_FAIL: - if (old_mode != new_mode) - notify_of_pool_mode_change(pool, "failure"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_fail; pool->process_discard = process_bio_fail; @@ -2527,8 +2532,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: - if (!is_read_only_pool_mode(old_mode)) - notify_of_pool_mode_change(pool, "read-only"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_read_only; pool->process_discard = process_bio_success; @@ -2549,8 +2552,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) * alarming rate. Adjust your low water mark if you're * frequently seeing this mode. */ - if (old_mode != new_mode) - notify_of_pool_mode_change_to_oods(pool); pool->out_of_data_space = true; pool->process_bio = process_bio_read_only; pool->process_discard = process_discard_bio; @@ -2563,8 +2564,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) break; case PM_WRITE: - if (old_mode != new_mode) - notify_of_pool_mode_change(pool, "write"); if (old_mode == PM_OUT_OF_DATA_SPACE) cancel_delayed_work_sync(&pool->no_space_timeout); pool->out_of_data_space = false; @@ -2584,6 +2583,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) * doesn't cause an unexpected mode transition on resume. */ pt->adjusted_pf.mode = new_mode; + + if (old_mode != new_mode) + notify_of_pool_mode_change(pool); } static void abort_transaction(struct pool *pool) From d17cc664f1ebc528b11a8db74c5b84910bf4ab14 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 9 Nov 2018 11:56:03 -0500 Subject: [PATCH 1076/2608] dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty() commit 687cf4412a343a63928a5c9d91bdc0f522939d43 upstream. Otherwise dm_bitset_cursor_begin() return -ENODATA. Other calls to dm_bitset_cursor_begin() have similar negative checks. Fixes inability to create a cache in passthrough mode (even though doing so makes no sense). Fixes: 0d963b6e65 ("dm cache metadata: fix metadata2 format's blocks_are_clean_separate_dirty") Cc: stable@vger.kernel.org Reported-by: David Teigland Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 7f1c64c4ad24..bc60db87e6f1 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -929,6 +929,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, bool dirty_flag; *result = true; + if (from_cblock(cmd->cache_blocks) == 0) + /* Nothing to do */ + return 0; + r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, from_cblock(cmd->cache_blocks), &cmd->dirty_cursor); if (r) { From b2e08ad9d4cb0d2c6776ff0a73903084ddc7588b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 9 Dec 2018 21:17:30 -0500 Subject: [PATCH 1077/2608] tracing: Fix memory leak in set_trigger_filter() commit 3cec638b3d793b7cacdec5b8072364b41caeb0e1 upstream. When create_event_filter() fails in set_trigger_filter(), the filter may still be allocated and needs to be freed. The caller expects the data->filter to be updated with the new filter, even if the new filter failed (we could add an error message by setting set_str parameter of create_event_filter(), but that's another update). But because the error would just exit, filter was left hanging and nothing could free it. Found by kmemleak detector. Cc: stable@vger.kernel.org Fixes: bac5fb97a173a ("tracing: Add and use generic set_trigger_filter() implementation") Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 43254c5e7e16..e2da180ca172 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -744,8 +744,10 @@ int set_trigger_filter(char *filter_str, /* The filter is for the 'trigger' event, not the triggered event */ ret = create_event_filter(file->event_call, filter_str, false, &filter); - if (ret) - goto out; + /* + * If create_event_filter() fails, filter still needs to be freed. + * Which the calling code will do with data->filter. + */ assign: tmp = rcu_access_pointer(data->filter); From a28d5f3d9c08a160801754804490dbac4ef9e2e1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 10 Dec 2018 23:58:01 -0500 Subject: [PATCH 1078/2608] tracing: Fix memory leak of instance function hash filters commit 2840f84f74035e5a535959d5f17269c69fa6edc5 upstream. The following commands will cause a memory leak: # cd /sys/kernel/tracing # mkdir instances/foo # echo schedule > instance/foo/set_ftrace_filter # rmdir instances/foo The reason is that the hashes that hold the filters to set_ftrace_filter and set_ftrace_notrace are not freed if they contain any data on the instance and the instance is removed. Found by kmemleak detector. Cc: stable@vger.kernel.org Fixes: 591dffdade9f ("ftrace: Allow for function tracing instance to filter functions") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7379bcf3baa0..9937d7cf2a64 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5534,6 +5534,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) if (ops->flags & FTRACE_OPS_FL_ENABLED) ftrace_shutdown(ops, 0); ops->flags |= FTRACE_OPS_FL_DELETED; + ftrace_free_filter(ops); mutex_unlock(&ftrace_lock); } From e89aa818353bccf626c317f0dee0d1cd7cdc6772 Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Tue, 27 Nov 2018 22:20:48 -0500 Subject: [PATCH 1079/2608] powerpc/msi: Fix NULL pointer access in teardown code commit 78e7b15e17ac175e7eed9e21c6f92d03d3b0a6fa upstream. The arch_teardown_msi_irqs() function assumes that controller ops pointers were already checked in arch_setup_msi_irqs(), but this assumption is wrong: arch_teardown_msi_irqs() can be called even when arch_setup_msi_irqs() returns an error (-ENOSYS). This can happen in the following scenario: - msi_capability_init() calls pci_msi_setup_msi_irqs() - pci_msi_setup_msi_irqs() returns -ENOSYS - msi_capability_init() notices the error and calls free_msi_irqs() - free_msi_irqs() calls pci_msi_teardown_msi_irqs() This is easier to see when CONFIG_PCI_MSI_IRQ_DOMAIN is not set and pci_msi_setup_msi_irqs() and pci_msi_teardown_msi_irqs() are just aliases to arch_setup_msi_irqs() and arch_teardown_msi_irqs(). The call to free_msi_irqs() upon pci_msi_setup_msi_irqs() failure seems legit, as it does additional cleanup; e.g. list_del(&entry->list) and kfree(entry) inside free_msi_irqs() do happen (MSI descriptors are allocated before pci_msi_setup_msi_irqs() is called and need to be cleaned up if that fails). Fixes: 6b2fd7efeb88 ("PCI/MSI/PPC: Remove arch_msi_check_device()") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Radu Rendec Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/msi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index dab616a33b8d..f2197654be07 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -34,5 +34,10 @@ void arch_teardown_msi_irqs(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); - phb->controller_ops.teardown_msi_irqs(dev); + /* + * We can be called even when arch_setup_msi_irqs() returns -ENOSYS, + * so check the pointer again. + */ + if (phb->controller_ops.teardown_msi_irqs) + phb->controller_ops.teardown_msi_irqs(dev); } From d0a954cbfbd09c9106192f880dbe52bf448f11a8 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 11 Dec 2018 18:56:20 -0500 Subject: [PATCH 1080/2608] drm/nouveau/kms: Fix memory leak in nv50_mstm_del() commit 24199c5436f267399afed0c4f1f57663c0408f57 upstream. Noticed this while working on redoing the reference counting scheme in the DP MST helpers. Nouveau doesn't attempt to call drm_dp_mst_topology_mgr_destroy() at all, which leaves it leaking all of the resources for drm_dp_mst_topology_mgr and it's children mstbs+ports. Fixes: f479c0ba4a17 ("drm/nouveau/kms/nv50: initial support for DP 1.2 multi-stream") Signed-off-by: Lyude Paul Cc: # v4.10+ Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nv50_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 926ec51ba5be..25f6a1f6ce20 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -3378,6 +3378,7 @@ nv50_mstm_del(struct nv50_mstm **pmstm) { struct nv50_mstm *mstm = *pmstm; if (mstm) { + drm_dp_mst_topology_mgr_destroy(&mstm->mgr); kfree(*pmstm); *pmstm = NULL; } From 8b1bdb94166862ac57b5a1f39d8e293265326e58 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 5 Dec 2018 10:16:57 -0800 Subject: [PATCH 1081/2608] Revert "drm/rockchip: Allow driver to be shutdown on reboot/kexec" commit 63238173b2faf3d6b85a416f1c69af6c7be2413f upstream. This reverts commit 7f3ef5dedb146e3d5063b6845781ad1bb59b92b5. It causes new warnings [1] on shutdown when running the Google Kevin or Scarlet (RK3399) boards under Chrome OS. Presumably our usage of DRM is different than what Marc and Heiko test. We're looking at a different approach (e.g., [2]) to replace this, but IMO the revert should be taken first, as it already propagated to -stable. [1] Report here: http://lkml.kernel.org/lkml/20181205030127.GA200921@google.com WARNING: CPU: 4 PID: 2035 at drivers/gpu/drm/drm_mode_config.c:477 drm_mode_config_cleanup+0x1c4/0x294 ... Call trace: drm_mode_config_cleanup+0x1c4/0x294 rockchip_drm_unbind+0x4c/0x8c component_master_del+0x88/0xb8 rockchip_drm_platform_remove+0x2c/0x44 rockchip_drm_platform_shutdown+0x20/0x2c platform_drv_shutdown+0x2c/0x38 device_shutdown+0x164/0x1b8 kernel_restart_prepare+0x40/0x48 kernel_restart+0x20/0x68 ... Memory manager not clean during takedown. WARNING: CPU: 4 PID: 2035 at drivers/gpu/drm/drm_mm.c:950 drm_mm_takedown+0x34/0x44 ... drm_mm_takedown+0x34/0x44 rockchip_drm_unbind+0x64/0x8c component_master_del+0x88/0xb8 rockchip_drm_platform_remove+0x2c/0x44 rockchip_drm_platform_shutdown+0x20/0x2c platform_drv_shutdown+0x2c/0x38 device_shutdown+0x164/0x1b8 kernel_restart_prepare+0x40/0x48 kernel_restart+0x20/0x68 ... [2] https://patchwork.kernel.org/patch/10556151/ https://www.spinics.net/lists/linux-rockchip/msg21342.html [PATCH] drm/rockchip: shutdown drm subsystem on shutdown Fixes: 7f3ef5dedb14 ("drm/rockchip: Allow driver to be shutdown on reboot/kexec") Cc: Jeffy Chen Cc: Robin Murphy Cc: Vicente Bergas Cc: Marc Zyngier Cc: Heiko Stuebner Cc: stable@vger.kernel.org Signed-off-by: Brian Norris Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20181205181657.177703-1-briannorris@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 4cacb03f6733..ff3d0f5efbb1 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -425,11 +425,6 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev) return 0; } -static void rockchip_drm_platform_shutdown(struct platform_device *pdev) -{ - rockchip_drm_platform_remove(pdev); -} - static const struct of_device_id rockchip_drm_dt_ids[] = { { .compatible = "rockchip,display-subsystem", }, { /* sentinel */ }, @@ -439,7 +434,6 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids); static struct platform_driver rockchip_drm_platform_driver = { .probe = rockchip_drm_platform_probe, .remove = rockchip_drm_platform_remove, - .shutdown = rockchip_drm_platform_shutdown, .driver = { .name = "rockchip-drm", .of_match_table = rockchip_drm_dt_ids, From 8a20046277e31905a1be1dc159f81668bc871d8c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 6 Dec 2018 08:44:31 +0000 Subject: [PATCH 1082/2608] drm/i915/execlists: Apply a full mb before execution for Braswell commit cf66b8a0ba142fbd1bf10ac8f3ae92d1b0cb7b8f upstream. Braswell is really picky about having our writes posted to memory before we execute or else the GPU may see stale values. A wmb() is insufficient as it only ensures the writes are visible to other cores, we need a full mb() to ensure the writes are in memory and visible to the GPU. The most frequent failure in flushing before execution is that we see stale PTE values and execute the wrong pages. References: 987abd5c62f9 ("drm/i915/execlists: Force write serialisation into context image vs execution") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181206084431.9805-3-chris@chris-wilson.co.uk (cherry picked from commit 490b8c65b9db45896769e1095e78725775f47b3e) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0775e71ea95b..e0483c068d23 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -343,8 +343,13 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) * may not be visible to the HW prior to the completion of the UC * register write and that we may begin execution from the context * before its image is complete leading to invalid PD chasing. + * + * Furthermore, Braswell, at least, wants a full mb to be sure that + * the writes are coherent in memory (visible to the GPU) prior to + * execution, and not just visible to other CPUs (as is the result of + * wmb). */ - wmb(); + mb(); return ce->lrc_desc; } From c8626858b2d873e2528d3099b896459947996247 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Fri, 7 Dec 2018 15:15:03 +0800 Subject: [PATCH 1083/2608] drm/amdgpu: update SMC firmware image for polaris10 variants commit d55d8be0747c96db28a1d08fc24d22ccd9b448ac upstream. Some new variants require different firmwares. Signed-off-by: Junwei Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index e8d9479615c9..bb4b804255a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -723,7 +723,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->revision == 0xe7) || (adev->pdev->revision == 0xef))) || ((adev->pdev->device == 0x6fdf) && - (adev->pdev->revision == 0xef))) { + ((adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); } else From f02ef68bdad39ebf881f64cab027e8696fe57ade Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 5 Dec 2018 15:27:19 +0900 Subject: [PATCH 1084/2608] x86/build: Fix compiler support check for CONFIG_RETPOLINE commit 25896d073d8a0403b07e6dec56f58e6c33678207 upstream. It is troublesome to add a diagnostic like this to the Makefile parse stage because the top-level Makefile could be parsed with a stale include/config/auto.conf. Once you are hit by the error about non-retpoline compiler, the compilation still breaks even after disabling CONFIG_RETPOLINE. The easiest fix is to move this check to the "archprepare" like this commit did: 829fe4aa9ac1 ("x86: Allow generating user-space headers without a compiler") Reported-by: Meelis Roos Tested-by: Meelis Roos Signed-off-by: Masahiro Yamada Acked-by: Zhenzhong Duan Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Zhenzhong Duan Fixes: 4cd24de3a098 ("x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support") Link: http://lkml.kernel.org/r/1543991239-18476-1-git-send-email-yamada.masahiro@socionext.com Link: https://lkml.org/lkml/2018/12/4/206 Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Cc: Gi-Oh Kim Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index ce3658dd98e8..c5290aecdf06 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -241,9 +241,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE -ifeq ($(RETPOLINE_CFLAGS),) - $(error You are building kernel with non-retpoline compiler, please update your compiler.) -endif KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) endif @@ -260,6 +257,13 @@ archprepare: ifeq ($(CONFIG_KEXEC_FILE),y) $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c endif +ifdef CONFIG_RETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + @echo "You are building kernel with non-retpoline compiler." >&2 + @echo "Please update your compiler." >&2 + @false +endif +endif ### # Kernel objects From a9febd662cee68712aea69012944df00033d2dd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 Dec 2018 18:13:51 +0100 Subject: [PATCH 1085/2608] locking: Remove smp_read_barrier_depends() from queued_spin_lock_slowpath() commit 548095dea63ffc016d39c35b32c628d033638aca upstream. Queued spinlocks are not used by DEC Alpha, and furthermore operations such as READ_ONCE() and release/relaxed RMW atomics are being changed to imply smp_read_barrier_depends(). This commit therefore removes the now-redundant smp_read_barrier_depends() from queued_spin_lock_slowpath(), and adjusts the comments accordingly. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 50dc42aeaa56..5541acb79e15 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -170,7 +170,7 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) * @tail : The new queue tail code word * Return: The previous queue tail code word * - * xchg(lock, tail) + * xchg(lock, tail), which heads an address dependency * * p,*,* -> n,*,* ; prev = xchg(lock, node) */ @@ -417,13 +417,11 @@ queue: if (old & _Q_TAIL_MASK) { prev = decode_tail(old); /* - * The above xchg_tail() is also a load of @lock which generates, - * through decode_tail(), a pointer. - * - * The address dependency matches the RELEASE of xchg_tail() - * such that the access to @prev must happen after. + * The above xchg_tail() is also a load of @lock which + * generates, through decode_tail(), a pointer. The address + * dependency matches the RELEASE of xchg_tail() such that + * the subsequent access to @prev happens after. */ - smp_read_barrier_depends(); WRITE_ONCE(prev->next, node); From 13f14c36323d71e2178d7d97dc65808c0e5c872e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:52 +0100 Subject: [PATCH 1086/2608] locking/qspinlock: Ensure node is initialised before updating prev->next commit 95bcade33a8af38755c9b0636e36a36ad3789fe6 upstream. When a locker ends up queuing on the qspinlock locking slowpath, we initialise the relevant mcs node and publish it indirectly by updating the tail portion of the lock word using xchg_tail. If we find that there was a pre-existing locker in the queue, we subsequently update their ->next field to point at our node so that we are notified when it's our turn to take the lock. This can be roughly illustrated as follows: /* Initialise the fields in node and encode a pointer to node in tail */ tail = initialise_node(node); /* * Exchange tail into the lockword using an atomic read-modify-write * operation with release semantics */ old = xchg_tail(lock, tail); /* If there was a pre-existing waiter ... */ if (old & _Q_TAIL_MASK) { prev = decode_tail(old); smp_read_barrier_depends(); /* ... then update their ->next field to point to node. WRITE_ONCE(prev->next, node); } The conditional update of prev->next therefore relies on the address dependency from the result of xchg_tail ensuring order against the prior initialisation of node. However, since the release semantics of the xchg_tail operation apply only to the write portion of the RmW, then this ordering is not guaranteed and it is possible for the CPU to return old before the writes to node have been published, consequently allowing us to point prev->next to an uninitialised node. This patch fixes the problem by making the update of prev->next a RELEASE operation, which also removes the reliance on dependency ordering. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1518528177-19169-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 5541acb79e15..d880296245c5 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -416,14 +416,15 @@ queue: */ if (old & _Q_TAIL_MASK) { prev = decode_tail(old); - /* - * The above xchg_tail() is also a load of @lock which - * generates, through decode_tail(), a pointer. The address - * dependency matches the RELEASE of xchg_tail() such that - * the subsequent access to @prev happens after. - */ - WRITE_ONCE(prev->next, node); + /* + * We must ensure that the stores to @node are observed before + * the write to prev->next. The address dependency from + * xchg_tail is not sufficient to ensure this because the read + * component of xchg_tail is unordered with respect to the + * initialisation of @node. + */ + smp_store_release(&prev->next, node); pv_wait_node(node, prev); arch_mcs_spin_lock_contended(&node->locked); From 3dab30f33814dc0b97c001921d3d724004c09b5f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:53 +0100 Subject: [PATCH 1087/2608] locking/qspinlock: Bound spinning on pending->locked transition in slowpath commit 6512276d97b160d90b53285bd06f7f201459a7e3 upstream. If a locker taking the qspinlock slowpath reads a lock value indicating that only the pending bit is set, then it will spin whilst the concurrent pending->locked transition takes effect. Unfortunately, there is no guarantee that such a transition will ever be observed since concurrent lockers could continuously set pending and hand over the lock amongst themselves, leading to starvation. Whilst this would probably resolve in practice, it means that it is not possible to prove liveness properties about the lock and means that lock acquisition time is unbounded. Rather than removing the pending->locked spinning from the slowpath altogether (which has been shown to heavily penalise a 2-threaded locking stress test on x86), this patch replaces the explicit spinning with a call to atomic_cond_read_relaxed and allows the architecture to provide a bound on the number of spins. For architectures that can respond to changes in cacheline state in their smp_cond_load implementation, it should be sufficient to use the default bound of 1. Suggested-by: Waiman Long Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index d880296245c5..18161264227a 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -76,6 +76,18 @@ #define MAX_NODES 4 #endif +/* + * The pending bit spinning loop count. + * This heuristic is used to limit the number of lockword accesses + * made by atomic_cond_read_relaxed when waiting for the lock to + * transition out of the "== _Q_PENDING_VAL" state. We don't spin + * indefinitely because there's no guarantee that we'll make forward + * progress. + */ +#ifndef _Q_PENDING_LOOPS +#define _Q_PENDING_LOOPS 1 +#endif + /* * Per-CPU queue node structures; we can never have more than 4 nested * contexts: task, softirq, hardirq, nmi. @@ -306,13 +318,15 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) return; /* - * wait for in-progress pending->locked hand-overs + * Wait for in-progress pending->locked hand-overs with a bounded + * number of spins so that we guarantee forward progress. * * 0,1,0 -> 0,0,1 */ if (val == _Q_PENDING_VAL) { - while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL) - cpu_relax(); + int cnt = _Q_PENDING_LOOPS; + val = smp_cond_load_acquire(&lock->val.counter, + (VAL != _Q_PENDING_VAL) || !cnt--); } /* From 5261ad70e2d673b9cfea7c81f309cabc847b7a27 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:54 +0100 Subject: [PATCH 1088/2608] locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock' commit 625e88be1f41b53cec55827c984e4a89ea8ee9f9 upstream. 'struct __qspinlock' provides a handy union of fields so that subcomponents of the lockword can be accessed by name, without having to manage shifts and masks explicitly and take endianness into account. This is useful in qspinlock.h and also potentially in arch headers, so move the 'struct __qspinlock' into 'struct qspinlock' and kill the extra definition. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Acked-by: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-3-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- arch/x86/include/asm/qspinlock.h | 2 +- arch/x86/include/asm/qspinlock_paravirt.h | 3 +- include/asm-generic/qspinlock_types.h | 32 +++++++++++++++- kernel/locking/qspinlock.c | 46 ++--------------------- kernel/locking/qspinlock_paravirt.h | 34 ++++++----------- 5 files changed, 46 insertions(+), 71 deletions(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9982dd96f093..cf4cdf508ef4 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -15,7 +15,7 @@ */ static inline void native_queued_spin_unlock(struct qspinlock *lock) { - smp_store_release((u8 *)lock, 0); + smp_store_release(&lock->locked, 0); } #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 923307ea11c7..9ef5ee03d2d7 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); * * void __pv_queued_spin_unlock(struct qspinlock *lock) * { - * struct __qspinlock *l = (void *)lock; - * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); * * if (likely(lockval == _Q_LOCKED_VAL)) * return; diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 034acd0c4956..0763f065b975 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -29,13 +29,41 @@ #endif typedef struct qspinlock { - atomic_t val; + union { + atomic_t val; + + /* + * By using the whole 2nd least significant byte for the + * pending bit, we can allow better optimization of the lock + * acquisition for the pending bit holder. + */ +#ifdef __LITTLE_ENDIAN + struct { + u8 locked; + u8 pending; + }; + struct { + u16 locked_pending; + u16 tail; + }; +#else + struct { + u16 tail; + u16 locked_pending; + }; + struct { + u8 reserved[2]; + u8 pending; + u8 locked; + }; +#endif + }; } arch_spinlock_t; /* * Initializier */ -#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } /* * Bitfields in the atomic value: diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 18161264227a..e60e618287b4 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -126,40 +126,6 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) -/* - * By using the whole 2nd least significant byte for the pending bit, we - * can allow better optimization of the lock acquisition for the pending - * bit holder. - * - * This internal structure is also used by the set_locked function which - * is not restricted to _Q_PENDING_BITS == 8. - */ -struct __qspinlock { - union { - atomic_t val; -#ifdef __LITTLE_ENDIAN - struct { - u8 locked; - u8 pending; - }; - struct { - u16 locked_pending; - u16 tail; - }; -#else - struct { - u16 tail; - u16 locked_pending; - }; - struct { - u8 reserved[2]; - u8 pending; - u8 locked; - }; -#endif - }; -}; - #if _Q_PENDING_BITS == 8 /** * clear_pending_set_locked - take ownership and clear the pending bit. @@ -171,9 +137,7 @@ struct __qspinlock { */ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL); + WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); } /* @@ -188,13 +152,11 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) */ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) { - struct __qspinlock *l = (void *)lock; - /* * Use release semantics to make sure that the MCS node is properly * initialized before changing the tail code. */ - return (u32)xchg_release(&l->tail, + return (u32)xchg_release(&lock->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; } @@ -249,9 +211,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) */ static __always_inline void set_locked(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - WRITE_ONCE(l->locked, _Q_LOCKED_VAL); + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); } diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 15b6a39366c6..1435ba7954c3 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -70,10 +70,8 @@ struct pv_node { #define queued_spin_trylock(l) pv_queued_spin_steal_lock(l) static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && - (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) { + (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { qstat_inc(qstat_pv_lock_stealing, true); return true; } @@ -88,16 +86,12 @@ static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock) #if _Q_PENDING_BITS == 8 static __always_inline void set_pending(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - WRITE_ONCE(l->pending, 1); + WRITE_ONCE(lock->pending, 1); } static __always_inline void clear_pending(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - WRITE_ONCE(l->pending, 0); + WRITE_ONCE(lock->pending, 0); } /* @@ -107,10 +101,8 @@ static __always_inline void clear_pending(struct qspinlock *lock) */ static __always_inline int trylock_clear_pending(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - return !READ_ONCE(l->locked) && - (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL, + return !READ_ONCE(lock->locked) && + (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL) == _Q_PENDING_VAL); } #else /* _Q_PENDING_BITS == 8 */ @@ -355,7 +347,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - struct __qspinlock *l = (void *)lock; /* * If the vCPU is indeed halted, advance its state to match that of @@ -384,7 +375,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) * the hash table later on at unlock time, no atomic instruction is * needed. */ - WRITE_ONCE(l->locked, _Q_SLOW_VAL); + WRITE_ONCE(lock->locked, _Q_SLOW_VAL); (void)pv_hash(lock, pn); } @@ -399,7 +390,6 @@ static u32 pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - struct __qspinlock *l = (void *)lock; struct qspinlock **lp = NULL; int waitcnt = 0; int loop; @@ -450,13 +440,13 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) * * Matches the smp_rmb() in __pv_queued_spin_unlock(). */ - if (xchg(&l->locked, _Q_SLOW_VAL) == 0) { + if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) { /* * The lock was free and now we own the lock. * Change the lock value back to _Q_LOCKED_VAL * and unhash the table. */ - WRITE_ONCE(l->locked, _Q_LOCKED_VAL); + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); WRITE_ONCE(*lp, NULL); goto gotlock; } @@ -464,7 +454,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) WRITE_ONCE(pn->state, vcpu_hashed); qstat_inc(qstat_pv_wait_head, true); qstat_inc(qstat_pv_wait_again, waitcnt); - pv_wait(&l->locked, _Q_SLOW_VAL); + pv_wait(&lock->locked, _Q_SLOW_VAL); /* * Because of lock stealing, the queue head vCPU may not be @@ -489,7 +479,6 @@ gotlock: __visible void __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) { - struct __qspinlock *l = (void *)lock; struct pv_node *node; if (unlikely(locked != _Q_SLOW_VAL)) { @@ -518,7 +507,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) * Now that we have a reference to the (likely) blocked pv_node, * release the lock. */ - smp_store_release(&l->locked, 0); + smp_store_release(&lock->locked, 0); /* * At this point the memory pointed at by lock can be freed/reused, @@ -544,7 +533,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) #ifndef __pv_queued_spin_unlock __visible void __pv_queued_spin_unlock(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; u8 locked; /* @@ -552,7 +540,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0); + locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); if (likely(locked == _Q_LOCKED_VAL)) return; From 7a617996cba577806960e87882e60d7444d01df4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:55 +0100 Subject: [PATCH 1089/2608] locking/qspinlock: Remove unbounded cmpxchg() loop from locking slowpath commit 59fb586b4a07b4e1a0ee577140ab4842ba451acd upstream. The qspinlock locking slowpath utilises a "pending" bit as a simple form of an embedded test-and-set lock that can avoid the overhead of explicit queuing in cases where the lock is held but uncontended. This bit is managed using a cmpxchg() loop which tries to transition the uncontended lock word from (0,0,0) -> (0,0,1) or (0,0,1) -> (0,1,1). Unfortunately, the cmpxchg() loop is unbounded and lockers can be starved indefinitely if the lock word is seen to oscillate between unlocked (0,0,0) and locked (0,0,1). This could happen if concurrent lockers are able to take the lock in the cmpxchg() loop without queuing and pass it around amongst themselves. This patch fixes the problem by unconditionally setting _Q_PENDING_VAL using atomic_fetch_or, and then inspecting the old value to see whether we need to spin on the current lock owner, or whether we now effectively hold the lock. The tricky scenario is when concurrent lockers end up queuing on the lock and the lock becomes available, causing us to see a lockword of (n,0,0). With pending now set, simply queuing could lead to deadlock as the head of the queue may not have observed the pending flag being cleared. Conversely, if the head of the queue did observe pending being cleared, then it could transition the lock from (n,0,0) -> (0,0,1) meaning that any attempt to "undo" our setting of the pending bit could race with a concurrent locker trying to set it. We handle this race by preserving the pending bit when taking the lock after reaching the head of the queue and leaving the tail entry intact if we saw pending set, because we know that the tail is going to be updated shortly. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-6-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 102 ++++++++++++++++------------ kernel/locking/qspinlock_paravirt.h | 5 -- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index e60e618287b4..7bd053e528c2 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -127,6 +127,17 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) #if _Q_PENDING_BITS == 8 +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + WRITE_ONCE(lock->pending, 0); +} + /** * clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure @@ -162,6 +173,17 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) #else /* _Q_PENDING_BITS == 8 */ +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + atomic_andnot(_Q_PENDING_VAL, &lock->val); +} + /** * clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure @@ -266,7 +288,7 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { struct mcs_spinlock *prev, *next, *node; - u32 new, old, tail; + u32 old, tail; int idx; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -289,59 +311,51 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) (VAL != _Q_PENDING_VAL) || !cnt--); } + /* + * If we observe any contention; queue. + */ + if (val & ~_Q_LOCKED_MASK) + goto queue; + /* * trylock || pending * * 0,0,0 -> 0,0,1 ; trylock * 0,0,1 -> 0,1,1 ; pending */ - for (;;) { + val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); + if (!(val & ~_Q_LOCKED_MASK)) { /* - * If we observe any contention; queue. + * We're pending, wait for the owner to go away. + * + * *,1,1 -> *,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all + * clear_pending_set_locked() implementations imply full + * barriers. */ - if (val & ~_Q_LOCKED_MASK) - goto queue; - - new = _Q_LOCKED_VAL; - if (val == new) - new |= _Q_PENDING_VAL; + if (val & _Q_LOCKED_MASK) { + smp_cond_load_acquire(&lock->val.counter, + !(VAL & _Q_LOCKED_MASK)); + } /* - * Acquire semantic is required here as the function may - * return immediately if the lock was free. + * take ownership and clear the pending bit. + * + * *,1,0 -> *,0,1 */ - old = atomic_cmpxchg_acquire(&lock->val, val, new); - if (old == val) - break; - - val = old; + clear_pending_set_locked(lock); + return; } /* - * we won the trylock + * If pending was clear but there are waiters in the queue, then + * we need to undo our setting of pending before we queue ourselves. */ - if (new == _Q_LOCKED_VAL) - return; - - /* - * we're pending, wait for the owner to go away. - * - * *,1,1 -> *,1,0 - * - * this wait loop must be a load-acquire such that we match the - * store-release that clears the locked bit and create lock - * sequentiality; this is because not all clear_pending_set_locked() - * implementations imply full barriers. - */ - smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK)); - - /* - * take ownership and clear the pending bit. - * - * *,1,0 -> *,0,1 - */ - clear_pending_set_locked(lock); - return; + if (!(val & _Q_PENDING_MASK)) + clear_pending(lock); /* * End of pending bit optimistic spinning and beginning of MCS @@ -445,15 +459,15 @@ locked: * claim the lock: * * n,0,0 -> 0,0,1 : lock, uncontended - * *,0,0 -> *,0,1 : lock, contended + * *,*,0 -> *,*,1 : lock, contended * - * If the queue head is the only one in the queue (lock value == tail), - * clear the tail code and grab the lock. Otherwise, we only need - * to grab the lock. + * If the queue head is the only one in the queue (lock value == tail) + * and nobody is pending, clear the tail code and grab the lock. + * Otherwise, we only need to grab the lock. */ for (;;) { /* In the PV case we might already have _Q_LOCKED_VAL set */ - if ((val & _Q_TAIL_MASK) != tail) { + if ((val & _Q_TAIL_MASK) != tail || (val & _Q_PENDING_MASK)) { set_locked(lock); break; } diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 1435ba7954c3..854443f7b60b 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -89,11 +89,6 @@ static __always_inline void set_pending(struct qspinlock *lock) WRITE_ONCE(lock->pending, 1); } -static __always_inline void clear_pending(struct qspinlock *lock) -{ - WRITE_ONCE(lock->pending, 0); -} - /* * The pending bit check in pv_queued_spin_steal_lock() isn't a memory * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the From 075703d79c3a0a603a69e09167f96c1a2502eabe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:56 +0100 Subject: [PATCH 1090/2608] locking/qspinlock: Remove duplicate clear_pending() function from PV code commit 3bea9adc96842b8a7345c7fb202c16ae9c8d5b25 upstream. The native clear_pending() function is identical to the PV version, so the latter can simply be removed. This fixes the build for systems with >= 16K CPUs using the PV lock implementation. Reported-by: Waiman Long Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20180427101619.GB21705@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock_paravirt.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 854443f7b60b..1e882dfc8b79 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -106,11 +106,6 @@ static __always_inline void set_pending(struct qspinlock *lock) atomic_or(_Q_PENDING_VAL, &lock->val); } -static __always_inline void clear_pending(struct qspinlock *lock) -{ - atomic_andnot(_Q_PENDING_VAL, &lock->val); -} - static __always_inline int trylock_clear_pending(struct qspinlock *lock) { int val = atomic_read(&lock->val); From f2f76a2c666e9b09fa1fb903bdb20c756dc90e21 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:57 +0100 Subject: [PATCH 1091/2608] locking/qspinlock: Kill cmpxchg() loop when claiming lock from head of queue commit c61da58d8a9ba9238250a548f00826eaf44af0f7 upstream. When a queued locker reaches the head of the queue, it claims the lock by setting _Q_LOCKED_VAL in the lockword. If there isn't contention, it must also clear the tail as part of this operation so that subsequent lockers can avoid taking the slowpath altogether. Currently this is expressed as a cmpxchg() loop that practically only runs up to two iterations. This is confusing to the reader and unhelpful to the compiler. Rewrite the cmpxchg() loop without the loop, so that a failed cmpxchg() implies that there is contention and we just need to write to _Q_LOCKED_VAL without considering the rest of the lockword. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-7-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 7bd053e528c2..841550dfb7b8 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -465,24 +465,21 @@ locked: * and nobody is pending, clear the tail code and grab the lock. * Otherwise, we only need to grab the lock. */ - for (;;) { - /* In the PV case we might already have _Q_LOCKED_VAL set */ - if ((val & _Q_TAIL_MASK) != tail || (val & _Q_PENDING_MASK)) { - set_locked(lock); - break; - } + + /* In the PV case we might already have _Q_LOCKED_VAL set */ + if ((val & _Q_TAIL_MASK) == tail) { /* * The smp_cond_load_acquire() call above has provided the - * necessary acquire semantics required for locking. At most - * two iterations of this loop may be ran. + * necessary acquire semantics required for locking. */ old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL); if (old == val) - goto release; /* No contention */ - - val = old; + goto release; /* No contention */ } + /* Either somebody is queued behind us or _Q_PENDING_VAL is set */ + set_locked(lock); + /* * contended path; wait for next if not observed yet, release. */ From 49849a651b8456263391ae767be6f7d02af8ef26 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Dec 2018 18:13:58 +0100 Subject: [PATCH 1092/2608] locking/qspinlock: Re-order code commit 53bf57fab7321fb42b703056a4c80fc9d986d170 upstream. Flip the branch condition after atomic_fetch_or_acquire(_Q_PENDING_VAL) such that we loose the indent. This also result in a more natural code flow IMO. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andrea.parri@amarulasolutions.com Cc: longman@redhat.com Link: https://lkml.kernel.org/r/20181003130257.156322446@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 54 ++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 841550dfb7b8..9ffc2f9af8b8 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -324,38 +324,36 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * 0,0,1 -> 0,1,1 ; pending */ val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); - if (!(val & ~_Q_LOCKED_MASK)) { - /* - * We're pending, wait for the owner to go away. - * - * *,1,1 -> *,1,0 - * - * this wait loop must be a load-acquire such that we match the - * store-release that clears the locked bit and create lock - * sequentiality; this is because not all - * clear_pending_set_locked() implementations imply full - * barriers. - */ - if (val & _Q_LOCKED_MASK) { - smp_cond_load_acquire(&lock->val.counter, - !(VAL & _Q_LOCKED_MASK)); - } - - /* - * take ownership and clear the pending bit. - * - * *,1,0 -> *,0,1 - */ - clear_pending_set_locked(lock); - return; + /* + * If we observe any contention; undo and queue. + */ + if (unlikely(val & ~_Q_LOCKED_MASK)) { + if (!(val & _Q_PENDING_MASK)) + clear_pending(lock); + goto queue; } /* - * If pending was clear but there are waiters in the queue, then - * we need to undo our setting of pending before we queue ourselves. + * We're pending, wait for the owner to go away. + * + * 0,1,1 -> 0,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all + * clear_pending_set_locked() implementations imply full + * barriers. */ - if (!(val & _Q_PENDING_MASK)) - clear_pending(lock); + if (val & _Q_LOCKED_MASK) + smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK)); + + /* + * take ownership and clear the pending bit. + * + * 0,1,0 -> 0,0,1 + */ + clear_pending_set_locked(lock); + return; /* * End of pending bit optimistic spinning and beginning of MCS From 4e21502d37a503d4311e2bdb46a66aef783a7cfe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:59 +0100 Subject: [PATCH 1093/2608] locking/qspinlock/x86: Increase _Q_PENDING_LOOPS upper bound commit b247be3fe89b6aba928bf80f4453d1c4ba8d2063 upstream. On x86, atomic_cond_read_relaxed will busy-wait with a cpu_relax() loop, so it is desirable to increase the number of times we spin on the qspinlock lockword when it is found to be transitioning from pending to locked. According to Waiman Long: | Ideally, the spinning times should be at least a few times the typical | cacheline load time from memory which I think can be down to 100ns or | so for each cacheline load with the newest systems or up to several | hundreds ns for older systems. which in his benchmarking corresponded to 512 iterations. Suggested-by: Waiman Long Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-5-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- arch/x86/include/asm/qspinlock.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index cf4cdf508ef4..2cb6624acaec 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -6,6 +6,8 @@ #include #include +#define _Q_PENDING_LOOPS (1 << 9) + #define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock From 5d01e063296a80e93abe93803602e5cf37309865 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Dec 2018 18:14:00 +0100 Subject: [PATCH 1094/2608] locking/qspinlock, x86: Provide liveness guarantee commit 7aa54be2976550f17c11a1c3e3630002dea39303 upstream. On x86 we cannot do fetch_or() with a single instruction and thus end up using a cmpxchg loop, this reduces determinism. Replace the fetch_or() with a composite operation: tas-pending + load. Using two instructions of course opens a window we previously did not have. Consider the scenario: CPU0 CPU1 CPU2 1) lock trylock -> (0,0,1) 2) lock trylock /* fail */ 3) unlock -> (0,0,0) 4) lock trylock -> (0,0,1) 5) tas-pending -> (0,1,1) load-val <- (0,1,0) from 3 6) clear-pending-set-locked -> (0,0,1) FAIL: _2_ owners where 5) is our new composite operation. When we consider each part of the qspinlock state as a separate variable (as we can when _Q_PENDING_BITS == 8) then the above is entirely possible, because tas-pending will only RmW the pending byte, so the later load is able to observe prior tail and lock state (but not earlier than its own trylock, which operates on the whole word, due to coherence). To avoid this we need 2 things: - the load must come after the tas-pending (obviously, otherwise it can trivially observe prior state). - the tas-pending must be a full word RmW instruction, it cannot be an XCHGB for example, such that we cannot observe other state prior to setting pending. On x86 we can realize this by using "LOCK BTS m32, r32" for tas-pending followed by a regular load. Note that observing later state is not a problem: - if we fail to observe a later unlock, we'll simply spin-wait for that store to become visible. - if we observe a later xchg_tail(), there is no difference from that xchg_tail() having taken place before the tas-pending. Suggested-by: Will Deacon Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Will Deacon Cc: Linus Torvalds Cc: Peter Zijlstra Cc: andrea.parri@amarulasolutions.com Cc: longman@redhat.com Fixes: 59fb586b4a07 ("locking/qspinlock: Remove unbounded cmpxchg() loop from locking slowpath") Link: https://lkml.kernel.org/r/20181003130957.183726335@infradead.org Signed-off-by: Ingo Molnar [bigeasy: GEN_BINARY_RMWcc macro redo] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- arch/x86/include/asm/qspinlock.h | 21 +++++++++++++++++++++ kernel/locking/qspinlock.c | 17 ++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 2cb6624acaec..f784b95e44df 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -5,9 +5,30 @@ #include #include #include +#include #define _Q_PENDING_LOOPS (1 << 9) +#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire + +static __always_inline bool __queued_RMW_btsl(struct qspinlock *lock) +{ + GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, + "I", _Q_PENDING_OFFSET, "%0", c); +} + +static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) +{ + u32 val = 0; + + if (__queued_RMW_btsl(lock)) + val |= _Q_PENDING_VAL; + + val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK; + + return val; +} + #define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 9ffc2f9af8b8..1011a1b292ac 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -225,6 +225,20 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) } #endif /* _Q_PENDING_BITS == 8 */ +/** + * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending + * @lock : Pointer to queued spinlock structure + * Return: The previous lock value + * + * *,*,* -> *,1,* + */ +#ifndef queued_fetch_set_pending_acquire +static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) +{ + return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); +} +#endif + /** * set_locked - Set the lock bit and own the lock * @lock: Pointer to queued spinlock structure @@ -323,7 +337,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * 0,0,0 -> 0,0,1 ; trylock * 0,0,1 -> 0,1,1 ; pending */ - val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); + val = queued_fetch_set_pending_acquire(lock); + /* * If we observe any contention; undo and queue. */ From 2a35d21a4d12fe774c46d17831e2938781179c83 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 Oct 2017 12:33:42 -0600 Subject: [PATCH 1095/2608] elevator: lookup mq vs non-mq elevators [ Upstream commit 2527d99789e248576ac8081530cd4fd88730f8c7 ] If an IO scheduler is selected via elevator= and it doesn't match the driver in question wrt blk-mq support, then we fail to boot. The elevator= parameter is deprecated and only supported for non-mq devices. Augment the elevator lookup API so that we pass in if we're looking for an mq capable scheduler or not, so that we only ever return a valid type for the queue in question. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196695 Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/elevator.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index 153926a90901..8320d97240be 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -83,12 +83,15 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(elv_bio_merge_ok); -static struct elevator_type *elevator_find(const char *name) +/* + * Return scheduler with name 'name' and with matching 'mq capability + */ +static struct elevator_type *elevator_find(const char *name, bool mq) { struct elevator_type *e; list_for_each_entry(e, &elv_list, list) { - if (!strcmp(e->elevator_name, name)) + if (!strcmp(e->elevator_name, name) && (mq == e->uses_mq)) return e; } @@ -100,25 +103,25 @@ static void elevator_put(struct elevator_type *e) module_put(e->elevator_owner); } -static struct elevator_type *elevator_get(const char *name, bool try_loading) +static struct elevator_type *elevator_get(struct request_queue *q, + const char *name, bool try_loading) { struct elevator_type *e; spin_lock(&elv_list_lock); - e = elevator_find(name); + e = elevator_find(name, q->mq_ops != NULL); if (!e && try_loading) { spin_unlock(&elv_list_lock); request_module("%s-iosched", name); spin_lock(&elv_list_lock); - e = elevator_find(name); + e = elevator_find(name, q->mq_ops != NULL); } if (e && !try_module_get(e->elevator_owner)) e = NULL; spin_unlock(&elv_list_lock); - return e; } @@ -144,8 +147,12 @@ void __init load_default_elevator_module(void) if (!chosen_elevator[0]) return; + /* + * Boot parameter is deprecated, we haven't supported that for MQ. + * Only look for non-mq schedulers from here. + */ spin_lock(&elv_list_lock); - e = elevator_find(chosen_elevator); + e = elevator_find(chosen_elevator, false); spin_unlock(&elv_list_lock); if (!e) @@ -202,7 +209,7 @@ int elevator_init(struct request_queue *q, char *name) q->boundary_rq = NULL; if (name) { - e = elevator_get(name, true); + e = elevator_get(q, name, true); if (!e) return -EINVAL; } @@ -214,7 +221,7 @@ int elevator_init(struct request_queue *q, char *name) * allowed from async. */ if (!e && !q->mq_ops && *chosen_elevator) { - e = elevator_get(chosen_elevator, false); + e = elevator_get(q, chosen_elevator, false); if (!e) printk(KERN_ERR "I/O scheduler %s not found\n", chosen_elevator); @@ -229,17 +236,17 @@ int elevator_init(struct request_queue *q, char *name) */ if (q->mq_ops) { if (q->nr_hw_queues == 1) - e = elevator_get("mq-deadline", false); + e = elevator_get(q, "mq-deadline", false); if (!e) return 0; } else - e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); + e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false); if (!e) { printk(KERN_ERR "Default I/O scheduler not found. " \ "Using noop.\n"); - e = elevator_get("noop", false); + e = elevator_get(q, "noop", false); } } @@ -905,7 +912,7 @@ int elv_register(struct elevator_type *e) /* register, don't allow duplicate names */ spin_lock(&elv_list_lock); - if (elevator_find(e->elevator_name)) { + if (elevator_find(e->elevator_name, e->uses_mq)) { spin_unlock(&elv_list_lock); if (e->icq_cache) kmem_cache_destroy(e->icq_cache); @@ -1066,7 +1073,7 @@ static int __elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, NULL); strlcpy(elevator_name, name, sizeof(elevator_name)); - e = elevator_get(strstrip(elevator_name), true); + e = elevator_get(q, strstrip(elevator_name), true); if (!e) return -EINVAL; @@ -1076,15 +1083,6 @@ static int __elevator_change(struct request_queue *q, const char *name) return 0; } - if (!e->uses_mq && q->mq_ops) { - elevator_put(e); - return -EINVAL; - } - if (e->uses_mq && !q->mq_ops) { - elevator_put(e); - return -EINVAL; - } - return elevator_switch(q, e); } From 5515c5bd3f560e89222cce52ea2f71c71687a2e8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jan 2018 11:16:50 +0100 Subject: [PATCH 1096/2608] netfilter: ipset: Fix wraparound in hash:*net* types [ Upstream commit 0b8d9073539e217f79ec1bff65eb205ac796723d ] Fix wraparound bug which could lead to memory exhaustion when adding an x.x.x.x-255.255.255.255 range to any hash:*net* types. Fixes Netfilter's bugzilla id #1212, reported by Thomas Schwark. Fixes: 48596a8ddc46 ("netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses") Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 26 +++++++-------- net/netfilter/ipset/ip_set_hash_net.c | 9 +++-- net/netfilter/ipset/ip_set_hash_netiface.c | 9 +++-- net/netfilter/ipset/ip_set_hash_netnet.c | 28 ++++++++-------- net/netfilter/ipset/ip_set_hash_netport.c | 19 ++++++----- net/netfilter/ipset/ip_set_hash_netportnet.c | 35 ++++++++++---------- 6 files changed, 63 insertions(+), 63 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index a2f19b9906e9..543518384aa7 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -168,7 +168,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2; bool with_ports = false; u8 cidr; int ret; @@ -269,22 +269,21 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); } - if (retried) + if (retried) { ip = ntohl(h->next.ip); + p = ntohs(h->next.port); + ip2 = ntohl(h->next.ip2); + } else { + p = port; + ip2 = ip2_from; + } for (; ip <= ip_to; ip++) { e.ip = htonl(ip); - p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) - : port; for (; p <= port_to; p++) { e.port = htons(p); - ip2 = retried && - ip == ntohl(h->next.ip) && - p == ntohs(h->next.port) - ? ntohl(h->next.ip2) : ip2_from; - while (ip2 <= ip2_to) { + do { e.ip2 = htonl(ip2); - ip2_last = ip_set_range_to_cidr(ip2, ip2_to, - &cidr); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); e.cidr = cidr - 1; ret = adtfn(set, &e, &ext, &ext, flags); @@ -292,9 +291,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ret = 0; - ip2 = ip2_last + 1; - } + } while (ip2++ < ip2_to); + ip2 = ip2_from; } + p = port; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1c67a1761e45..5449e23af13a 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -143,7 +143,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, last; + u32 ip = 0, ip_to = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -193,16 +193,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) ip = ntohl(h->next.ip); - while (ip <= ip_to) { + do { e.ip = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; - ip = last + 1; - } + } while (ip++ < ip_to); return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index d417074f1c1a..f5164c1efce2 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -200,7 +200,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, last; + u32 ip = 0, ip_to = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -255,17 +255,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (ip <= ip_to) { + do { e.ip = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; - ip = last + 1; - } + } while (ip++ < ip_to); return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 7f9ae2e9645b..5a2b923bd81f 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -169,8 +169,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, last; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; + u32 ip = 0, ip_to = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -247,27 +247,27 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - if (retried) + if (retried) { ip = ntohl(h->next.ip[0]); + ip2 = ntohl(h->next.ip[1]); + } else { + ip2 = ip2_from; + } - while (ip <= ip_to) { + do { e.ip[0] = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); - ip2 = (retried && - ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) - : ip2_from; - while (ip2 <= ip2_to) { + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); + do { e.ip[1] = htonl(ip2); - last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; - ip2 = last2 + 1; - } - ip = last + 1; - } + } while (ip2++ < ip2_to); + ip2 = ip2_from; + } while (ip++ < ip_to); return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index e6ef382febe4..1a187be9ebc8 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -161,7 +161,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; + u32 port, port_to, p = 0, ip = 0, ip_to = 0; bool with_ports = false; u8 cidr; int ret; @@ -239,25 +239,26 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } - if (retried) + if (retried) { ip = ntohl(h->next.ip); - while (ip <= ip_to) { + p = ntohs(h->next.port); + } else { + p = port; + } + do { e.ip = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &cidr); + ip = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; - p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) - : port; for (; p <= port_to; p++) { e.port = htons(p); ret = adtfn(set, &e, &ext, &ext, flags); - if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } - ip = last + 1; - } + p = port; + } while (ip++ < ip_to); return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 0e6e40c6f652..613e18e720a4 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -184,8 +184,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; + u32 ip = 0, ip_to = 0, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2; bool with_ports = false; int ret; @@ -288,33 +288,34 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - if (retried) + if (retried) { ip = ntohl(h->next.ip[0]); + p = ntohs(h->next.port); + ip2 = ntohl(h->next.ip[1]); + } else { + p = port; + ip2 = ip2_from; + } - while (ip <= ip_to) { + do { e.ip[0] = htonl(ip); - ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); - p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) - : port; + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); for (; p <= port_to; p++) { e.port = htons(p); - ip2 = (retried && ip == ntohl(h->next.ip[0]) && - p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) - : ip2_from; - while (ip2 <= ip2_to) { + do { e.ip[1] = htonl(ip2); - ip2_last = ip_set_range_to_cidr(ip2, ip2_to, - &e.cidr[1]); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, + &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; - ip2 = ip2_last + 1; - } + } while (ip2++ < ip2_to); + ip2 = ip2_from; } - ip = ip_last + 1; - } + p = port; + } while (ip++ < ip_to); return ret; } From 603bd4dc73594c8ac39a578d9fae997ea04e3a17 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 26 Mar 2018 16:21:04 +0300 Subject: [PATCH 1097/2608] mac80211: don't WARN on bad WMM parameters from buggy APs [ Upstream commit c470bdc1aaf36669e04ba65faf1092b2d1c6cabe ] Apparently, some APs are buggy enough to send a zeroed WMM IE. Don't WARN on this since this is not caused by a bug on the client's system. This aligns the condition of the WARNING in drv_conf_tx with the validity check in ieee80211_sta_wmm_params. We will now pick the default values whenever we get a zeroed WMM IE. This has been reported here: https://bugzilla.kernel.org/show_bug.cgi?id=199161 Fixes: f409079bb678 ("mac80211: sanity check CW_min/CW_max towards driver") Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 328ac10084e4..75909a744121 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1861,7 +1861,8 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params[ac].acm = acm; params[ac].uapsd = uapsd; - if (params[ac].cw_min > params[ac].cw_max) { + if (params->cw_min == 0 || + params[ac].cw_min > params[ac].cw_max) { sdata_info(sdata, "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", params[ac].cw_min, params[ac].cw_max, aci); From 8f8a5a9be2b422af6a7ea12a8fe2bdefdab754c5 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 3 Apr 2018 11:35:22 +0300 Subject: [PATCH 1098/2608] mac80211: Fix condition validating WMM IE [ Upstream commit 911a26484c33e10de6237228ca1d7293548e9f49 ] Commit c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs") handled cases where an AP reports a zeroed WMM IE. However, the condition that checks the validity accessed the wrong index in the ieee80211_tx_queue_params array, thus wrongly deducing that the parameters are invalid. Fix it. Fixes: c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs") Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 75909a744121..4c59b5507e7a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1861,7 +1861,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params[ac].acm = acm; params[ac].uapsd = uapsd; - if (params->cw_min == 0 || + if (params[ac].cw_min == 0 || params[ac].cw_min > params[ac].cw_max) { sdata_info(sdata, "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", From 12f75e8ad273df5b4e0bdab97d55cc23be1a0d7f Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 18 Dec 2018 16:04:18 -0500 Subject: [PATCH 1099/2608] IB/hfi1: Remove race conditions in user_sdma send path commit 28a9a9e83ceae2cee25b9af9ad20d53aaa9ab951 upstream Packet queue state is over used to determine SDMA descriptor availablitity and packet queue request state. cpu 0 ret = user_sdma_send_pkts(req, pcount); cpu 0 if (atomic_read(&pq->n_reqs)) cpu 1 IRQ user_sdma_txreq_cb calls pq_update() (state to _INACTIVE) cpu 0 xchg(&pq->state, SDMA_PKT_Q_ACTIVE); At this point pq->n_reqs == 0 and pq->state is incorrectly SDMA_PKT_Q_ACTIVE. The close path will hang waiting for the state to return to _INACTIVE. This can also change the state from _DEFERRED to _ACTIVE. However, this is a mostly benign race. Remove the racy code path. Use n_reqs to determine if a packet queue is active or not. Cc: # 4.14.0> Reviewed-by: Mitko Haralanov Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_sdma.c | 24 ++++++++++-------------- drivers/infiniband/hw/hfi1/user_sdma.h | 9 +++++---- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index c14ec04f2a89..cbe5ab26d95b 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -187,7 +187,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; pq->n_max_reqs = hfi1_sdma_comp_ring_size; - pq->state = SDMA_PKT_Q_INACTIVE; atomic_set(&pq->n_reqs, 0); init_waitqueue_head(&pq->wait); atomic_set(&pq->n_locked, 0); @@ -276,7 +275,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, /* Wait until all requests have been freed. */ wait_event_interruptible( pq->wait, - (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); + !atomic_read(&pq->n_reqs)); kfree(pq->reqs); kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); @@ -312,6 +311,13 @@ static u8 dlid_to_selector(u16 dlid) return mapping[hash]; } +/** + * hfi1_user_sdma_process_request() - Process and start a user sdma request + * @fd: valid file descriptor + * @iovec: array of io vectors to process + * @dim: overall iovec array size + * @count: number of io vector array entries processed + */ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct iovec *iovec, unsigned long dim, unsigned long *count) @@ -560,20 +566,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->ahg_idx = sdma_ahg_alloc(req->sde); set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); + pq->state = SDMA_PKT_Q_ACTIVE; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); if (unlikely(ret < 0 && ret != -EBUSY)) goto free_req; - /* - * It is possible that the SDMA engine would have processed all the - * submitted packets by the time we get here. Therefore, only set - * packet queue state to ACTIVE if there are still uncompleted - * requests. - */ - if (atomic_read(&pq->n_reqs)) - xchg(&pq->state, SDMA_PKT_Q_ACTIVE); - /* * This is a somewhat blocking send implementation. * The driver will block the caller until all packets of the @@ -1391,10 +1389,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) { - if (atomic_dec_and_test(&pq->n_reqs)) { - xchg(&pq->state, SDMA_PKT_Q_INACTIVE); + if (atomic_dec_and_test(&pq->n_reqs)) wake_up(&pq->wait); - } } static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 5af52334b7dc..2b5326d6db53 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -94,9 +94,10 @@ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -#define SDMA_PKT_Q_INACTIVE BIT(0) -#define SDMA_PKT_Q_ACTIVE BIT(1) -#define SDMA_PKT_Q_DEFERRED BIT(2) +enum pkt_q_sdma_state { + SDMA_PKT_Q_ACTIVE, + SDMA_PKT_Q_DEFERRED, +}; /* * Maximum retry attempts to submit a TX request @@ -124,7 +125,7 @@ struct hfi1_user_sdma_pkt_q { struct user_sdma_request *reqs; unsigned long *req_in_use; struct iowait busy; - unsigned state; + enum pkt_q_sdma_state state; wait_queue_head_t wait; unsigned long unpinned; struct mmu_rb_handler *handler; From 1f972505011c27d09423a46707789192a039b66a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Jun 2018 20:35:26 -0400 Subject: [PATCH 1100/2608] locking/qspinlock: Fix build for anonymous union in older GCC compilers [ Upstream commit 6cc65be4f6f2a7186af8f3e09900787c7912dad2 ] One of my tests compiles the kernel with gcc 4.5.3, and I hit the following build error: include/linux/semaphore.h: In function 'sema_init': include/linux/semaphore.h:35:17: error: unknown field 'val' specified in initializer include/linux/semaphore.h:35:17: warning: missing braces around initializer include/linux/semaphore.h:35:17: warning: (near initialization for '(anonymous).raw_lock..val') I bisected it down to: 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'") ... which makes qspinlock have an anonymous union, which makes initializing it special for older compilers. By adding strategic brackets, it makes the build happy again. Signed-off-by: Steven Rostedt (VMware) Acked-by: Waiman Long Cc: Andrew Morton Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Fixes: 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'") Link: http://lkml.kernel.org/r/20180621203526.172ab5c4@vmware.local.home Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- include/asm-generic/qspinlock_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 0763f065b975..d10f1e7d6ba8 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -63,7 +63,7 @@ typedef struct qspinlock { /* * Initializier */ -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } /* * Bitfields in the atomic value: From 0d2d1629780025966c3fc793c201e387da18f5be Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 5 Oct 2018 23:22:06 +0300 Subject: [PATCH 1101/2608] mac80211_hwsim: fix module init error paths for netlink [ Upstream commit 05cc09de4c017663a217630682041066f2f9a5cd ] There is no unregister netlink notifier and family on error paths in init_mac80211_hwsim(). Also there is an error path where hwsim_class is not destroyed. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Fixes: 62759361eb49 ("mac80211-hwsim: Provide multicast event for HWSIM_CMD_NEW_RADIO") Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 670224be3c8b..8f57ca969c9f 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3472,16 +3472,16 @@ static int __init init_mac80211_hwsim(void) if (err) goto out_unregister_pernet; + err = hwsim_init_netlink(); + if (err) + goto out_unregister_driver; + hwsim_class = class_create(THIS_MODULE, "mac80211_hwsim"); if (IS_ERR(hwsim_class)) { err = PTR_ERR(hwsim_class); - goto out_unregister_driver; + goto out_exit_netlink; } - err = hwsim_init_netlink(); - if (err < 0) - goto out_unregister_driver; - for (i = 0; i < radios; i++) { struct hwsim_new_radio_params param = { 0 }; @@ -3587,6 +3587,8 @@ out_free_mon: free_netdev(hwsim_mon); out_free_radios: mac80211_hwsim_free(); +out_exit_netlink: + hwsim_exit_netlink(); out_unregister_driver: platform_driver_unregister(&mac80211_hwsim_driver); out_unregister_pernet: From ce28c745aafcd5ae6a5a85731c8585e1cf0e3e2e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 15 Nov 2018 11:05:10 -0800 Subject: [PATCH 1102/2608] Input: hyper-v - fix wakeup from suspend-to-idle [ Upstream commit 10f91c73cc41ceead210a905dbd196398e99c7d2 ] It makes little sense but still possible to put Hyper-V guests into suspend-to-idle state. To wake them up two wakeup sources were registered in the past: hyperv-keyboard and hid-hyperv. However, since commit eed4d47efe95 ("ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle") pm_wakeup_event() from these devices is ignored. Switch to pm_wakeup_hard_event() API as these devices are actually the only possible way to wakeup Hyper-V guests. Fixes: eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle) Reviewed-by: Rafael J. Wysocki Acked-by: K. Y. Srinivasan Acked-by: Jiri Kosina Signed-off-by: Vitaly Kuznetsov Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/hid/hid-hyperv.c | 2 +- drivers/input/serio/hyperv-keyboard.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 6039f071fab1..5f1de24206ab 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -309,7 +309,7 @@ static void mousevsc_on_receive(struct hv_device *device, hid_input_report(input_dev->hid_device, HID_INPUT_REPORT, input_dev->input_buf, len, 1); - pm_wakeup_event(&input_dev->device->device, 0); + pm_wakeup_hard_event(&input_dev->device->device); break; default: diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index 25151d9214e0..55288a026e4e 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ b/drivers/input/serio/hyperv-keyboard.c @@ -177,7 +177,7 @@ static void hv_kbd_on_receive(struct hv_device *hv_dev, * state because the Enter-UP can trigger a wakeup at once. */ if (!(info & IS_BREAK)) - pm_wakeup_event(&hv_dev->device, 0); + pm_wakeup_hard_event(&hv_dev->device); break; From 1e30cdb191150be718bb01d0eb21c790ac24a1c3 Mon Sep 17 00:00:00 2001 From: Fred Herard Date: Tue, 20 Nov 2018 20:22:45 -0500 Subject: [PATCH 1103/2608] scsi: libiscsi: Fix NULL pointer dereference in iscsi_eh_session_reset [ Upstream commit 5db6dd14b31397e8cccaaddab2ff44ebec1acf25 ] This commit addresses NULL pointer dereference in iscsi_eh_session_reset. Reference should not be made to session->leadconn when session->state is set to ISCSI_STATE_TERMINATE. Signed-off-by: Fred Herard Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libiscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index cf8a15e54d83..3ff536b350a1 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2416,8 +2416,8 @@ int iscsi_eh_session_reset(struct scsi_cmnd *sc) failed: ISCSI_DBG_EH(session, "failing session reset: Could not log back into " - "%s, %s [age %d]\n", session->targetname, - conn->persistent_address, session->age); + "%s [age %d]\n", session->targetname, + session->age); spin_unlock_bh(&session->frwd_lock); mutex_unlock(&session->eh_mutex); return FAILED; From 3a2c2aae1bca2e176bcc93f02ce4cc8fe8bda326 Mon Sep 17 00:00:00 2001 From: Cathy Avery Date: Tue, 27 Nov 2018 14:28:53 -0500 Subject: [PATCH 1104/2608] scsi: vmw_pscsi: Rearrange code to avoid multiple calls to free_irq during unload [ Upstream commit 02f425f811cefcc4d325d7a72272651e622dc97e ] Currently pvscsi_remove calls free_irq more than once as pvscsi_release_resources and __pvscsi_shutdown both call pvscsi_shutdown_intr. This results in a 'Trying to free already-free IRQ' warning and stack trace. To solve the problem pvscsi_shutdown_intr has been moved out of pvscsi_release_resources. Signed-off-by: Cathy Avery Reviewed-by: Ewan D. Milne Reviewed-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/vmw_pvscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 0cd947f78b5b..890b8aaf95e1 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -1202,8 +1202,6 @@ static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter) static void pvscsi_release_resources(struct pvscsi_adapter *adapter) { - pvscsi_shutdown_intr(adapter); - if (adapter->workqueue) destroy_workqueue(adapter->workqueue); @@ -1535,6 +1533,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id) out_reset_adapter: ll_adapter_reset(adapter); out_release_resources: + pvscsi_shutdown_intr(adapter); pvscsi_release_resources(adapter); scsi_host_put(host); out_disable_device: @@ -1543,6 +1542,7 @@ out_disable_device: return error; out_release_resources_and_disable: + pvscsi_shutdown_intr(adapter); pvscsi_release_resources(adapter); goto out_disable_device; } From 81710cedad2fa7ea3221cdf77bcb1d39683f07ac Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 29 Nov 2018 18:12:30 +0100 Subject: [PATCH 1105/2608] x86/earlyprintk/efi: Fix infinite loop on some screen widths [ Upstream commit 79c2206d369b87b19ac29cb47601059b6bf5c291 ] An affected screen resolution is 1366 x 768, which width is not divisible by 8, the default font width. On such screens, when longer lines are earlyprintk'ed, overflow-to-next-line can never trigger, due to the left-most x-coordinate of the next character always less than the screen width. Earlyprintk will infinite loop in trying to print the rest of the string but unable to, due to the line being full. This patch makes the trigger consider the right-most x-coordinate, instead of left-most, as the value to compare against the screen width threshold. Signed-off-by: YiFei Zhu Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-12-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/platform/efi/early_printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 5fdacb322ceb..c3e6be110b7d 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -179,7 +179,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) num--; } - if (efi_x >= si->lfb_width) { + if (efi_x + font->width > si->lfb_width) { efi_x = 0; efi_y += font->height; } From 5094dea5acab748453e75fcb53db6fe9ce629ab7 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 3 Oct 2018 16:22:31 -0400 Subject: [PATCH 1106/2608] drm/msm: Grab a vblank reference when waiting for commit_done [ Upstream commit 3b712e43e3876b42b38321ecf790a1f5fe59c834 ] Similar to the atomic helpers, we should enable vblank while we're waiting for the commit to finish. DPU needs this, MDP5 seems to work fine without it. Reviewed-by: Abhinav Kumar Signed-off-by: Sean Paul Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_atomic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 025d454163b0..8f77047a226d 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -93,7 +93,12 @@ static void msm_atomic_wait_for_commit_done(struct drm_device *dev, if (!new_crtc_state->active) continue; + if (drm_crtc_vblank_get(crtc)) + continue; + kms->funcs->wait_for_crtc_commit_done(kms, crtc); + + drm_crtc_vblank_put(crtc); } } From 32f91e02d0e93535b914bb8ed7111e5004dfdecd Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 30 Nov 2018 09:47:31 +0000 Subject: [PATCH 1107/2608] ARC: io.h: Implement reads{x}()/writes{x}() [ Upstream commit 10d443431dc2bb733cf7add99b453e3fb9047a2e ] Some ARC CPU's do not support unaligned loads/stores. Currently, generic implementation of reads{b/w/l}()/writes{b/w/l}() is being used with ARC. This can lead to misfunction of some drivers as generic functions do a plain dereference of a pointer that can be unaligned. Let's use {get/put}_unaligned() helpers instead of plain dereference of pointer in order to fix. The helpers allow to get and store data from an unaligned address whilst preserving the CPU internal alignment. According to [1], the use of these helpers are costly in terms of performance so we added an initial check for a buffer already aligned so that the usage of the helpers can be avoided, when possible. [1] Documentation/unaligned-memory-access.txt Cc: Alexey Brodkin Cc: Joao Pinto Cc: David Laight Tested-by: Vitor Soares Signed-off-by: Jose Abreu Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/include/asm/io.h | 72 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index c22b181e8206..2f39d9b3886e 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_ISA_ARCV2 #include @@ -94,6 +95,42 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) return w; } +/* + * {read,write}s{b,w,l}() repeatedly access the same IO address in + * native endianness in 8-, 16-, 32-bit chunks {into,from} memory, + * @count times + */ +#define __raw_readsx(t,f) \ +static inline void __raw_reads##f(const volatile void __iomem *addr, \ + void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + u##t x = __raw_read##f(addr); \ + *buf++ = x; \ + } while (--count); \ + } else { \ + do { \ + u##t x = __raw_read##f(addr); \ + put_unaligned(x, buf++); \ + } while (--count); \ + } \ +} + +#define __raw_readsb __raw_readsb +__raw_readsx(8, b) +#define __raw_readsw __raw_readsw +__raw_readsx(16, w) +#define __raw_readsl __raw_readsl +__raw_readsx(32, l) + #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 b, volatile void __iomem *addr) { @@ -126,6 +163,35 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } +#define __raw_writesx(t,f) \ +static inline void __raw_writes##f(volatile void __iomem *addr, \ + const void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + const u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + __raw_write##f(*buf++, addr); \ + } while (--count); \ + } else { \ + do { \ + __raw_write##f(get_unaligned(buf++), addr); \ + } while (--count); \ + } \ +} + +#define __raw_writesb __raw_writesb +__raw_writesx(8, b) +#define __raw_writesw __raw_writesw +__raw_writesx(16, w) +#define __raw_writesl __raw_writesl +__raw_writesx(32, l) + /* * MMIO can also get buffered/optimized in micro-arch, so barriers needed * Based on ARM model for the typical use case @@ -141,10 +207,16 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) #define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) #define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) +#define readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); }) +#define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); }) +#define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); }) #define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) #define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) #define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) +#define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); }) +#define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); }) +#define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); }) /* * Relaxed API for drivers which can handle barrier ordering themselves From 87c1a07b0e7f7127aa4be68864ef9cf6112e0535 Mon Sep 17 00:00:00 2001 From: Toni Peltonen Date: Tue, 27 Nov 2018 16:56:57 +0200 Subject: [PATCH 1108/2608] bonding: fix 802.3ad state sent to partner when unbinding slave [ Upstream commit 3b5b3a3331d141e8f2a7aaae3a94dfa1e61ecbe4 ] Previously when unbinding a slave the 802.3ad implementation only told partner that the port is not suitable for aggregation by setting the port aggregation state from aggregatable to individual. This is not enough. If the physical layer still stays up and we only unbinded this port from the bond there is nothing in the aggregation status alone to prevent the partner from sending traffic towards us. To ensure that the partner doesn't consider this port at all anymore we should also disable collecting and distributing to signal that this actor is going away. Also clear AD_STATE_SYNCHRONIZATION to ensure partner exits collecting + distributing state. I have tested this behaviour againts Arista EOS switches with mlx5 cards (physical link stays up even when interface is down) and simulated the same situation virtually Linux <-> Linux with two network namespaces running two veth device pairs. In both cases setting aggregation to individual doesn't alone prevent traffic from being to sent towards this port given that the link stays up in partners end. Partner still keeps it's end in collecting + distributing state and continues until timeout is reached. In most cases this means we are losing the traffic partner sends towards our port while we wait for timeout. This is most visible with slow periodic time (LACP rate slow). Other open source implementations like Open VSwitch and libreswitch, and vendor implementations like Arista EOS, seem to disable collecting + distributing to when doing similar port disabling/detaching/removing change. With this patch kernel implementation would behave the same way and ensure partner doesn't consider our actor viable anymore. Signed-off-by: Toni Peltonen Signed-off-by: Jay Vosburgh Acked-by: Jonathan Toppins Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_3ad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index f43fb2f958a5..93dfcef8afc4 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2086,6 +2086,9 @@ void bond_3ad_unbind_slave(struct slave *slave) aggregator->aggregator_identifier); /* Tell the partner that this port is not suitable for aggregation */ + port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~AD_STATE_COLLECTING; + port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~AD_STATE_AGGREGATION; __update_lacpdu_from_port(port); ad_lacpdu_send(port); From 27be23532ed8cdc4dbe872b9b3b16887d3252e17 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 28 Nov 2018 22:33:53 -0800 Subject: [PATCH 1109/2608] bpf: Fix verifier log string check for bad alignment. [ Upstream commit c01ac66b38660f2b507ccd0b75d28e3002d56fbb ] The message got changed a lot time ago. This was responsible for 36 test case failures on sparc64. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 041dbbb30ff0..a0591d06c61b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8070,7 +8070,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, reject_from_alignment = fd_prog < 0 && (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "Unknown alignment."); + strstr(bpf_vlog, "misaligned"); #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (reject_from_alignment) { printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", From 76413fbfc7c1193b9b5b2a0fde8a90003adb32d3 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Tue, 27 Nov 2018 19:31:30 +0000 Subject: [PATCH 1110/2608] nfs: don't dirty kernel pages read by direct-io [ Upstream commit ad3cba223ac02dc769c3bbe88efe277bbb457566 ] When we use direct_IO with an NFS backing store, we can trigger a WARNING in __set_page_dirty(), as below, since we're dirtying the page unnecessarily in nfs_direct_read_completion(). To fix, replicate the logic in commit 53cbf3b157a0 ("fs: direct-io: don't dirtying pages for ITER_BVEC/ITER_KVEC direct read"). Other filesystems that implement direct_IO handle this; most use blockdev_direct_IO(). ceph and cifs have similar logic. mount 127.0.0.1:/export /nfs dd if=/dev/zero of=/nfs/image bs=1M count=200 losetup --direct-io=on -f /nfs/image mkfs.btrfs /dev/loop0 mount -t btrfs /dev/loop0 /mnt/ kernel: WARNING: CPU: 0 PID: 8067 at fs/buffer.c:580 __set_page_dirty+0xaf/0xd0 kernel: Modules linked in: loop(E) nfsv3(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) nfs(E) fscache(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) fuse(E) tun(E) ip6t_rpfilter(E) ipt_REJECT(E) nf_ kernel: snd_seq(E) snd_seq_device(E) snd_pcm(E) video(E) snd_timer(E) snd(E) soundcore(E) ip_tables(E) xfs(E) libcrc32c(E) sd_mod(E) sr_mod(E) cdrom(E) ata_generic(E) pata_acpi(E) crc32c_intel(E) ahci(E) li kernel: CPU: 0 PID: 8067 Comm: kworker/0:2 Tainted: G E 4.20.0-rc1.master.20181111.ol7.x86_64 #1 kernel: Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 kernel: Workqueue: nfsiod rpc_async_release [sunrpc] kernel: RIP: 0010:__set_page_dirty+0xaf/0xd0 kernel: Code: c3 48 8b 02 f6 c4 04 74 d4 48 89 df e8 ba 05 f7 ff 48 89 c6 eb cb 48 8b 43 08 a8 01 75 1f 48 89 d8 48 8b 00 a8 04 74 02 eb 87 <0f> 0b eb 83 48 83 e8 01 eb 9f 48 83 ea 01 0f 1f 00 eb 8b 48 83 e8 kernel: RSP: 0000:ffffc1c8825b7d78 EFLAGS: 00013046 kernel: RAX: 000fffffc0020089 RBX: fffff2b603308b80 RCX: 0000000000000001 kernel: RDX: 0000000000000001 RSI: ffff9d11478115c8 RDI: ffff9d11478115d0 kernel: RBP: ffffc1c8825b7da0 R08: 0000646f6973666e R09: 8080808080808080 kernel: R10: 0000000000000001 R11: 0000000000000000 R12: ffff9d11478115d0 kernel: R13: ffff9d11478115c8 R14: 0000000000003246 R15: 0000000000000001 kernel: FS: 0000000000000000(0000) GS:ffff9d115ba00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007f408686f640 CR3: 0000000104d8e004 CR4: 00000000000606f0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 kernel: Call Trace: kernel: __set_page_dirty_buffers+0xb6/0x110 kernel: set_page_dirty+0x52/0xb0 kernel: nfs_direct_read_completion+0xc4/0x120 [nfs] kernel: nfs_pgio_release+0x10/0x20 [nfs] kernel: rpc_free_task+0x30/0x70 [sunrpc] kernel: rpc_async_release+0x12/0x20 [sunrpc] kernel: process_one_work+0x174/0x390 kernel: worker_thread+0x4f/0x3e0 kernel: kthread+0x102/0x140 kernel: ? drain_workqueue+0x130/0x130 kernel: ? kthread_stop+0x110/0x110 kernel: ret_from_fork+0x35/0x40 kernel: ---[ end trace 01341980905412c9 ]--- Signed-off-by: Dave Kleikamp Signed-off-by: Santosh Shilimkar [forward-ported to v4.20] Signed-off-by: Calum Mackay Reviewed-by: Dave Kleikamp Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/direct.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 621c517b325c..89c03a507dd9 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -98,8 +98,11 @@ struct nfs_direct_req { struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ struct work_struct work; int flags; + /* for write */ #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + /* for read */ +#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ struct nfs_writeverf verf; /* unstable write verifier */ }; @@ -412,7 +415,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; - if (!PageCompound(page) && bytes < hdr->good_bytes) + if (!PageCompound(page) && bytes < hdr->good_bytes && + (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY)) set_page_dirty(page); bytes += req->wb_bytes; nfs_list_remove_request(req); @@ -587,6 +591,9 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + if (iter_is_iovec(iter)) + dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; From c3f68415c9871e4828255ffb52b01ed4fe28dca1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Dec 2018 23:18:00 -0500 Subject: [PATCH 1111/2608] SUNRPC: Fix a potential race in xprt_connect() [ Upstream commit 0a9a4304f3614e25d9de9b63502ca633c01c0d70 ] If an asynchronous connection attempt completes while another task is in xprt_connect(), then the call to rpc_sleep_on() could end up racing with the call to xprt_wake_pending_tasks(). So add a second test of the connection state after we've put the task to sleep and set the XPRT_CONNECTING flag, when we know that there can be no asynchronous connection attempts still in progress. Fixes: 0b9e79431377d ("SUNRPC: Move the test for XPRT_CONNECTING into...") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/xprt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8eb0c4f3b3e9..d0282cc88b14 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -780,8 +780,15 @@ void xprt_connect(struct rpc_task *task) return; if (xprt_test_and_set_connecting(xprt)) return; - xprt->stat.connect_start = jiffies; - xprt->ops->connect(xprt, task); + /* Race breaker */ + if (!xprt_connected(xprt)) { + xprt->stat.connect_start = jiffies; + xprt->ops->connect(xprt, task); + } else { + xprt_clear_connecting(xprt); + task->tk_status = 0; + rpc_wake_up_queued_task(&xprt->pending, task); + } } xprt_release_write(xprt, task); } From 923e09c74a9ca2761d3fa5f2c31b497360138244 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 20 Nov 2018 08:30:40 -0500 Subject: [PATCH 1112/2608] sbus: char: add of_node_put() [ Upstream commit 87d81a23e24f24ebe014891e8bdf3ff8785031e8 ] use of_node_put() to release the refcount. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/sbus/char/display7seg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c index f32765d3cbd8..db761aca8667 100644 --- a/drivers/sbus/char/display7seg.c +++ b/drivers/sbus/char/display7seg.c @@ -221,6 +221,7 @@ static int d7s_probe(struct platform_device *op) dev_set_drvdata(&op->dev, p); d7s_device = p; err = 0; + of_node_put(opts); out: return err; From 6b37106bc20ab5265b113a67563d4fb0b5191153 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 20 Nov 2018 08:38:26 -0500 Subject: [PATCH 1113/2608] drivers/sbus/char: add of_node_put() [ Upstream commit 6bd520ab7cf69486ea81fd3cdfd2d5a390ad1100 ] use of_node_put() to release the refcount. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/sbus/char/envctrl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 56e962a01493..b8481927bfe4 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -910,8 +910,10 @@ static void envctrl_init_i2c_child(struct device_node *dp, for (len = 0; len < PCF8584_MAX_CHANNELS; ++len) { pchild->mon_type[len] = ENVCTRL_NOMON; } + of_node_put(root_node); return; } + of_node_put(root_node); } /* Get the monitor channels. */ From a19e57563ce7416727c7b318162972d780334020 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 21 Nov 2018 10:22:54 -0500 Subject: [PATCH 1114/2608] drivers/tty: add missing of_node_put() [ Upstream commit dac097c4546e4c5b16dd303a1e97c1d319c8ab3e ] of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. This place is not doing this, so fix it. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/tty/serial/suncore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/suncore.c b/drivers/tty/serial/suncore.c index 127472bd6a7c..209f314745ab 100644 --- a/drivers/tty/serial/suncore.c +++ b/drivers/tty/serial/suncore.c @@ -111,6 +111,7 @@ void sunserial_console_termios(struct console *con, struct device_node *uart_dp) mode = of_get_property(dp, mode_prop, NULL); if (!mode) mode = "9600,8,n,1,-"; + of_node_put(dp); } cflag = CREAD | HUPCL | CLOCAL; From 8b9860ad1efa031be8b37bbd60cdb699d0fdb64c Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 20 Nov 2018 08:02:49 -0500 Subject: [PATCH 1115/2608] ide: pmac: add of_node_put() [ Upstream commit a51921c0db3fd26c4ed83dc0ec5d32988fa02aa5 ] use of_node_put() to release the refcount. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/ide/pmac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index c5b902b86b44..203ed4adc04a 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -920,6 +920,7 @@ static u8 pmac_ide_cable_detect(ide_hwif_t *hwif) struct device_node *root = of_find_node_by_path("/"); const char *model = of_get_property(root, "model", NULL); + of_node_put(root); /* Get cable type from device-tree. */ if (cable && !strncmp(cable, "80-", 3)) { /* Some drives fail to detect 80c cable in PowerBook */ From 308d65a18461c90b2e4e14dac55478376382a5d3 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 29 Nov 2018 14:01:50 +0800 Subject: [PATCH 1116/2608] drm/msm: Fix error return checking [ Upstream commit 098336deb946f37a70afc0979af388b615c378bf ] The error checks on ret for a negative error return always fails because the return value of iommu_map_sg() is unsigned and can never be negative. Detected with Coccinelle: drivers/gpu/drm/msm/msm_iommu.c:69:9-12: WARNING: Unsigned expression compared with zero: ret < 0 Signed-off-by: Wen Yang CC: Rob Clark CC: David Airlie CC: Julia Lawall CC: linux-arm-msm@vger.kernel.org CC: dri-devel@lists.freedesktop.org CC: freedreno@lists.freedesktop.org CC: linux-kernel@vger.kernel.org Signed-off-by: Sean Paul Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index b23d33622f37..2a90aa4caec0 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -66,7 +66,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, // pm_runtime_get_sync(mmu->dev); ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); // pm_runtime_put_sync(mmu->dev); - WARN_ON(ret < 0); + WARN_ON(!ret); return (ret == len) ? 0 : -EINVAL; } From d5f2432339ba2827d75f682887eac604afb06535 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Dec 2018 17:50:55 +0300 Subject: [PATCH 1117/2608] clk: mvebu: Off by one bugs in cp110_of_clk_get() [ Upstream commit d9f5b7f5dd0fa74a89de5a7ac1e26366f211ccee ] These > comparisons should be >= to prevent reading beyond the end of of the clk_data->hws[] buffer. The clk_data->hws[] array is allocated in cp110_syscon_common_probe() when we do: cp110_clk_data = devm_kzalloc(dev, sizeof(*cp110_clk_data) + sizeof(struct clk_hw *) * CP110_CLK_NUM, GFP_KERNEL); As you can see, it has CP110_CLK_NUM elements which is equivalent to CP110_MAX_CORE_CLOCKS + CP110_MAX_GATABLE_CLOCKS. Fixes: d3da3eaef7f4 ("clk: mvebu: new driver for Armada CP110 system controller") Signed-off-by: Dan Carpenter Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mvebu/cp110-system-controller.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/mvebu/cp110-system-controller.c b/drivers/clk/mvebu/cp110-system-controller.c index ca9a0a536174..05c127cafa46 100644 --- a/drivers/clk/mvebu/cp110-system-controller.c +++ b/drivers/clk/mvebu/cp110-system-controller.c @@ -203,11 +203,11 @@ static struct clk_hw *cp110_of_clk_get(struct of_phandle_args *clkspec, unsigned int idx = clkspec->args[1]; if (type == CP110_CLK_TYPE_CORE) { - if (idx > CP110_MAX_CORE_CLOCKS) + if (idx >= CP110_MAX_CORE_CLOCKS) return ERR_PTR(-EINVAL); return clk_data->hws[idx]; } else if (type == CP110_CLK_TYPE_GATABLE) { - if (idx > CP110_MAX_GATABLE_CLOCKS) + if (idx >= CP110_MAX_GATABLE_CLOCKS) return ERR_PTR(-EINVAL); return clk_data->hws[CP110_MAX_CORE_CLOCKS + idx]; } From 2f74717d0237cde6fa63a70be9de7b275e2e8cd0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Dec 2018 17:51:43 +0300 Subject: [PATCH 1118/2608] clk: mmp: Off by one in mmp_clk_add() [ Upstream commit 2e85c57493e391b93445c1e0d530b36b95becc64 ] The > comparison should be >= or we write one element beyond the end of the unit->clk_table[] array. (The unit->clk_table[] array is allocated in the mmp_clk_init() function and it has unit->nr_clks elements). Fixes: 4661fda10f8b ("clk: mmp: add basic support functions for DT support") Signed-off-by: Dan Carpenter Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mmp/clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/mmp/clk.c b/drivers/clk/mmp/clk.c index ad8d483a35cd..ca7d37e2c7be 100644 --- a/drivers/clk/mmp/clk.c +++ b/drivers/clk/mmp/clk.c @@ -183,7 +183,7 @@ void mmp_clk_add(struct mmp_clk_unit *unit, unsigned int id, pr_err("CLK %d has invalid pointer %p\n", id, clk); return; } - if (id > unit->nr_clks) { + if (id >= unit->nr_clks) { pr_err("CLK %d is invalid\n", id); return; } From 2dc84c590cea6b1852e866132edb0079d477dbf6 Mon Sep 17 00:00:00 2001 From: Teika Kazura Date: Mon, 3 Dec 2018 11:26:03 -0800 Subject: [PATCH 1119/2608] Input: synaptics - enable SMBus for HP 15-ay000 [ Upstream commit 5a6dab15f7a79817cab4af612ddd99eda793fce6 ] SMBus works fine for the touchpad with id SYN3221, used in the HP 15-ay000 series, This device has been reported in these messages in the "linux-input" mailing list: * https://marc.info/?l=linux-input&m=152016683003369&w=2 * https://www.spinics.net/lists/linux-input/msg52525.html Reported-by: Nitesh Debnath Reported-by: Teika Kazura Signed-off-by: Teika Kazura Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 65c9095eb517..54f0d037b5b6 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -178,6 +178,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0096", /* X280 */ "LEN0097", /* X280 -> ALPS trackpoint */ "LEN200f", /* T450s */ + "SYN3221", /* HP 15-ay000 */ NULL }; From 647492ad7341cc76a9ebafeb67cea1664f674cca Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 3 Dec 2018 11:24:30 -0800 Subject: [PATCH 1120/2608] Input: omap-keypad - fix keyboard debounce configuration [ Upstream commit 6c3516fed7b61a3527459ccfa67fab130d910610 ] I noticed that the Android v3.0.8 kernel on droid4 is using different keypad values from the mainline kernel and does not have issues with keys occasionally being stuck until pressed again. Turns out there was an earlier patch posted to fix this as "Input: omap-keypad: errata i689: Correct debounce time", but it was never reposted to fix use macros for timing calculations. This updated version is using macros, and also fixes the use of the input clock rate to use 32768KiHz instead of 32000KiHz. And we want to use the known good Android kernel values of 3 and 6 instead of 2 and 6 in the earlier patch. Reported-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/omap4-keypad.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index 940d38b08e6b..ce8e2baf31bb 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -60,8 +60,18 @@ /* OMAP4 values */ #define OMAP4_VAL_IRQDISABLE 0x0 -#define OMAP4_VAL_DEBOUNCINGTIME 0x7 -#define OMAP4_VAL_PVT 0x7 + +/* + * Errata i689: If a key is released for a time shorter than debounce time, + * the keyboard will idle and never detect the key release. The workaround + * is to use at least a 12ms debounce time. See omap5432 TRM chapter + * "26.4.6.2 Keyboard Controller Timer" for more information. + */ +#define OMAP4_KEYPAD_PTV_DIV_128 0x6 +#define OMAP4_KEYPAD_DEBOUNCINGTIME_MS(dbms, ptv) \ + ((((dbms) * 1000) / ((1 << ((ptv) + 1)) * (1000000 / 32768))) - 1) +#define OMAP4_VAL_DEBOUNCINGTIME_16MS \ + OMAP4_KEYPAD_DEBOUNCINGTIME_MS(16, OMAP4_KEYPAD_PTV_DIV_128) enum { KBD_REVISION_OMAP4 = 0, @@ -181,9 +191,9 @@ static int omap4_keypad_open(struct input_dev *input) kbd_writel(keypad_data, OMAP4_KBD_CTRL, OMAP4_DEF_CTRL_NOSOFTMODE | - (OMAP4_VAL_PVT << OMAP4_DEF_CTRL_PTV_SHIFT)); + (OMAP4_KEYPAD_PTV_DIV_128 << OMAP4_DEF_CTRL_PTV_SHIFT)); kbd_writel(keypad_data, OMAP4_KBD_DEBOUNCINGTIME, - OMAP4_VAL_DEBOUNCINGTIME); + OMAP4_VAL_DEBOUNCINGTIME_16MS); /* clear pending interrupts */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)); From 4b427e57f4f6eb3ad1c02d3faa3d551336f2fd59 Mon Sep 17 00:00:00 2001 From: Juha-Matti Tilli Date: Sun, 2 Dec 2018 12:47:08 +0200 Subject: [PATCH 1121/2608] libata: whitelist all SAMSUNG MZ7KM* solid-state disks [ Upstream commit fd6f32f78645db32b6b95a42e45da2ddd6de0e67 ] These devices support read zero after trim (RZAT), as they advertise to the OS. However, the OS doesn't believe the SSDs unless they are explicitly whitelisted. Acked-by: Martin K. Petersen Signed-off-by: Juha-Matti Tilli Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6938bd86ff1c..04f406d7e973 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4593,6 +4593,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "SAMSUNG*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "SAMSUNG*MZ7KM*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "ST[1248][0248]0[FH]*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, /* From 1a130bc711d3931567575c0449cdd255859c1e58 Mon Sep 17 00:00:00 2001 From: Anderson Luiz Alves Date: Fri, 30 Nov 2018 21:58:36 -0200 Subject: [PATCH 1122/2608] mv88e6060: disable hardware level MAC learning [ Upstream commit a74515604a7b171f2702bdcbd1e231225fb456d0 ] Disable hardware level MAC learning because it breaks station roaming. When enabled it drops all frames that arrive from a MAC address that is on a different port at learning table. Signed-off-by: Anderson Luiz Alves Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6060.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index f123ed57630d..86b41840f41a 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -114,8 +114,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds) /* Reset the switch. */ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, GLOBAL_ATU_CONTROL_SWRESET | - GLOBAL_ATU_CONTROL_ATUSIZE_1024 | - GLOBAL_ATU_CONTROL_ATE_AGE_5MIN); + GLOBAL_ATU_CONTROL_LEARNDIS); /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; @@ -140,13 +139,10 @@ static int mv88e6060_setup_global(struct dsa_switch *ds) */ REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, GLOBAL_CONTROL_MAX_FRAME_1536); - /* Enable automatic address learning, set the address - * database size to 1024 entries, and set the default aging - * time to 5 minutes. + /* Disable automatic address learning. */ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, - GLOBAL_ATU_CONTROL_ATUSIZE_1024 | - GLOBAL_ATU_CONTROL_ATE_AGE_5MIN); + GLOBAL_ATU_CONTROL_LEARNDIS); return 0; } From e7c36edac701bad12b54f182de7b63b137307e62 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 2 Dec 2018 14:34:37 +0200 Subject: [PATCH 1123/2608] net/mlx4_en: Fix build break when CONFIG_INET is off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1b603f9e4313348608f256b564ed6e3d9e67f377 ] MLX4_EN depends on NETDEVICES, ETHERNET and INET Kconfigs. Make sure they are listed in MLX4_EN Kconfig dependencies. This fixes the following build break: drivers/net/ethernet/mellanox/mlx4/en_rx.c:582:18: warning: ‘struct iphdr’ declared inside parameter list [enabled by default] struct iphdr *iph) ^ drivers/net/ethernet/mellanox/mlx4/en_rx.c:582:18: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] drivers/net/ethernet/mellanox/mlx4/en_rx.c: In function ‘get_fixed_ipv4_csum’: drivers/net/ethernet/mellanox/mlx4/en_rx.c:586:20: error: dereferencing pointer to incomplete type _u8 ipproto = iph->protocol; Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 22b1cc012bc9..c1a39be0dbe7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -5,7 +5,7 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" depends on MAY_USE_DEVLINK - depends on PCI + depends on PCI && NETDEVICES && ETHERNET && INET select MLX4_CORE imply PTP_1588_CLOCK ---help--- From 394f9a113b4c42672c24ce03697712643dc87dee Mon Sep 17 00:00:00 2001 From: Chris Cole Date: Fri, 23 Nov 2018 12:20:45 +0100 Subject: [PATCH 1124/2608] ARM: 8814/1: mm: improve/fix ARM v7_dma_inv_range() unaligned address handling [ Upstream commit a1208f6a822ac29933e772ef1f637c5d67838da9 ] This patch addresses possible memory corruption when v7_dma_inv_range(start_address, end_address) address parameters are not aligned to whole cache lines. This function issues "invalidate" cache management operations to all cache lines from start_address (inclusive) to end_address (exclusive). When start_address and/or end_address are not aligned, the start and/or end cache lines are first issued "clean & invalidate" operation. The assumption is this is done to ensure that any dirty data addresses outside the address range (but part of the first or last cache lines) are cleaned/flushed so that data is not lost, which could happen if just an invalidate is issued. The problem is that these first/last partial cache lines are issued "clean & invalidate" and then "invalidate". This second "invalidate" is not required and worse can cause "lost" writes to addresses outside the address range but part of the cache line. If another component writes to its part of the cache line between the "clean & invalidate" and "invalidate" operations, the write can get lost. This fix is to remove the extra "invalidate" operation when unaligned addressed are used. A kernel module is available that has a stress test to reproduce the issue and a unit test of the updated v7_dma_inv_range(). It can be downloaded from http://ftp.sageembedded.com/outgoing/linux/cache-test-20181107.tgz. v7_dma_inv_range() is call by dmac_[un]map_area(addr, len, direction) when the direction is DMA_FROM_DEVICE. One can (I believe) successfully argue that DMA from a device to main memory should use buffers aligned to cache line size, because the "clean & invalidate" might overwrite data that the device just wrote using DMA. But if a driver does use unaligned buffers, at least this fix will prevent memory corruption outside the buffer. Signed-off-by: Chris Cole Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/cache-v7.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index de78109d002d..50a70edbc863 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -359,14 +359,16 @@ v7_dma_inv_range: ALT_UP(W(nop)) #endif mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + addne r0, r0, r2 tst r1, r3 bic r1, r1, r3 mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line -1: - mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line - add r0, r0, r2 cmp r0, r1 +1: + mcrlo p15, 0, r0, c7, c6, 1 @ invalidate D / U line + addlo r0, r0, r2 + cmplo r0, r1 blo 1b dsb st ret lr From a2cc1b2d1954ec291ef61aa6976ae10467895f8d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Fri, 23 Nov 2018 12:25:21 +0100 Subject: [PATCH 1125/2608] ARM: 8815/1: V7M: align v7m_dma_inv_range() with v7 counterpart [ Upstream commit 3d0358d0ba048c5afb1385787aaec8fa5ad78fcc ] Chris has discovered and reported that v7_dma_inv_range() may corrupt memory if address range is not aligned to cache line size. Since the whole cache-v7m.S was lifted form cache-v7.S the same observation applies to v7m_dma_inv_range(). So the fix just mirrors what has been done for v7 with a little specific of M-class. Cc: Chris Cole Signed-off-by: Vladimir Murzin Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/cache-v7m.S | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index 788486e830d3..32aa2a2aa260 100644 --- a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -73,9 +73,11 @@ /* * dcimvac: Invalidate data cache line by MVA to PoC */ -.macro dcimvac, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC +.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo +.macro dcimvac\c, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c .endm +.endr /* * dccmvau: Clean data cache line by MVA to PoU @@ -369,14 +371,16 @@ v7m_dma_inv_range: tst r0, r3 bic r0, r0, r3 dccimvacne r0, r3 + addne r0, r0, r2 subne r3, r2, #1 @ restore r3, corrupted by v7m's dccimvac tst r1, r3 bic r1, r1, r3 dccimvacne r1, r3 -1: - dcimvac r0, r3 - add r0, r0, r2 cmp r0, r1 +1: + dcimvaclo r0, r3 + addlo r0, r0, r2 + cmplo r0, r1 blo 1b dsb st ret lr From d126d4bf34807707445c8716d9d7c3584a478cfe Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 3 Dec 2018 13:21:01 +0100 Subject: [PATCH 1126/2608] ethernet: fman: fix wrong of_node_put() in probe function [ Upstream commit ecb239d96d369c23c33d41708646df646de669f4 ] After getting a reference to the platform device's of_node the probe function ends up calling of_find_matching_node() using the node as an argument. The function takes care of decreasing the refcount on it. We are then incorrectly decreasing the refcount on that node again. This patch removes the unwarranted call to of_node_put(). Fixes: 414fd46e7762 ("fsl/fman: Add FMan support") Signed-off-by: Nicolas Saenz Julienne Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/fman.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 9530405030a7..97425d94e280 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2786,7 +2786,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev) if (!muram_node) { dev_err(&of_dev->dev, "%s: could not find MURAM node\n", __func__); - goto fman_node_put; + goto fman_free; } err = of_address_to_resource(muram_node, 0, @@ -2795,11 +2795,10 @@ static struct fman *read_dts_node(struct platform_device *of_dev) of_node_put(muram_node); dev_err(&of_dev->dev, "%s: of_address_to_resource() = %d\n", __func__, err); - goto fman_node_put; + goto fman_free; } of_node_put(muram_node); - of_node_put(fm_node); err = devm_request_irq(&of_dev->dev, irq, fman_irq, 0, "fman", fman); if (err < 0) { From 938199486ba9bc203fd312a6e02f7edb1640b2b4 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Mon, 3 Dec 2018 11:53:21 +1100 Subject: [PATCH 1127/2608] drm/ast: Fix connector leak during driver unload [ Upstream commit e594a5e349ddbfdaca1951bb3f8d72f3f1660d73 ] When unloading the ast driver, a warning message is printed by drm_mode_config_cleanup() because a reference is still held to one of the drm_connector structs. Correct this by calling drm_crtc_force_disable_all() in ast_fbdev_destroy(). Signed-off-by: Sam Bobroff Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/1e613f3c630c7bbc72e04a44b178259b9164d2f6.1543798395.git.sbobroff@linux.ibm.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/ast/ast_fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c index 0cd827e11fa2..de26df0c6044 100644 --- a/drivers/gpu/drm/ast/ast_fb.c +++ b/drivers/gpu/drm/ast/ast_fb.c @@ -263,6 +263,7 @@ static void ast_fbdev_destroy(struct drm_device *dev, { struct ast_framebuffer *afb = &afbdev->afb; + drm_crtc_force_disable_all(dev); drm_fb_helper_unregister_fbi(&afbdev->helper); if (afb->obj) { From ab9697222a1da17b6a4e909fab554b6ae5691789 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 3 Nov 2018 15:02:44 -0500 Subject: [PATCH 1128/2608] cifs: In Kconfig CONFIG_CIFS_POSIX needs depends on legacy (insecure cifs) [ Upstream commit 6e785302dad32228819d8066e5376acd15d0e6ba ] Missing a dependency. Shouldn't show cifs posix extensions in Kconfig if CONFIG_CIFS_ALLOW_INSECURE_DIALECTS (ie SMB1 protocol) is disabled. Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Sasha Levin --- fs/cifs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index cb0f1fbe836d..7b95e7971d18 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -121,7 +121,7 @@ config CIFS_XATTR config CIFS_POSIX bool "CIFS POSIX Extensions" - depends on CIFS_XATTR + depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY && CIFS_XATTR help Enabling this option will cause the cifs client to attempt to negotiate a newer dialect with servers, such as Samba 3.0.5 From 7e43eec4b4587716505fcf388879e4191331e6fa Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 6 Dec 2018 19:14:34 +0000 Subject: [PATCH 1129/2608] vhost/vsock: fix reset orphans race with close timeout [ Upstream commit c38f57da428b033f2721b611d84b1f40bde674a8 ] If a local process has closed a connected socket and hasn't received a RST packet yet, then the socket remains in the table until a timeout expires. When a vhost_vsock instance is released with the timeout still pending, the socket is never freed because vhost_vsock has already set the SOCK_DONE flag. Check if the close timer is pending and let it close the socket. This prevents the race which can leak sockets. Reported-by: Maximilian Riemensberger Cc: Graham Whaley Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vsock.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index b044a0800805..248533c0f9ac 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -561,13 +561,21 @@ static void vhost_vsock_reset_orphans(struct sock *sk) * executing. */ - if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) { - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - sk->sk_state = SS_UNCONNECTED; - sk->sk_err = ECONNRESET; - sk->sk_error_report(sk); - } + /* If the peer is still valid, no need to reset connection */ + if (vhost_vsock_get(vsk->remote_addr.svm_cid)) + return; + + /* If the close timeout is pending, let it expire. This avoids races + * with the timeout callback. + */ + if (vsk->close_work_scheduled) + return; + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); } static int vhost_vsock_dev_release(struct inode *inode, struct file *file) From 73c821e378e86d9152bb53d9e261ac128b05e582 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Dec 2018 17:44:53 +0000 Subject: [PATCH 1130/2608] mlxsw: spectrum_switchdev: Fix VLAN device deletion via ioctl [ Upstream commit 993107fea5eefdfdfde1ca38d3f01f0bebf76e77 ] When deleting a VLAN device using an ioctl the netdev is unregistered before the VLAN filter is updated via ndo_vlan_rx_kill_vid(). It can lead to a use-after-free in mlxsw in case the VLAN device is deleted while being enslaved to a bridge. The reason for the above is that when mlxsw receives the CHANGEUPPER event, it wrongly assumes that the VLAN device is no longer its upper and thus destroys the internal representation of the bridge port despite the reference count being non-zero. Fix this by checking if the VLAN device is our upper using its real device. In net-next I'm going to remove this trick and instead make mlxsw completely agnostic to the order of the events. Fixes: c57529e1d5d8 ("mlxsw: spectrum: Replace vPorts with Port-VLAN") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 32c25772f755..21611613f44c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -295,7 +295,13 @@ static bool mlxsw_sp_bridge_port_should_destroy(const struct mlxsw_sp_bridge_port * bridge_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_port->dev); + struct net_device *dev = bridge_port->dev; + struct mlxsw_sp *mlxsw_sp; + + if (is_vlan_dev(dev)) + mlxsw_sp = mlxsw_sp_lower_get(vlan_dev_real_dev(dev)); + else + mlxsw_sp = mlxsw_sp_lower_get(dev); /* In case ports were pulled from out of a bridged LAG, then * it's possible the reference count isn't zero, yet the bridge @@ -1646,7 +1652,7 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, u16 vid = vlan_dev_vlan_id(bridge_port->dev); mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_port_vlan)) + if (!mlxsw_sp_port_vlan) return; mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); From 4bdfff5c4f913c50a419b1e28e5893ebc04a09ff Mon Sep 17 00:00:00 2001 From: "Adamski, Krzysztof (Nokia - PL/Wroclaw)" Date: Fri, 16 Nov 2018 13:24:41 +0000 Subject: [PATCH 1131/2608] i2c: axxia: properly handle master timeout [ Upstream commit 6c7f25cae54b840302e4f1b371dbf318fbf09ab2 ] According to Intel (R) Axxia TM Lionfish Communication Processor Peripheral Subsystem Hardware Reference Manual, the AXXIA I2C module have a programmable Master Wait Timer, which among others, checks the time between commands send in manual mode. When a timeout (25ms) passes, TSS bit is set in Master Interrupt Status register and a Stop command is issued by the hardware. The axxia_i2c_xfer(), does not properly handle this situation, however. For each message a separate axxia_i2c_xfer_msg() is called and this function incorrectly assumes that any interrupt might happen only when waiting for completion. This is mostly correct but there is one exception - a master timeout can trigger if enough time has passed between individual transfers. It will, by definition, happen between transfers when the interrupts are disabled by the code. If that happens, the hardware issues Stop command. The interrupt indicating timeout will not be triggered as soon as we enable them since the Master Interrupt Status is cleared when master mode is entered again (which happens before enabling irqs) meaning this error is lost and the transfer is continued even though the Stop was issued on the bus. The subsequent operations completes without error but a bogus value (0xFF in case of read) is read as the client device is confused because aborted transfer. No error is returned from master_xfer() making caller believe that a valid value was read. To fix the problem, the TSS bit (indicating timeout) in Master Interrupt Status register is checked before each transfer. If it is set, there was a timeout before this transfer and (as described above) the hardware already issued Stop command so the transaction should be aborted thus -ETIMEOUT is returned from the master_xfer() callback. In order to be sure no timeout was issued we can't just read the status just before starting new transaction as there will always be a small window of time (few CPU cycles at best) where this might still happen. For this reason we have to temporally disable the timer before checking for TSS bit. Disabling it will, however, clear the TSS bit so in order to preserve that information, we have to read it in ISR so we have to ensure that the TSS interrupt is not masked between transfers of one transaction. There is no need to call bus recovery or controller reinitialization if that happens so it's skipped. Signed-off-by: Krzysztof Adamski Reviewed-by: Alexander Sverdlin Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-axxia.c | 40 ++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c index 13f07482ec68..deea13838648 100644 --- a/drivers/i2c/busses/i2c-axxia.c +++ b/drivers/i2c/busses/i2c-axxia.c @@ -74,8 +74,7 @@ MST_STATUS_ND) #define MST_STATUS_ERR (MST_STATUS_NAK | \ MST_STATUS_AL | \ - MST_STATUS_IP | \ - MST_STATUS_TSS) + MST_STATUS_IP) #define MST_TX_BYTES_XFRD 0x50 #define MST_RX_BYTES_XFRD 0x54 #define SCL_HIGH_PERIOD 0x80 @@ -241,7 +240,7 @@ static int axxia_i2c_empty_rx_fifo(struct axxia_i2c_dev *idev) */ if (c <= 0 || c > I2C_SMBUS_BLOCK_MAX) { idev->msg_err = -EPROTO; - i2c_int_disable(idev, ~0); + i2c_int_disable(idev, ~MST_STATUS_TSS); complete(&idev->msg_complete); break; } @@ -299,14 +298,19 @@ static irqreturn_t axxia_i2c_isr(int irq, void *_dev) if (status & MST_STATUS_SCC) { /* Stop completed */ - i2c_int_disable(idev, ~0); + i2c_int_disable(idev, ~MST_STATUS_TSS); complete(&idev->msg_complete); } else if (status & MST_STATUS_SNS) { /* Transfer done */ - i2c_int_disable(idev, ~0); + i2c_int_disable(idev, ~MST_STATUS_TSS); if (i2c_m_rd(idev->msg) && idev->msg_xfrd < idev->msg->len) axxia_i2c_empty_rx_fifo(idev); complete(&idev->msg_complete); + } else if (status & MST_STATUS_TSS) { + /* Transfer timeout */ + idev->msg_err = -ETIMEDOUT; + i2c_int_disable(idev, ~MST_STATUS_TSS); + complete(&idev->msg_complete); } else if (unlikely(status & MST_STATUS_ERR)) { /* Transfer error */ i2c_int_disable(idev, ~0); @@ -339,10 +343,10 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) u32 rx_xfer, tx_xfer; u32 addr_1, addr_2; unsigned long time_left; + unsigned int wt_value; idev->msg = msg; idev->msg_xfrd = 0; - idev->msg_err = 0; reinit_completion(&idev->msg_complete); if (i2c_m_ten(msg)) { @@ -382,9 +386,18 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) else if (axxia_i2c_fill_tx_fifo(idev) != 0) int_mask |= MST_STATUS_TFL; + wt_value = WT_VALUE(readl(idev->base + WAIT_TIMER_CONTROL)); + /* Disable wait timer temporarly */ + writel(wt_value, idev->base + WAIT_TIMER_CONTROL); + /* Check if timeout error happened */ + if (idev->msg_err) + goto out; + /* Start manual mode */ writel(CMD_MANUAL, idev->base + MST_COMMAND); + writel(WT_EN | wt_value, idev->base + WAIT_TIMER_CONTROL); + i2c_int_enable(idev, int_mask); time_left = wait_for_completion_timeout(&idev->msg_complete, @@ -395,13 +408,15 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) if (readl(idev->base + MST_COMMAND) & CMD_BUSY) dev_warn(idev->dev, "busy after xfer\n"); - if (time_left == 0) + if (time_left == 0) { idev->msg_err = -ETIMEDOUT; - - if (idev->msg_err == -ETIMEDOUT) i2c_recover_bus(&idev->adapter); + axxia_i2c_init(idev); + } - if (unlikely(idev->msg_err) && idev->msg_err != -ENXIO) +out: + if (unlikely(idev->msg_err) && idev->msg_err != -ENXIO && + idev->msg_err != -ETIMEDOUT) axxia_i2c_init(idev); return idev->msg_err; @@ -409,7 +424,7 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) static int axxia_i2c_stop(struct axxia_i2c_dev *idev) { - u32 int_mask = MST_STATUS_ERR | MST_STATUS_SCC; + u32 int_mask = MST_STATUS_ERR | MST_STATUS_SCC | MST_STATUS_TSS; unsigned long time_left; reinit_completion(&idev->msg_complete); @@ -436,6 +451,9 @@ axxia_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) int i; int ret = 0; + idev->msg_err = 0; + i2c_int_enable(idev, MST_STATUS_TSS); + for (i = 0; ret == 0 && i < num; ++i) ret = axxia_i2c_xfer_msg(idev, &msgs[i]); From c39910b57c7f557d304f50775eab28b6c892a7fd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Nov 2018 10:19:55 +0100 Subject: [PATCH 1132/2608] i2c: scmi: Fix probe error on devices with an empty SMB0001 ACPI device node [ Upstream commit 0544ee4b1ad574aec3b6379af5f5cdee42840971 ] Some AMD based HP laptops have a SMB0001 ACPI device node which does not define any methods. This leads to the following error in dmesg: [ 5.222731] cmi: probe of SMB0001:00 failed with error -5 This commit makes acpi_smbus_cmi_add() return -ENODEV instead in this case silencing the error. In case of a failure of the i2c_add_adapter() call this commit now propagates the error from that call instead of -EIO. Signed-off-by: Hans de Goede Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-scmi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index efefcfa24a4c..d2178f701b41 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -364,6 +364,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) { struct acpi_smbus_cmi *smbus_cmi; const struct acpi_device_id *id; + int ret; smbus_cmi = kzalloc(sizeof(struct acpi_smbus_cmi), GFP_KERNEL); if (!smbus_cmi) @@ -385,8 +386,10 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) acpi_walk_namespace(ACPI_TYPE_METHOD, smbus_cmi->handle, 1, acpi_smbus_cmi_query_methods, NULL, smbus_cmi, NULL); - if (smbus_cmi->cap_info == 0) + if (smbus_cmi->cap_info == 0) { + ret = -ENODEV; goto err; + } snprintf(smbus_cmi->adapter.name, sizeof(smbus_cmi->adapter.name), "SMBus CMI adapter %s", @@ -397,7 +400,8 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) smbus_cmi->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; smbus_cmi->adapter.dev.parent = &device->dev; - if (i2c_add_adapter(&smbus_cmi->adapter)) { + ret = i2c_add_adapter(&smbus_cmi->adapter); + if (ret) { dev_err(&device->dev, "Couldn't register adapter!\n"); goto err; } @@ -407,7 +411,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) err: kfree(smbus_cmi); device->driver_data = NULL; - return -EIO; + return ret; } static int acpi_smbus_cmi_remove(struct acpi_device *device) From 90265a60aff7ad21eed76a9e62159760d1d364b2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 Dec 2018 12:55:27 +0900 Subject: [PATCH 1133/2608] i2c: uniphier: fix violation of tLOW requirement for Fast-mode [ Upstream commit 8469636ab5d8c77645b953746c10fda6983a8830 ] Currently, the clock duty is set as tLOW/tHIGH = 1/1. For Fast-mode, tLOW is set to 1.25 us while the I2C spec requires tLOW >= 1.3 us. tLOW/tHIGH = 5/4 would meet both Standard-mode and Fast-mode: Standard-mode: tLOW = 5.56 us, tHIGH = 4.44 us Fast-mode: tLOW = 1.39 us, tHIGH = 1.11 us Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-uniphier.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c index 454f914ae66d..c488e558aef7 100644 --- a/drivers/i2c/busses/i2c-uniphier.c +++ b/drivers/i2c/busses/i2c-uniphier.c @@ -320,7 +320,13 @@ static void uniphier_i2c_hw_init(struct uniphier_i2c_priv *priv) uniphier_i2c_reset(priv, true); - writel((cyc / 2 << 16) | cyc, priv->membase + UNIPHIER_I2C_CLK); + /* + * Bit30-16: clock cycles of tLOW. + * Standard-mode: tLOW = 4.7 us, tHIGH = 4.0 us + * Fast-mode: tLOW = 1.3 us, tHIGH = 0.6 us + * "tLow/tHIGH = 5/4" meets both. + */ + writel((cyc * 5 / 9 << 16) | cyc, priv->membase + UNIPHIER_I2C_CLK); uniphier_i2c_reset(priv, false); } From 07d4f1c4cd57b6ba4b23d243bcb8d1a63316e784 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 Dec 2018 12:55:28 +0900 Subject: [PATCH 1134/2608] i2c: uniphier-f: fix violation of tLOW requirement for Fast-mode [ Upstream commit ece27a337d42a3197935711997f2880f0957ed7e ] Currently, the clock duty is set as tLOW/tHIGH = 1/1. For Fast-mode, tLOW is set to 1.25 us while the I2C spec requires tLOW >= 1.3 us. tLOW/tHIGH = 5/4 would meet both Standard-mode and Fast-mode: Standard-mode: tLOW = 5.56 us, tHIGH = 4.44 us Fast-mode: tLOW = 1.39 us, tHIGH = 1.11 us Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-uniphier-f.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index a403e8579b65..bc26ec822e26 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -470,9 +470,26 @@ static void uniphier_fi2c_hw_init(struct uniphier_fi2c_priv *priv) uniphier_fi2c_reset(priv); + /* + * Standard-mode: tLOW + tHIGH = 10 us + * Fast-mode: tLOW + tHIGH = 2.5 us + */ writel(cyc, priv->membase + UNIPHIER_FI2C_CYC); - writel(cyc / 2, priv->membase + UNIPHIER_FI2C_LCTL); + /* + * Standard-mode: tLOW = 4.7 us, tHIGH = 4.0 us, tBUF = 4.7 us + * Fast-mode: tLOW = 1.3 us, tHIGH = 0.6 us, tBUF = 1.3 us + * "tLow/tHIGH = 5/4" meets both. + */ + writel(cyc * 5 / 9, priv->membase + UNIPHIER_FI2C_LCTL); + /* + * Standard-mode: tHD;STA = 4.0 us, tSU;STA = 4.7 us, tSU;STO = 4.0 us + * Fast-mode: tHD;STA = 0.6 us, tSU;STA = 0.6 us, tSU;STO = 0.6 us + */ writel(cyc / 2, priv->membase + UNIPHIER_FI2C_SSUT); + /* + * Standard-mode: tSU;DAT = 250 ns + * Fast-mode: tSU;DAT = 100 ns + */ writel(cyc / 16, priv->membase + UNIPHIER_FI2C_DSUT); uniphier_fi2c_prepare_operation(priv); From 60e3480e0bbf00ffcec4b1409131cf01b69c7cf1 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 5 Dec 2018 16:54:57 +0000 Subject: [PATCH 1135/2608] nvmet-rdma: fix response use after free [ Upstream commit d7dcdf9d4e15189ecfda24cc87339a3425448d5c ] nvmet_rdma_release_rsp() may free the response before using it at error flow. Fixes: 8407879 ("nvmet-rdma: fix possible bogus dereference under heavy load") Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/rdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index a70b3d24936d..5d8140e58f6f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -524,6 +524,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe); + struct nvmet_rdma_queue *queue = cq->cq_context; nvmet_rdma_release_rsp(rsp); @@ -531,7 +532,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) wc->status != IB_WC_WR_FLUSH_ERR)) { pr_err("SEND for CQE 0x%p failed with status %s (%d).\n", wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status); - nvmet_rdma_error_comp(rsp->queue); + nvmet_rdma_error_comp(queue); } } From f1e1eb5c90b6b6a359e584e01468ce0c05801f4d Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Wed, 16 May 2018 16:45:51 -0700 Subject: [PATCH 1136/2608] rtc: snvs: Add timeouts to avoid kernel lockups [ Upstream commit cd7f3a249dbed2858e6c2f30e5be7f1f7a709ee2 ] In order to read correctly from asynchronously updated RTC registers, it's necessary to read repeatedly until their values do not change from read to read. It's also necessary to wait for three RTC clock ticks for certain operations. There are no timeouts in this code and these operations could possibly loop forever. To avoid kernel hangs, put in timeouts. The iMX7d can be configured to stop the SRTC on a tamper event, which will lockup the kernel inside this driver as described above. These hangs can happen when running under qemu, which doesn't emulate the SNVS RTC, though currently the driver will refuse to load on qemu due to a timeout in the driver probe method. It could also happen if the SRTC block where somehow placed into reset or the slow speed clock that drives the SRTC counter (but not the CPU) were to stop. The symptoms on a two core iMX7d are a work queue hang on rtc_timer_do_work(), which eventually blocks a systemd fsnotify operation that triggers a work queue flush, causing systemd to hang and thus causing all services that should be started by systemd, like a console getty, to fail to start or stop. Also optimize the wait code to wait less. It only needs to wait for the clock to advance three ticks, not to see it change three times. Cc: Alexandre Belloni Cc: Alessandro Zummo Cc: Fabio Estevam Cc: Shawn Guo Cc: Bryan O'Donoghue Signed-off-by: Trent Piepho Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-snvs.c | 103 +++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 34 deletions(-) diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index 9af591d5223c..71eee39520f0 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -47,49 +47,83 @@ struct snvs_rtc_data { struct clk *clk; }; +/* Read 64 bit timer register, which could be in inconsistent state */ +static u64 rtc_read_lpsrt(struct snvs_rtc_data *data) +{ + u32 msb, lsb; + + regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &msb); + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &lsb); + return (u64)msb << 32 | lsb; +} + +/* Read the secure real time counter, taking care to deal with the cases of the + * counter updating while being read. + */ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) { u64 read1, read2; - u32 val; + unsigned int timeout = 100; + /* As expected, the registers might update between the read of the LSB + * reg and the MSB reg. It's also possible that one register might be + * in partially modified state as well. + */ + read1 = rtc_read_lpsrt(data); do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &val); - read1 = val; - read1 <<= 32; - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &val); - read1 |= val; - - regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &val); - read2 = val; - read2 <<= 32; - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &val); - read2 |= val; - } while (read1 != read2); + read2 = read1; + read1 = rtc_read_lpsrt(data); + } while (read1 != read2 && --timeout); + if (!timeout) + dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); /* Convert 47-bit counter to 32-bit raw second count */ return (u32) (read1 >> CNTR_TO_SECS_SH); } -static void rtc_write_sync_lp(struct snvs_rtc_data *data) +/* Just read the lsb from the counter, dealing with inconsistent state */ +static int rtc_read_lp_counter_lsb(struct snvs_rtc_data *data, u32 *lsb) { - u32 count1, count2, count3; - int i; + u32 count1, count2; + unsigned int timeout = 100; - /* Wait for 3 CKIL cycles */ - for (i = 0; i < 3; i++) { - do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count2); - } while (count1 != count2); - - /* Now wait until counter value changes */ - do { - do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count2); - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count3); - } while (count2 != count3); - } while (count3 == count1); + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); + do { + count2 = count1; + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); + } while (count1 != count2 && --timeout); + if (!timeout) { + dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); + return -ETIMEDOUT; } + + *lsb = count1; + return 0; +} + +static int rtc_write_sync_lp(struct snvs_rtc_data *data) +{ + u32 count1, count2; + u32 elapsed; + unsigned int timeout = 1000; + int ret; + + ret = rtc_read_lp_counter_lsb(data, &count1); + if (ret) + return ret; + + /* Wait for 3 CKIL cycles, about 61.0-91.5 µs */ + do { + ret = rtc_read_lp_counter_lsb(data, &count2); + if (ret) + return ret; + elapsed = count2 - count1; /* wrap around _is_ handled! */ + } while (elapsed < 3 && --timeout); + if (!timeout) { + dev_err(&data->rtc->dev, "Timeout waiting for LPSRT Counter to change\n"); + return -ETIMEDOUT; + } + return 0; } static int snvs_rtc_enable(struct snvs_rtc_data *data, bool enable) @@ -173,9 +207,7 @@ static int snvs_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) (SNVS_LPCR_LPTA_EN | SNVS_LPCR_LPWUI_EN), enable ? (SNVS_LPCR_LPTA_EN | SNVS_LPCR_LPWUI_EN) : 0); - rtc_write_sync_lp(data); - - return 0; + return rtc_write_sync_lp(data); } static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) @@ -183,11 +215,14 @@ static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct snvs_rtc_data *data = dev_get_drvdata(dev); struct rtc_time *alrm_tm = &alrm->time; unsigned long time; + int ret; rtc_tm_to_time(alrm_tm, &time); regmap_update_bits(data->regmap, data->offset + SNVS_LPCR, SNVS_LPCR_LPTA_EN, 0); - rtc_write_sync_lp(data); + ret = rtc_write_sync_lp(data); + if (ret) + return ret; regmap_write(data->regmap, data->offset + SNVS_LPTAR, time); /* Clear alarm interrupt status bit */ From ad962d20d4eb1e690ace8ca9e390e2b05a786999 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 19 Dec 2018 22:40:42 +0100 Subject: [PATCH 1137/2608] bpf, arm: fix emit_ldx_r and emit_mov_i using TMP_REG_1 emit_ldx_r() and emit_a32_mov_i() were both using TMP_REG_1 and clashing with each other. Using TMP_REG_2 in emit_ldx_r() fixes the issue. Fixes: ec19e02b343 ("ARM: net: bpf: fix LDX instructions") Cc: Russell King Signed-off-by: Nicolas Schichan Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index ece2d1d43724..dafeb5f81353 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -915,7 +915,7 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, /* dst = *(size*)(src + off) */ static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, s32 off, struct jit_ctx *ctx, const u8 sz){ - const u8 *tmp = bpf2a32[TMP_REG_1]; + const u8 *tmp = bpf2a32[TMP_REG_2]; const u8 *rd = dstk ? tmp : dst; u8 rm = src; s32 off_max; From 592f5569e18471c07208f74540f4e0f646b226f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2018 14:13:19 +0100 Subject: [PATCH 1138/2608] Linux 4.14.90 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b83477be8d0c..280c7193e246 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 89 +SUBLEVEL = 90 EXTRAVERSION = NAME = Petit Gorille From 1e9290ca9d6fa96b69c4d6125f87dc77793179e1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 May 2018 15:09:41 -0600 Subject: [PATCH 1139/2608] block: break discard submissions into the user defined size [ Upstream commit af097f5d199e2aa3ab3ef777f0716e487b8f7b08 ] Don't build discards bigger than what the user asked for, if the user decided to limit the size by writing to 'discard_max_bytes'. Reviewed-by: Darrick J. Wong Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-lib.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 2bc544ce3d2e..53a45663e688 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -59,10 +59,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, unsigned int req_sects; sector_t end_sect, tmp; - /* Make sure bi_size doesn't overflow */ - req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9); + /* + * Issue in chunks of the user defined max discard setting, + * ensuring that bi_size doesn't overflow + */ + req_sects = min_t(sector_t, nr_sects, + q->limits.max_discard_sectors); + if (req_sects > UINT_MAX >> 9) + req_sects = UINT_MAX >> 9; - /** + /* * If splitting a request, and the next starting sector would be * misaligned, stop the discard at the previous aligned sector. */ From 07eae146f5b76da9d5bb41bc8825fe486bf0f05a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Jul 2018 13:34:22 -0400 Subject: [PATCH 1140/2608] block: fix infinite loop if the device loses discard capability [ Upstream commit b88aef36b87c9787a4db724923ec4f57dfd513f3 ] If __blkdev_issue_discard is in progress and a device mapper device is reloaded with a table that doesn't support discard, q->limits.max_discard_sectors is set to zero. This results in infinite loop in __blkdev_issue_discard. This patch checks if max_discard_sectors is zero and aborts with -EOPNOTSUPP. Signed-off-by: Mikulas Patocka Tested-by: Zdenek Kabelac Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-lib.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/blk-lib.c b/block/blk-lib.c index 53a45663e688..0bdc77888dc5 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -65,6 +65,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, */ req_sects = min_t(sector_t, nr_sects, q->limits.max_discard_sectors); + if (!req_sects) + goto fail; if (req_sects > UINT_MAX >> 9) req_sects = UINT_MAX >> 9; @@ -102,6 +104,14 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, *biop = bio; return 0; + +fail: + if (bio) { + submit_bio_wait(bio); + bio_put(bio); + } + *biop = NULL; + return -EOPNOTSUPP; } EXPORT_SYMBOL(__blkdev_issue_discard); From 2e5981029574928d073f381c2b446c6750d84bd1 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 11 Oct 2018 20:32:05 +0200 Subject: [PATCH 1141/2608] ASoC: sta32x: set ->component pointer in private struct commit 747df19747bc9752cd40b9cce761e17a033aa5c2 upstream The ESD watchdog code in sta32x_watchdog() dereferences the pointer which is never assigned. This is a regression from a1be4cead9b950 ("ASoC: sta32x: Convert to direct regmap API usage.") which went unnoticed since nobody seems to use that ESD workaround. Fixes: a1be4cead9b950 ("ASoC: sta32x: Convert to direct regmap API usage.") Signed-off-by: Daniel Mack Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- sound/soc/codecs/sta32x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c index 5b888476d9ff..b728140c79a9 100644 --- a/sound/soc/codecs/sta32x.c +++ b/sound/soc/codecs/sta32x.c @@ -879,6 +879,9 @@ static int sta32x_probe(struct snd_soc_codec *codec) struct sta32x_priv *sta32x = snd_soc_codec_get_drvdata(codec); struct sta32x_platform_data *pdata = sta32x->pdata; int i, ret = 0, thermal = 0; + + sta32x->codec = codec; + ret = regulator_bulk_enable(ARRAY_SIZE(sta32x->supplies), sta32x->supplies); if (ret != 0) { From 56303ade45046b4470eaa47181d83e648d768b2d Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 11 Jun 2018 23:41:09 +0200 Subject: [PATCH 1142/2608] ubifs: Fix directory size calculation for symlinks commit 00ee8b60102862f4daf0814d12a2ea2744fc0b9b upstream We have to account the name of the symlink and not the target length. Fixes: ca7f85be8d6c ("ubifs: Add support for encrypted symlinks") Cc: Signed-off-by: Richard Weinberger Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- fs/ubifs/dir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ef820f803176..4e6e32c0c08a 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1147,8 +1147,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, struct ubifs_inode *ui; struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; - int err, len = strlen(symname); - int sz_change = CALC_DENT_SIZE(len); + int err, sz_change, len = strlen(symname); struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); struct fscrypt_symlink_data *sd = NULL; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, @@ -1189,6 +1188,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, if (err) goto out_budg; + sz_change = CALC_DENT_SIZE(fname_len(&nm)); + inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) { err = PTR_ERR(inode); From 179c8da7f3acdd04f2c23cac81989f24609f281c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jul 2018 14:08:45 -0700 Subject: [PATCH 1143/2608] ib_srpt: Fix a use-after-free in __srpt_close_all_ch() commit 14d15c2b278011056482eb015dff89f9cbf2b841 upstream BUG: KASAN: use-after-free in srpt_set_enabled+0x1a9/0x1e0 [ib_srpt] Read of size 4 at addr ffff8801269d23f8 by task check/29726 CPU: 4 PID: 29726 Comm: check Not tainted 4.18.0-rc2-dbg+ #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_address_description+0x6f/0x270 kasan_report+0x241/0x360 __asan_load4+0x78/0x80 srpt_set_enabled+0x1a9/0x1e0 [ib_srpt] srpt_tpg_enable_store+0xb8/0x120 [ib_srpt] configfs_write_file+0x14e/0x1d0 [configfs] __vfs_write+0xd2/0x3b0 vfs_write+0x101/0x270 ksys_write+0xab/0x120 __x64_sys_write+0x43/0x50 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f235cfe6154 Fixes: aaf45bd83eba ("IB/srpt: Detect session shutdown reliably") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Jason Gunthorpe Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 60105ba77889..47f3f562d86f 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1775,8 +1775,8 @@ static void __srpt_close_all_ch(struct srpt_device *sdev) list_for_each_entry(ch, &sdev->rch_list, list) { if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s-%d because target %s has been disabled\n", - ch->sess_name, ch->qp->qp_num, + pr_info("Closing channel %s because target %s has been disabled\n", + ch->sess_name, sdev->device->name); srpt_close_ch(ch); } From 0d9b51366d2da5dc5537c1c6e856cf4c0d08de0e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Mar 2018 10:22:04 +0100 Subject: [PATCH 1144/2608] perf record: Synthesize features before events in pipe mode [ Upstream commit a2015516c5c0be932a69e1d3405c2fb03b4eacf1 ] We need to synthesize events first, because some features works on top of them (on report side). Signed-off-by: Jiri Olsa Tested-by: Stephane Eranian Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180314092205.23291-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-record.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b205c1340456..5e53cafe6cf9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -800,13 +800,10 @@ static int record__synthesize(struct record *rec, bool tail) return 0; if (file->is_pipe) { - err = perf_event__synthesize_features( - tool, session, rec->evlist, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize features.\n"); - return err; - } - + /* + * We need to synthesize events first, because some + * features works on top of them (on report side). + */ err = perf_event__synthesize_attrs(tool, session, process_synthesized_event); if (err < 0) { @@ -814,6 +811,13 @@ static int record__synthesize(struct record *rec, bool tail) goto out; } + err = perf_event__synthesize_features(tool, session, rec->evlist, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize features.\n"); + return err; + } + if (have_tracepoints(&rec->evlist->entries)) { /* * FIXME err <= 0 here actually means that From 3845c73607ae8de8c7b60d701cdecdc5505c821d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Sep 2018 14:12:07 +0300 Subject: [PATCH 1145/2608] cifs: integer overflow in in SMB2_ioctl() commit 2d204ee9d671327915260071c19350d84344e096 upstream The "le32_to_cpu(rsp->OutputOffset) + *plen" addition can overflow and wrap around to a smaller value which looks like it would lead to an information leak. Fixes: 4a72dafa19ba ("SMB2 FSCTL and IOCTL worker function") Signed-off-by: Dan Carpenter Signed-off-by: Steve French Reviewed-by: Aurelien Aptel CC: Stable Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 69309538ffb8..1581e8668b09 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2020,14 +2020,14 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, /* We check for obvious errors in the output buffer length and offset */ if (*plen == 0) goto ioctl_exit; /* server returned no data */ - else if (*plen > 0xFF00) { + else if (*plen > rsp_iov.iov_len || *plen > 0xFF00) { cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", *plen); *plen = 0; rc = -EIO; goto ioctl_exit; } - if (get_rfc1002_length(rsp) < le32_to_cpu(rsp->OutputOffset) + *plen) { + if (get_rfc1002_length(rsp) - *plen < le32_to_cpu(rsp->OutputOffset)) { cifs_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen, le32_to_cpu(rsp->OutputOffset)); *plen = 0; From 49be8dc589aee04c64d61e362c5029ab20fd6fd7 Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Wed, 12 Dec 2018 12:42:24 +0100 Subject: [PATCH 1146/2608] USB: hso: Fix OOB memory access in hso_probe/hso_get_config_data commit 5146f95df782b0ac61abde36567e718692725c89 upstream. The function hso_probe reads if_num from the USB device (as an u8) and uses it without a length check to index an array, resulting in an OOB memory read in hso_probe or hso_get_config_data. Add a length check for both locations and updated hso_probe to bail on error. This issue has been assigned CVE-2018-19985. Reported-by: Hui Peng Reported-by: Mathias Payer Signed-off-by: Hui Peng Signed-off-by: Mathias Payer Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/hso.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index d7a3379ea668..18a0952f68a8 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2806,6 +2806,12 @@ static int hso_get_config_data(struct usb_interface *interface) return -EIO; } + /* check if we have a valid interface */ + if (if_num > 16) { + kfree(config_data); + return -EINVAL; + } + switch (config_data[if_num]) { case 0x0: result = 0; @@ -2876,10 +2882,18 @@ static int hso_probe(struct usb_interface *interface, /* Get the interface/port specification from either driver_info or from * the device itself */ - if (id->driver_info) + if (id->driver_info) { + /* if_num is controlled by the device, driver_info is a 0 terminated + * array. Make sure, the access is in bounds! */ + for (i = 0; i <= if_num; ++i) + if (((u32 *)(id->driver_info))[i] == 0) + goto exit; port_spec = ((u32 *)(id->driver_info))[if_num]; - else + } else { port_spec = hso_get_config_data(interface); + if (port_spec < 0) + goto exit; + } /* Check if we need to switch to alt interfaces prior to port * configuration */ From 2ab5db613774c2c743d109eb63f71c5ee3b6a4ea Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 14 Dec 2018 10:54:43 +0200 Subject: [PATCH 1147/2608] xhci: Don't prevent USB2 bus suspend in state check intended for USB3 only commit 45f750c16cae3625014c14c77bd9005eda975d35 upstream. The code to prevent a bus suspend if a USB3 port was still in link training also reacted to USB2 port polling state. This caused bus suspend to busyloop in some cases. USB2 polling state is different from USB3, and should not prevent bus suspend. Limit the USB3 link training state check to USB3 root hub ports only. The origial commit went to stable so this need to be applied there as well Fixes: 2f31a67f01a8 ("usb: xhci: Prevent bus suspend if a port connect change or polling state is detected") Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 6b2f6c41e2a9..997ff183c9cb 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1512,7 +1512,8 @@ int xhci_bus_suspend(struct usb_hcd *hcd) portsc_buf[port_index] = 0; /* Bail out if a USB3 port has a new device in link training */ - if ((t1 & PORT_PLS_MASK) == XDEV_POLLING) { + if ((hcd->speed >= HCD_USB3) && + (t1 & PORT_PLS_MASK) == XDEV_POLLING) { bus_state->bus_suspended = 0; spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "Bus suspend bailout, port in polling\n"); From 4aa9cf83b766c2b0b31a360c5c93e1987c19e44c Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 17 Dec 2018 14:37:40 +0100 Subject: [PATCH 1148/2608] USB: xhci: fix 'broken_suspend' placement in struct xchi_hcd commit 2419f30a4a4fcaa5f35111563b4c61f1b2b26841 upstream. As commented in the struct's definition there shouldn't be anything underneath its 'priv[0]' member as it would break some macros. The patch converts the broken_suspend into a bit-field and relocates it next to to the rest of bit-fields. Fixes: a7d57abcc8a5 ("xhci: workaround CSS timeout on AMD SNPS 3.0 xHC") Reported-by: Oliver Neukum Signed-off-by: Nicolas Saenz Julienne Acked-by: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 1ccff2d9dee9..cbc91536e512 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1859,6 +1859,8 @@ struct xhci_hcd { unsigned sw_lpm_support:1; /* support xHCI 1.0 spec USB2 hardware LPM */ unsigned hw_lpm_support:1; + /* Broken Suspend flag for SNPS Suspend resume issue */ + unsigned broken_suspend:1; /* cached usb2 extened protocol capabilites */ u32 *ext_caps; unsigned int num_ext_caps; @@ -1871,8 +1873,6 @@ struct xhci_hcd { /* platform-specific data -- must come last */ unsigned long priv[0] __aligned(sizeof(s64)); - /* Broken Suspend flag for SNPS Suspend resume issue */ - u8 broken_suspend; }; /* Platform specific overrides to generic XHCI hc_driver ops */ From ed101ab689be374d95c8edd517dec35941067596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rgen=20Storvist?= Date: Tue, 11 Dec 2018 18:28:28 +0100 Subject: [PATCH 1149/2608] USB: serial: option: add GosunCn ZTE WeLink ME3630 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 70a7444c550a75584ffcfae95267058817eff6a7 upstream. Added USB serial option driver support for GosunCn ZTE WeLink ME3630 series cellular modules for USB modes ECM/NCM and MBIM. usb-devices output MBIM mode: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 10 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=0602 Rev=03.18 S: Manufacturer=Android S: Product=Android S: SerialNumber= C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 4 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim usb-devices output ECM/NCM mode: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 11 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=1476 Rev=03.18 S: Manufacturer=Android S: Product=Android S: SerialNumber= C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether I: If#= 4 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether Signed-off-by: Jörgen Storvist Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 392fddc80c44..7cca17a57456 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1331,6 +1331,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0414, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0417, 0xff, 0xff, 0xff) }, + { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x0602, 0xff) }, /* GosunCn ZTE WeLink ME3630 (MBIM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1008, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1010, 0xff, 0xff, 0xff), @@ -1534,6 +1535,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */ .driver_info = RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, From 0b3b6c5ba96f5e4b8497a33db563d5738dfe8d55 Mon Sep 17 00:00:00 2001 From: Tore Anderson Date: Sat, 8 Dec 2018 19:05:12 +0100 Subject: [PATCH 1150/2608] USB: serial: option: add HP lt4132 commit d57ec3c83b5153217a70b561d4fb6ed96f2f7a25 upstream. The HP lt4132 is a rebranded Huawei ME906s-158 LTE modem. The interface with protocol 0x16 is "CDC ECM & NCM" according to the *.inf files included with the Windows driver. Attaching the option driver to it doesn't result in a /dev/ttyUSB* device being created, so I've excluded it. Note that it is also excluded for corresponding Huawei-branded devices, cf. commit d544db293a44 ("USB: support new huawei devices in option.c"). T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=02 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=ff MxPS=64 #Cfgs= 3 P: Vendor=03f0 ProdID=a31d Rev=01.02 S: Manufacturer=HP Inc. S: Product=HP lt4132 LTE/HSPA+ 4G Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=2mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=06 Prot=10 Driver=option I: If#=0x1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=13 Driver=option I: If#=0x2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=12 Driver=option I: If#=0x3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=06 Prot=16 Driver=(none) I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=14 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=1b Driver=option T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=02 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=ff MxPS=64 #Cfgs= 3 P: Vendor=03f0 ProdID=a31d Rev=01.02 S: Manufacturer=HP Inc. S: Product=HP lt4132 LTE/HSPA+ 4G Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 7 Cfg#= 2 Atr=a0 MxPwr=2mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether I: If#=0x1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=06 Prot=00 Driver=cdc_ether I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=06 Prot=10 Driver=option I: If#=0x3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=13 Driver=option I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=12 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=14 Driver=option I: If#=0x6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=1b Driver=option T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=02 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=ff MxPS=64 #Cfgs= 3 P: Vendor=03f0 ProdID=a31d Rev=01.02 S: Manufacturer=HP Inc. S: Product=HP lt4132 LTE/HSPA+ 4G Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 3 Cfg#= 3 Atr=a0 MxPwr=2mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=14 Driver=option Signed-off-by: Tore Anderson Cc: stable@vger.kernel.org [ johan: drop id defines ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7cca17a57456..277a3e9de42c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1944,7 +1944,12 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x10) }, /* HP lt4132 (Huawei ME906s-158) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x12) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x13) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x14) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From 4497e8fa2361427fe3ee72de57541d8d677707a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rgen=20Storvist?= Date: Wed, 12 Dec 2018 08:39:39 +0100 Subject: [PATCH 1151/2608] USB: serial: option: add Simcom SIM7500/SIM7600 (MBIM mode) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cc6730df08a291e51e145bc65e24ffb5e2f17ab6 upstream. Added USB serial option driver support for Simcom SIM7500/SIM7600 series cellular modules exposing MBIM interface (VID 0x1e0e,PID 0x9003) T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 14 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1e0e ProdID=9003 Rev=03.18 S: Manufacturer=SimTech, Incorporated S: Product=SimTech, Incorporated S: SerialNumber=0123456789ABCDEF C: #Ifs= 7 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 6 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim Signed-off-by: Jörgen Storvist Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 277a3e9de42c..ba1edc892f79 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1763,6 +1763,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E), .driver_info = RSVD(5) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9003, 0xff) }, /* Simcom SIM7500/SIM7600 MBIM mode */ { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200), .driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D), From 4363aa8fb114592e2cba5b748b82b993afd7e141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rgen=20Storvist?= Date: Wed, 12 Dec 2018 21:47:36 +0100 Subject: [PATCH 1152/2608] USB: serial: option: add Fibocom NL668 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 30360224441ce89a98ed627861e735beb4010775 upstream. Added USB serial option driver support for Fibocom NL668 series cellular modules. Reserved USB endpoints 4, 5 and 6 for network + ADB interfaces. usb-devices output (QMI mode) T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1508 ProdID=1001 Rev=03.18 S: Manufacturer=Nodecom NL668 Modem S: Product=Nodecom NL668-CN Modem S: SerialNumber= C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) usb-devices output (ECM mode) T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 17 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1508 ProdID=1001 Rev=03.18 S: Manufacturer=Nodecom NL668 Modem S: Product=Nodecom NL668-CN Modem S: SerialNumber= C: #Ifs= 7 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether I: If#= 5 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) Signed-off-by: Jörgen Storvist Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ba1edc892f79..3d88c6fca370 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1951,6 +1951,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x13) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x14) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) }, + { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 */ + .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From 61c1a4a5ba4c90e8a35fdfb75b815025a84cdf61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rgen=20Storvist?= Date: Thu, 13 Dec 2018 17:32:08 +0100 Subject: [PATCH 1153/2608] USB: serial: option: add Telit LN940 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 28a86092b1753b802ef7e3de8a4c4a69a9c1bb03 upstream. Added USB serial option driver support for Telit LN940 series cellular modules. Covering both QMI and MBIM modes. usb-devices output (0x1900): T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1900 Rev=03.10 S: Manufacturer=Telit S: Product=Telit LN940 Mobile Broadband S: SerialNumber=0123456789ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option usb-devices output (0x1901): T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 20 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1901 Rev=03.10 S: Manufacturer=Telit S: Product=Telit LN940 Mobile Broadband S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 5 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim Signed-off-by: Jörgen Storvist Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3d88c6fca370..988be9ca2b4f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1167,6 +1167,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x1900), /* Telit LN940 (QMI) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, From fe646761cb20a36ed395c323d47db5139db509a7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 12 Dec 2018 06:46:55 -0700 Subject: [PATCH 1154/2608] scsi: sd: use mempool for discard special page commit 61cce6f6eeced5ddd9cac55e807fe28b4f18c1ba upstream. When boxes are run near (or to) OOM, we have a problem with the discard page allocation in sd. If we fail allocating the special page, we return busy, and it'll get retried. But since ordering is honored for dispatch requests, we can keep retrying this same IO and failing. Behind that IO could be requests that want to free memory, but they never get the chance. This means you get repeated spews of traces like this: [1201401.625972] Call Trace: [1201401.631748] dump_stack+0x4d/0x65 [1201401.639445] warn_alloc+0xec/0x190 [1201401.647335] __alloc_pages_slowpath+0xe84/0xf30 [1201401.657722] ? get_page_from_freelist+0x11b/0xb10 [1201401.668475] ? __alloc_pages_slowpath+0x2e/0xf30 [1201401.679054] __alloc_pages_nodemask+0x1f9/0x210 [1201401.689424] alloc_pages_current+0x8c/0x110 [1201401.699025] sd_setup_write_same16_cmnd+0x51/0x150 [1201401.709987] sd_init_command+0x49c/0xb70 [1201401.719029] scsi_setup_cmnd+0x9c/0x160 [1201401.727877] scsi_queue_rq+0x4d9/0x610 [1201401.736535] blk_mq_dispatch_rq_list+0x19a/0x360 [1201401.747113] blk_mq_sched_dispatch_requests+0xff/0x190 [1201401.758844] __blk_mq_run_hw_queue+0x95/0xa0 [1201401.768653] blk_mq_run_work_fn+0x2c/0x30 [1201401.777886] process_one_work+0x14b/0x400 [1201401.787119] worker_thread+0x4b/0x470 [1201401.795586] kthread+0x110/0x150 [1201401.803089] ? rescuer_thread+0x320/0x320 [1201401.812322] ? kthread_park+0x90/0x90 [1201401.820787] ? do_syscall_64+0x53/0x150 [1201401.829635] ret_from_fork+0x29/0x40 Ensure that the discard page allocation has a mempool backing, so we know we can make progress. Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c7b284587365..39754cc90043 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -133,6 +133,7 @@ static DEFINE_MUTEX(sd_ref_mutex); static struct kmem_cache *sd_cdb_cache; static mempool_t *sd_cdb_pool; +static mempool_t *sd_page_pool; static const char *sd_cache_types[] = { "write through", "none", "write back", @@ -759,9 +760,10 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) unsigned int data_len = 24; char *buf; - rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); if (!rq->special_vec.bv_page) return BLKPREP_DEFER; + clear_highpage(rq->special_vec.bv_page); rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; rq->rq_flags |= RQF_SPECIAL_PAYLOAD; @@ -792,9 +794,10 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); u32 data_len = sdp->sector_size; - rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); if (!rq->special_vec.bv_page) return BLKPREP_DEFER; + clear_highpage(rq->special_vec.bv_page); rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; rq->rq_flags |= RQF_SPECIAL_PAYLOAD; @@ -822,9 +825,10 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); u32 data_len = sdp->sector_size; - rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); if (!rq->special_vec.bv_page) return BLKPREP_DEFER; + clear_highpage(rq->special_vec.bv_page); rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; rq->rq_flags |= RQF_SPECIAL_PAYLOAD; @@ -1299,7 +1303,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) sd_zbc_write_unlock_zone(SCpnt); if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - __free_page(rq->special_vec.bv_page); + mempool_free(rq->special_vec.bv_page, sd_page_pool); if (SCpnt->cmnd != scsi_req(rq)->cmd) { cmnd = SCpnt->cmnd; @@ -3655,6 +3659,13 @@ static int __init init_sd(void) goto err_out_cache; } + sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0); + if (!sd_page_pool) { + printk(KERN_ERR "sd: can't init discard page pool\n"); + err = -ENOMEM; + goto err_out_ppool; + } + err = scsi_register_driver(&sd_template.gendrv); if (err) goto err_out_driver; @@ -3662,6 +3673,9 @@ static int __init init_sd(void) return 0; err_out_driver: + mempool_destroy(sd_page_pool); + +err_out_ppool: mempool_destroy(sd_cdb_pool); err_out_cache: @@ -3688,6 +3702,7 @@ static void __exit exit_sd(void) scsi_unregister_driver(&sd_template.gendrv); mempool_destroy(sd_cdb_pool); + mempool_destroy(sd_page_pool); kmem_cache_destroy(sd_cdb_cache); class_unregister(&sd_disk_class); From 8af6fad484c24e9d5b6b8f32ab9cfb262dfcbe21 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 10 Dec 2018 17:52:36 +0100 Subject: [PATCH 1155/2608] mmc: core: Reset HPI enabled state during re-init and in case of errors commit a0741ba40a009f97c019ae7541dc61c1fdf41efb upstream. During a re-initialization of the eMMC card, we may fail to re-enable HPI. In these cases, that isn't properly reflected in the card->ext_csd.hpi_en bit, as it keeps being set. This may cause following attempts to use HPI, even if's not enabled. Let's fix this! Fixes: eb0d8f135b67 ("mmc: core: support HPI send command") Cc: Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index bad5c1bf4ed9..914ea8d6c54a 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1755,9 +1755,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (err) { pr_warn("%s: Enabling HPI failed\n", mmc_hostname(card->host)); + card->ext_csd.hpi_en = 0; err = 0; - } else + } else { card->ext_csd.hpi_en = 1; + } } /* From 2b591835dcc5185302fbc69c351d3acc64ec4018 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 10 Dec 2018 17:52:37 +0100 Subject: [PATCH 1156/2608] mmc: core: Allow BKOPS and CACHE ctrl even if no HPI support commit ba9f39a785a9977e72233000711ef1eb48203551 upstream. In commit 5320226a0512 ("mmc: core: Disable HPI for certain Hynix eMMC cards"), then intent was to prevent HPI from being used for some eMMC cards, which didn't properly support it. However, that went too far, as even BKOPS and CACHE ctrl became prevented. Let's restore those parts and allow BKOPS and CACHE ctrl even if HPI isn't supported. Fixes: 5320226a0512 ("mmc: core: Disable HPI for certain Hynix eMMC cards") Cc: Pratibhasagar V Cc: Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 914ea8d6c54a..c1d12a4fde08 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -526,8 +526,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) card->cid.year += 16; /* check whether the eMMC card supports BKOPS */ - if (!mmc_card_broken_hpi(card) && - ext_csd[EXT_CSD_BKOPS_SUPPORT] & 0x1) { + if (ext_csd[EXT_CSD_BKOPS_SUPPORT] & 0x1) { card->ext_csd.bkops = 1; card->ext_csd.man_bkops_en = (ext_csd[EXT_CSD_BKOPS_EN] & @@ -1766,8 +1765,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, * If cache size is higher than 0, this indicates * the existence of cache and it can be turned on. */ - if (!mmc_card_broken_hpi(card) && - card->ext_csd.cache_size > 0) { + if (card->ext_csd.cache_size > 0) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_CACHE_CTRL, 1, card->ext_csd.generic_cmd6_time); From b097c4436eefdb59902aa74818897bd23d6a0551 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 10 Dec 2018 17:52:38 +0100 Subject: [PATCH 1157/2608] mmc: core: Use a minimum 1600ms timeout when enabling CACHE ctrl commit e3ae3401aa19432ee4943eb0bbc2ec704d07d793 upstream. Some eMMCs from Micron have been reported to need ~800 ms timeout, while enabling the CACHE ctrl after running sudden power failure tests. The needed timeout is greater than what the card specifies as its generic CMD6 timeout, through the EXT_CSD register, hence the problem. Normally we would introduce a card quirk to extend the timeout for these specific Micron cards. However, due to the rather complicated debug process needed to find out the error, let's simply use a minimum timeout of 1600ms, the double of what has been reported, for all cards when enabling CACHE ctrl. Reported-by: Sjoerd Simons Reported-by: Andreas Dannenberg Reported-by: Faiz Abbas Cc: Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index c1d12a4fde08..814a04e8fdd7 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -30,6 +30,7 @@ #include "pwrseq.h" #define DEFAULT_CMD6_TIMEOUT_MS 500 +#define MIN_CACHE_EN_TIMEOUT_MS 1600 static const unsigned int tran_exp[] = { 10000, 100000, 1000000, 10000000, @@ -1762,13 +1763,18 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, } /* - * If cache size is higher than 0, this indicates - * the existence of cache and it can be turned on. + * If cache size is higher than 0, this indicates the existence of cache + * and it can be turned on. Note that some eMMCs from Micron has been + * reported to need ~800 ms timeout, while enabling the cache after + * sudden power failure tests. Let's extend the timeout to a minimum of + * DEFAULT_CACHE_EN_TIMEOUT_MS and do it for all cards. */ if (card->ext_csd.cache_size > 0) { + unsigned int timeout_ms = MIN_CACHE_EN_TIMEOUT_MS; + + timeout_ms = max(card->ext_csd.generic_cmd6_time, timeout_ms); err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_CACHE_CTRL, 1, - card->ext_csd.generic_cmd6_time); + EXT_CSD_CACHE_CTRL, 1, timeout_ms); if (err && err != -EBADMSG) goto free_card; From 4d7a081d0a35a2cd1908451edf66755914cc6ef1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 11 Dec 2018 14:41:31 +0000 Subject: [PATCH 1158/2608] mmc: omap_hsmmc: fix DMA API warning commit 0b479790684192ab7024ce6a621f93f6d0a64d92 upstream. While booting with rootfs on MMC, the following warning is encountered on OMAP4430: omap-dma-engine 4a056000.dma-controller: DMA-API: mapping sg segment longer than device claims to support [len=69632] [max=65536] This is because the DMA engine has a default maximum segment size of 64K but HSMMC sets: mmc->max_blk_size = 512; /* Block Length at max can be 1024 */ mmc->max_blk_count = 0xFFFF; /* No. of Blocks is 16 bits */ mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; mmc->max_seg_size = mmc->max_req_size; which ends up telling the block layer that we support a maximum segment size of 65535*512, which exceeds the advertised DMA engine capabilities. Fix this by clamping the maximum segment size to the lower of the maximum request size and of the DMA engine device used for either DMA channel. Signed-off-by: Russell King Cc: Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/omap_hsmmc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 9e03fada16dc..3f3ff7530b76 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -2083,7 +2083,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev) mmc->max_blk_size = 512; /* Block Length at max can be 1024 */ mmc->max_blk_count = 0xFFFF; /* No. of Blocks is 16 bits */ mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; - mmc->max_seg_size = mmc->max_req_size; mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE | MMC_CAP_CMD23; @@ -2113,6 +2112,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev) goto err_irq; } + /* + * Limit the maximum segment size to the lower of the request size + * and the DMA engine device segment size limits. In reality, with + * 32-bit transfers, the DMA engine can do longer segments than this + * but there is no way to represent that in the DMA model - if we + * increase this figure here, we get warnings from the DMA API debug. + */ + mmc->max_seg_size = min3(mmc->max_req_size, + dma_get_max_seg_size(host->rx_chan->device->dev), + dma_get_max_seg_size(host->tx_chan->device->dev)); + /* Request IRQ for MMC operations */ ret = devm_request_irq(&pdev->dev, host->irq, omap_hsmmc_irq, 0, mmc_hostname(mmc), host); From 07cfa7ac9e52d3250061e3cb91acfecb905afefb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 7 Dec 2018 13:07:55 +0000 Subject: [PATCH 1159/2608] gpio: max7301: fix driver for use with CONFIG_VMAP_STACK commit abf221d2f51b8ce7b9959a8953f880a8b0a1400d upstream. spi_read() and spi_write() require DMA-safe memory. When CONFIG_VMAP_STACK is selected, those functions cannot be used with buffers on stack. This patch replaces calls to spi_read() and spi_write() by spi_write_then_read() which doesn't require DMA-safe buffers. Fixes: 0c36ec314735 ("gpio: gpio driver for max7301 SPI GPIO expander") Cc: Signed-off-by: Christophe Leroy Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-max7301.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c index 05813fbf3daf..647dfbbc4e1c 100644 --- a/drivers/gpio/gpio-max7301.c +++ b/drivers/gpio/gpio-max7301.c @@ -25,7 +25,7 @@ static int max7301_spi_write(struct device *dev, unsigned int reg, struct spi_device *spi = to_spi_device(dev); u16 word = ((reg & 0x7F) << 8) | (val & 0xFF); - return spi_write(spi, (const u8 *)&word, sizeof(word)); + return spi_write_then_read(spi, &word, sizeof(word), NULL, 0); } /* A read from the MAX7301 means two transfers; here, one message each */ @@ -37,14 +37,8 @@ static int max7301_spi_read(struct device *dev, unsigned int reg) struct spi_device *spi = to_spi_device(dev); word = 0x8000 | (reg << 8); - ret = spi_write(spi, (const u8 *)&word, sizeof(word)); - if (ret) - return ret; - /* - * This relies on the fact, that a transfer with NULL tx_buf shifts out - * zero bytes (=NOOP for MAX7301) - */ - ret = spi_read(spi, (u8 *)&word, sizeof(word)); + ret = spi_write_then_read(spi, &word, sizeof(word), &word, + sizeof(word)); if (ret) return ret; return word & 0xff; From 23572a68e828374d4076c296ced075340253b435 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 28 Nov 2018 17:57:55 +0100 Subject: [PATCH 1160/2608] gpiolib-acpi: Only defer request_irq for GpioInt ACPI event handlers commit e59f5e08ece1060073d92c66ded52e1f2c43b5bb upstream. Commit 78d3a92edbfb ("gpiolib-acpi: Register GpioInt ACPI event handlers from a late_initcall") deferred the entire acpi_gpiochip_request_interrupt call for each event resource. This means it also delays the gpiochip_request_own_desc(..., "ACPI:Event") call. This is a problem if some AML code reads the GPIO pin before we run the deferred acpi_gpiochip_request_interrupt, because in that case acpi_gpio_adr_space_handler() will already have called gpiochip_request_own_desc(..., "ACPI:OpRegion") causing the call from acpi_gpiochip_request_interrupt to fail with -EBUSY and we will fail to register an event handler. acpi_gpio_adr_space_handler is prepared for acpi_gpiochip_request_interrupt already having claimed the pin, but the other way around does not work. One example of a problem this causes, is the event handler for the OTG ID pin on a Prowise PT301 tablet not registering, keeping the port stuck in whatever mode it was in during boot and e.g. only allowing charging after a reboot. This commit fixes this by only deferring the request_irq call and the initial run of edge-triggered IRQs instead of deferring all of acpi_gpiochip_request_interrupt. Cc: stable@vger.kernel.org Fixes: 78d3a92edbfb ("gpiolib-acpi: Register GpioInt ACPI event ...") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 164 +++++++++++++++++++++--------------- 1 file changed, 94 insertions(+), 70 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 33d4bd505b5b..57d157e94bd6 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -23,11 +23,28 @@ #include "gpiolib.h" +/** + * struct acpi_gpio_event - ACPI GPIO event handler data + * + * @node: list-entry of the events list of the struct acpi_gpio_chip + * @handle: handle of ACPI method to execute when the IRQ triggers + * @handler: irq_handler to pass to request_irq when requesting the IRQ + * @pin: GPIO pin number on the gpio_chip + * @irq: Linux IRQ number for the event, for request_ / free_irq + * @irqflags: flags to pass to request_irq when requesting the IRQ + * @irq_is_wake: If the ACPI flags indicate the IRQ is a wakeup source + * @is_requested: True if request_irq has been done + * @desc: gpio_desc for the GPIO pin for this event + */ struct acpi_gpio_event { struct list_head node; acpi_handle handle; + irq_handler_t handler; unsigned int pin; unsigned int irq; + unsigned long irqflags; + bool irq_is_wake; + bool irq_requested; struct gpio_desc *desc; }; @@ -53,10 +70,10 @@ struct acpi_gpio_chip { /* * For gpiochips which call acpi_gpiochip_request_interrupts() before late_init - * (so builtin drivers) we register the ACPI GpioInt event handlers from a + * (so builtin drivers) we register the ACPI GpioInt IRQ handlers from a * late_initcall_sync handler, so that other builtin drivers can register their * OpRegions before the event handlers can run. This list contains gpiochips - * for which the acpi_gpiochip_request_interrupts() has been deferred. + * for which the acpi_gpiochip_request_irqs() call has been deferred. */ static DEFINE_MUTEX(acpi_gpio_deferred_req_irqs_lock); static LIST_HEAD(acpi_gpio_deferred_req_irqs_list); @@ -194,8 +211,42 @@ bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, } EXPORT_SYMBOL_GPL(acpi_gpio_get_irq_resource); -static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, - void *context) +static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio, + struct acpi_gpio_event *event) +{ + int ret, value; + + ret = request_threaded_irq(event->irq, NULL, event->handler, + event->irqflags, "ACPI:Event", event); + if (ret) { + dev_err(acpi_gpio->chip->parent, + "Failed to setup interrupt handler for %d\n", + event->irq); + return; + } + + if (event->irq_is_wake) + enable_irq_wake(event->irq); + + event->irq_requested = true; + + /* Make sure we trigger the initial state of edge-triggered IRQs */ + value = gpiod_get_raw_value_cansleep(event->desc); + if (((event->irqflags & IRQF_TRIGGER_RISING) && value == 1) || + ((event->irqflags & IRQF_TRIGGER_FALLING) && value == 0)) + event->handler(event->irq, event); +} + +static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio) +{ + struct acpi_gpio_event *event; + + list_for_each_entry(event, &acpi_gpio->events, node) + acpi_gpiochip_request_irq(acpi_gpio, event); +} + +static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, + void *context) { struct acpi_gpio_chip *acpi_gpio = context; struct gpio_chip *chip = acpi_gpio->chip; @@ -204,8 +255,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, struct acpi_gpio_event *event; irq_handler_t handler = NULL; struct gpio_desc *desc; - unsigned long irqflags; - int ret, pin, irq, value; + int ret, pin, irq; if (!acpi_gpio_get_irq_resource(ares, &agpio)) return AE_OK; @@ -240,8 +290,6 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, gpiod_direction_input(desc); - value = gpiod_get_value_cansleep(desc); - ret = gpiochip_lock_as_irq(chip, pin); if (ret) { dev_err(chip->parent, "Failed to lock GPIO as interrupt\n"); @@ -254,64 +302,42 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, goto fail_unlock_irq; } - irqflags = IRQF_ONESHOT; - if (agpio->triggering == ACPI_LEVEL_SENSITIVE) { - if (agpio->polarity == ACPI_ACTIVE_HIGH) - irqflags |= IRQF_TRIGGER_HIGH; - else - irqflags |= IRQF_TRIGGER_LOW; - } else { - switch (agpio->polarity) { - case ACPI_ACTIVE_HIGH: - irqflags |= IRQF_TRIGGER_RISING; - break; - case ACPI_ACTIVE_LOW: - irqflags |= IRQF_TRIGGER_FALLING; - break; - default: - irqflags |= IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING; - break; - } - } - event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) goto fail_unlock_irq; + event->irqflags = IRQF_ONESHOT; + if (agpio->triggering == ACPI_LEVEL_SENSITIVE) { + if (agpio->polarity == ACPI_ACTIVE_HIGH) + event->irqflags |= IRQF_TRIGGER_HIGH; + else + event->irqflags |= IRQF_TRIGGER_LOW; + } else { + switch (agpio->polarity) { + case ACPI_ACTIVE_HIGH: + event->irqflags |= IRQF_TRIGGER_RISING; + break; + case ACPI_ACTIVE_LOW: + event->irqflags |= IRQF_TRIGGER_FALLING; + break; + default: + event->irqflags |= IRQF_TRIGGER_RISING | + IRQF_TRIGGER_FALLING; + break; + } + } + event->handle = evt_handle; + event->handler = handler; event->irq = irq; + event->irq_is_wake = agpio->wake_capable == ACPI_WAKE_CAPABLE; event->pin = pin; event->desc = desc; - ret = request_threaded_irq(event->irq, NULL, handler, irqflags, - "ACPI:Event", event); - if (ret) { - dev_err(chip->parent, - "Failed to setup interrupt handler for %d\n", - event->irq); - goto fail_free_event; - } - - if (agpio->wake_capable == ACPI_WAKE_CAPABLE) - enable_irq_wake(irq); - list_add_tail(&event->node, &acpi_gpio->events); - /* - * Make sure we trigger the initial state of the IRQ when using RISING - * or FALLING. Note we run the handlers on late_init, the AML code - * may refer to OperationRegions from other (builtin) drivers which - * may be probed after us. - */ - if (((irqflags & IRQF_TRIGGER_RISING) && value == 1) || - ((irqflags & IRQF_TRIGGER_FALLING) && value == 0)) - handler(event->irq, event); - return AE_OK; -fail_free_event: - kfree(event); fail_unlock_irq: gpiochip_unlock_as_irq(chip, pin); fail_free_desc: @@ -348,6 +374,9 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; + acpi_walk_resources(handle, "_AEI", + acpi_gpiochip_alloc_event, acpi_gpio); + mutex_lock(&acpi_gpio_deferred_req_irqs_lock); defer = !acpi_gpio_deferred_req_irqs_done; if (defer) @@ -358,8 +387,7 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (defer) return; - acpi_walk_resources(handle, "_AEI", - acpi_gpiochip_request_interrupt, acpi_gpio); + acpi_gpiochip_request_irqs(acpi_gpio); } EXPORT_SYMBOL_GPL(acpi_gpiochip_request_interrupts); @@ -396,10 +424,13 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { struct gpio_desc *desc; - if (irqd_is_wakeup_set(irq_get_irq_data(event->irq))) - disable_irq_wake(event->irq); + if (event->irq_requested) { + if (event->irq_is_wake) + disable_irq_wake(event->irq); + + free_irq(event->irq, event); + } - free_irq(event->irq, event); desc = event->desc; if (WARN_ON(IS_ERR(desc))) continue; @@ -1253,23 +1284,16 @@ bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id) return con_id == NULL; } -/* Run deferred acpi_gpiochip_request_interrupts() */ -static int acpi_gpio_handle_deferred_request_interrupts(void) +/* Run deferred acpi_gpiochip_request_irqs() */ +static int acpi_gpio_handle_deferred_request_irqs(void) { struct acpi_gpio_chip *acpi_gpio, *tmp; mutex_lock(&acpi_gpio_deferred_req_irqs_lock); list_for_each_entry_safe(acpi_gpio, tmp, &acpi_gpio_deferred_req_irqs_list, - deferred_req_irqs_list_entry) { - acpi_handle handle; - - handle = ACPI_HANDLE(acpi_gpio->chip->parent); - acpi_walk_resources(handle, "_AEI", - acpi_gpiochip_request_interrupt, acpi_gpio); - - list_del_init(&acpi_gpio->deferred_req_irqs_list_entry); - } + deferred_req_irqs_list_entry) + acpi_gpiochip_request_irqs(acpi_gpio); acpi_gpio_deferred_req_irqs_done = true; mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); @@ -1277,4 +1301,4 @@ static int acpi_gpio_handle_deferred_request_interrupts(void) return 0; } /* We must use _sync so that this runs after the first deferred_probe run */ -late_initcall_sync(acpi_gpio_handle_deferred_request_interrupts); +late_initcall_sync(acpi_gpio_handle_deferred_request_irqs); From 2f8f9e280abcb7b2a933091b65ceb19549d85e3e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Dec 2018 13:31:05 +0100 Subject: [PATCH 1161/2608] posix-timers: Fix division by zero bug commit 0e334db6bb4b1fd1e2d72c1f3d8f004313cd9f94 upstream. The signal delivery path of posix-timers can try to rearm the timer even if the interval is zero. That's handled for the common case (hrtimer) but not for alarm timers. In that case the forwarding function raises a division by zero exception. The handling for hrtimer based posix timers is wrong because it marks the timer as active despite the fact that it is stopped. Move the check from common_hrtimer_rearm() to posixtimer_rearm() to cure both issues. Reported-by: syzbot+9d38bedac9cc77b8ad5e@syzkaller.appspotmail.com Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: sboyd@kernel.org Cc: stable@vger.kernel.org Cc: syzkaller-bugs@googlegroups.com Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1812171328050.1880@nanos.tec.linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/time/posix-timers.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 55d45fe2cc17..d7e478a430e9 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -298,9 +298,6 @@ static void common_hrtimer_rearm(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; - if (!timr->it_interval) - return; - timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), timr->it_interval); hrtimer_restart(timer); @@ -326,7 +323,7 @@ void posixtimer_rearm(struct siginfo *info) if (!timr) return; - if (timr->it_requeue_pending == info->si_sys_private) { + if (timr->it_interval && timr->it_requeue_pending == info->si_sys_private) { timr->kclock->timer_rearm(timr); timr->it_active = 1; From df4ee073061999ebaed0d2ce0a6cd3380a2ed67f Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Dec 2018 22:34:18 -0200 Subject: [PATCH 1162/2608] kvm: x86: Add AMD's EX_CFG to the list of ignored MSRs commit 0e1b869fff60c81b510c2d00602d778f8f59dd9a upstream. Some guests OSes (including Windows 10) write to MSR 0xc001102c on some cases (possibly while trying to apply a CPU errata). Make KVM ignore reads and writes to that MSR, so the guest won't crash. The MSR is documented as "Execution Unit Configuration (EX_CFG)", at AMD's "BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h Models 00h-0Fh Processors". Cc: stable@vger.kernel.org Signed-off-by: Eduardo Habkost Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kvm/x86.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 62c62d3eb0ff..fed3636dce9a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -372,6 +372,7 @@ #define MSR_F15H_NB_PERF_CTR 0xc0010241 #define MSR_F15H_PTSC 0xc0010280 #define MSR_F15H_IC_CFG 0xc0011021 +#define MSR_F15H_EX_CFG 0xc001102c /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f24329659bea..ac431fa778aa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2227,6 +2227,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_PATCH_LOADER: case MSR_AMD64_BU_CFG2: case MSR_AMD64_DC_CFG: + case MSR_F15H_EX_CFG: break; case MSR_IA32_UCODE_REV: @@ -2508,6 +2509,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_BU_CFG2: case MSR_IA32_PERF_CTL: case MSR_AMD64_DC_CFG: + case MSR_F15H_EX_CFG: msr_info->data = 0; break; case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: From cc70f14956fa0ea84ee1a3a5b79347730c6c2d08 Mon Sep 17 00:00:00 2001 From: Cfir Cohen Date: Tue, 18 Dec 2018 08:18:41 -0800 Subject: [PATCH 1163/2608] KVM: Fix UAF in nested posted interrupt processing commit c2dd5146e9fe1f22c77c1b011adf84eea0245806 upstream. nested_get_vmcs12_pages() processes the posted_intr address in vmcs12. It caches the kmap()ed page object and pointer, however, it doesn't handle errors correctly: it's possible to cache a valid pointer, then release the page and later dereference the dangling pointer. I was able to reproduce with the following steps: 1. Call vmlaunch with valid posted_intr_desc_addr but an invalid MSR_EFER. This causes nested_get_vmcs12_pages() to cache the kmap()ed pi_desc_page and pi_desc. Later the invalid EFER value fails check_vmentry_postreqs() which fails the first vmlaunch. 2. Call vmlanuch with a valid EFER but an invalid posted_intr_desc_addr (I set it to 2G - 0x80). The second time we call nested_get_vmcs12_pages pi_desc_page is unmapped and released and pi_desc_page is set to NULL (the "shouldn't happen" clause). Due to the invalid posted_intr_desc_addr, kvm_vcpu_gpa_to_page() fails and nested_get_vmcs12_pages() returns. It doesn't return an error value so vmlaunch proceeds. Note that at this time we have a dangling pointer in vmx->nested.pi_desc and POSTED_INTR_DESC_ADDR in L0's vmcs. 3. Issue an IPI in L2 guest code. This triggers a call to vmx_complete_nested_posted_interrupt() and pi_test_and_clear_on() which dereferences the dangling pointer. Vulnerable code requires nested and enable_apicv variables to be set to true. The host CPU must also support posted interrupts. Fixes: 5e2f30b756a37 "KVM: nVMX: get rid of nested_get_page()" Cc: stable@vger.kernel.org Reviewed-by: Andy Honig Signed-off-by: Cfir Cohen Reviewed-by: Liran Alon Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4353580b659a..8eec37d37c3d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10447,6 +10447,8 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, kunmap(vmx->nested.pi_desc_page); kvm_release_page_dirty(vmx->nested.pi_desc_page); vmx->nested.pi_desc_page = NULL; + vmx->nested.pi_desc = NULL; + vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull); } page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); if (is_error_page(page)) From ff53cc3576d3d728dec87e866c3ae84e33ef07a8 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 13 Dec 2018 16:35:43 +0000 Subject: [PATCH 1164/2608] Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels commit fc96df16a1ce80cbb3c316ab7d4dc8cd5c2852ce upstream. Before 98f4c651762c, we returned zeros for unopened channels. With 98f4c651762c, we started to return random on-stack values. We'd better return -EINVAL instead. Fixes: 98f4c651762c ("hv: move ringbuffer bus attributes to dev_groups") Cc: stable@vger.kernel.org Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: Dexuan Cui Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2cd134dd94d2..4218a616f1d3 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -300,6 +300,8 @@ static ssize_t out_intr_mask_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_interrupt_mask); } @@ -313,6 +315,8 @@ static ssize_t out_read_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_read_index); } @@ -327,6 +331,8 @@ static ssize_t out_write_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_write_index); } @@ -341,6 +347,8 @@ static ssize_t out_read_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.bytes_avail_toread); } @@ -355,6 +363,8 @@ static ssize_t out_write_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); } @@ -368,6 +378,8 @@ static ssize_t in_intr_mask_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_interrupt_mask); } @@ -381,6 +393,8 @@ static ssize_t in_read_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_read_index); } @@ -394,6 +408,8 @@ static ssize_t in_write_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_write_index); } @@ -408,6 +424,8 @@ static ssize_t in_read_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.bytes_avail_toread); } @@ -422,6 +440,8 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); } From 0761921fa6cbe6859b7994eb77811f100adb9ffe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 18 Dec 2018 17:29:56 +0000 Subject: [PATCH 1165/2608] x86/mtrr: Don't copy uninitialized gentry fields back to userspace commit 32043fa065b51e0b1433e48d118821c71b5cd65d upstream. Currently the copy_to_user of data in the gentry struct is copying uninitiaized data in field _pad from the stack to userspace. Fix this by explicitly memset'ing gentry to zero, this also will zero any compiler added padding fields that may be in struct (currently there are none). Detected by CoverityScan, CID#200783 ("Uninitialized scalar variable") Fixes: b263b31e8ad6 ("x86, mtrr: Use explicit sizing and padding for the 64-bit ioctls") Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Cc: security@kernel.org Link: https://lkml.kernel.org/r/20181218172956.1440-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mtrr/if.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 558444b23923..c2987daa6a6b 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -173,6 +173,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) struct mtrr_gentry gentry; void __user *arg = (void __user *) __arg; + memset(&gentry, 0, sizeof(gentry)); + switch (cmd) { case MTRRIOC_ADD_ENTRY: case MTRRIOC_SET_ENTRY: From e1240a10f34d452bf9f50da7cc062022b586c2f4 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Oct 2018 19:10:36 +0900 Subject: [PATCH 1166/2608] panic: avoid deadlocks in re-entrant console drivers commit c7c3f05e341a9a2bd1a92993d4f996cfd6e7348e upstream. From printk()/serial console point of view panic() is special, because it may force CPU to re-enter printk() or/and serial console driver. Therefore, some of serial consoles drivers are re-entrant. E.g. 8250: serial8250_console_write() { if (port->sysrq) locked = 0; else if (oops_in_progress) locked = spin_trylock_irqsave(&port->lock, flags); else spin_lock_irqsave(&port->lock, flags); ... } panic() does set oops_in_progress via bust_spinlocks(1), so in theory we should be able to re-enter serial console driver from panic(): CPU0 uart_console_write() serial8250_console_write() // if (oops_in_progress) // spin_trylock_irqsave() call_console_drivers() console_unlock() console_flush_on_panic() bust_spinlocks(1) // oops_in_progress++ panic() spin_lock_irqsave(&port->lock, flags) // spin_lock_irqsave() serial8250_console_write() call_console_drivers() console_unlock() printk() ... However, this does not happen and we deadlock in serial console on port->lock spinlock. And the problem is that console_flush_on_panic() called after bust_spinlocks(0): void panic(const char *fmt, ...) { bust_spinlocks(1); ... bust_spinlocks(0); console_flush_on_panic(); ... } bust_spinlocks(0) decrements oops_in_progress, so oops_in_progress can go back to zero. Thus even re-entrant console drivers will simply spin on port->lock spinlock. Given that port->lock may already be locked either by a stopped CPU, or by the very same CPU we execute panic() on (for instance, NMI panic() on printing CPU) the system deadlocks and does not reboot. Fix this by removing bust_spinlocks(0), so oops_in_progress is always set in panic() now and, thus, re-entrant console drivers will trylock the port->lock instead of spinning on it forever, when we call them from console_flush_on_panic(). Link: http://lkml.kernel.org/r/20181025101036.6823-1-sergey.senozhatsky@gmail.com Cc: Steven Rostedt Cc: Daniel Wang Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Alan Cox Cc: Jiri Slaby Cc: Peter Feiner Cc: linux-serial@vger.kernel.org Cc: Sergey Senozhatsky Cc: stable@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Greg Kroah-Hartman --- kernel/panic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index bdd18afa19a4..32ff6fd30201 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -230,7 +231,10 @@ void panic(const char *fmt, ...) if (_crash_kexec_post_notifiers) __crash_kexec(NULL); - bust_spinlocks(0); +#ifdef CONFIG_VT + unblank_screen(); +#endif + console_unblank(); /* * We may have ended up stopping the CPU holding the lock (in From adb7ea126e6315895aaa47f7cb376bfb6360be5e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 14 Dec 2018 18:30:22 +0200 Subject: [PATCH 1167/2608] iwlwifi: mvm: don't send GEO_TX_POWER_LIMIT to old firmwares commit eca1e56ceedd9cc185eb18baf307d3ff2e4af376 upstream. Old firmware versions don't support this command. Sending it to any firmware before -41.ucode will crash the firmware. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=201975 Fixes: 66e839030fd6 ("iwlwifi: fix wrong WGDS_WIFI_DATA_SIZE") CC: #4.19+ Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index cebf0ce76d27..e9e466cae322 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -952,6 +952,15 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) int ret, i, j; u16 cmd_wide_id = WIDE_ID(PHY_OPS_GROUP, GEO_TX_POWER_LIMIT); + /* + * This command is not supported on earlier firmware versions. + * Unfortunately, we don't have a TLV API flag to rely on, so + * rely on the major version which is in the first byte of + * ucode_ver. + */ + if (IWL_UCODE_SERIAL(mvm->fw->ucode_ver) < 41) + return 0; + ret = iwl_mvm_sar_get_wgds_table(mvm); if (ret < 0) { IWL_DEBUG_RADIO(mvm, From 20e985263c31166c639163116e681b1aa8775c8f Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Tue, 31 Jul 2018 09:53:09 +0300 Subject: [PATCH 1168/2608] iwlwifi: add new cards for 9560, 9462, 9461 and killer series commit f108703cb5f199d0fc98517ac29a997c4c646c94 upstream. add few PCI ID'S for 9560, 9462, 9461 and killer series. Cc: stable@vger.kernel.org Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 4cbc6cb8bf89..0ff247326d6c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -517,6 +517,56 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)}, /* 9000 Series */ + {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0038, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0060, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0064, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x00A0, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x00A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0230, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0234, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0238, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x023C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0260, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0264, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x02A0, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x02A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x2030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x2034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x4030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x40A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x4234, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x42A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0018, iwl9260_2ac_cfg)}, From 0cc6feb5c4b5e6e1d1fa98f97b28ba79d6b5cb1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Dec 2018 22:21:21 +0100 Subject: [PATCH 1169/2608] spi: imx: add a device specific prepare_message callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is just preparatory work which allows to move some initialisation that currently is done in the per transfer hook .config to an earlier point in time in the next few patches. There is no change in behaviour introduced by this patch. Signed-off-by: Uwe Kleine-König Signed-off-by: Mark Brown [ukleinek: backport to v4.14.x] Signed-off-by: Uwe Kleine-König Signed-off-by: Sasha Levin --- drivers/spi/spi-imx.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index d51ca243a028..3fdb0652429b 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -72,6 +72,7 @@ struct spi_imx_data; struct spi_imx_devtype_data { void (*intctrl)(struct spi_imx_data *, int); + int (*prepare_message)(struct spi_imx_data *, struct spi_message *); int (*config)(struct spi_device *); void (*trigger)(struct spi_imx_data *); int (*rx_available)(struct spi_imx_data *); @@ -439,6 +440,12 @@ static void mx51_ecspi_trigger(struct spi_imx_data *spi_imx) writel(reg, spi_imx->base + MX51_ECSPI_CTRL); } +static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, + struct spi_message *msg) +{ + return 0; +} + static int mx51_ecspi_config(struct spi_device *spi) { struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); @@ -599,6 +606,12 @@ static void mx31_trigger(struct spi_imx_data *spi_imx) writel(reg, spi_imx->base + MXC_CSPICTRL); } +static int mx31_prepare_message(struct spi_imx_data *spi_imx, + struct spi_message *msg) +{ + return 0; +} + static int mx31_config(struct spi_device *spi) { struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); @@ -695,6 +708,12 @@ static void mx21_trigger(struct spi_imx_data *spi_imx) writel(reg, spi_imx->base + MXC_CSPICTRL); } +static int mx21_prepare_message(struct spi_imx_data *spi_imx, + struct spi_message *msg) +{ + return 0; +} + static int mx21_config(struct spi_device *spi) { struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); @@ -764,6 +783,12 @@ static void mx1_trigger(struct spi_imx_data *spi_imx) writel(reg, spi_imx->base + MXC_CSPICTRL); } +static int mx1_prepare_message(struct spi_imx_data *spi_imx, + struct spi_message *msg) +{ + return 0; +} + static int mx1_config(struct spi_device *spi) { struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); @@ -798,6 +823,7 @@ static void mx1_reset(struct spi_imx_data *spi_imx) static struct spi_imx_devtype_data imx1_cspi_devtype_data = { .intctrl = mx1_intctrl, + .prepare_message = mx1_prepare_message, .config = mx1_config, .trigger = mx1_trigger, .rx_available = mx1_rx_available, @@ -810,6 +836,7 @@ static struct spi_imx_devtype_data imx1_cspi_devtype_data = { static struct spi_imx_devtype_data imx21_cspi_devtype_data = { .intctrl = mx21_intctrl, + .prepare_message = mx21_prepare_message, .config = mx21_config, .trigger = mx21_trigger, .rx_available = mx21_rx_available, @@ -823,6 +850,7 @@ static struct spi_imx_devtype_data imx21_cspi_devtype_data = { static struct spi_imx_devtype_data imx27_cspi_devtype_data = { /* i.mx27 cspi shares the functions with i.mx21 one */ .intctrl = mx21_intctrl, + .prepare_message = mx21_prepare_message, .config = mx21_config, .trigger = mx21_trigger, .rx_available = mx21_rx_available, @@ -835,6 +863,7 @@ static struct spi_imx_devtype_data imx27_cspi_devtype_data = { static struct spi_imx_devtype_data imx31_cspi_devtype_data = { .intctrl = mx31_intctrl, + .prepare_message = mx31_prepare_message, .config = mx31_config, .trigger = mx31_trigger, .rx_available = mx31_rx_available, @@ -848,6 +877,7 @@ static struct spi_imx_devtype_data imx31_cspi_devtype_data = { static struct spi_imx_devtype_data imx35_cspi_devtype_data = { /* i.mx35 and later cspi shares the functions with i.mx31 one */ .intctrl = mx31_intctrl, + .prepare_message = mx31_prepare_message, .config = mx31_config, .trigger = mx31_trigger, .rx_available = mx31_rx_available, @@ -860,6 +890,7 @@ static struct spi_imx_devtype_data imx35_cspi_devtype_data = { static struct spi_imx_devtype_data imx51_ecspi_devtype_data = { .intctrl = mx51_ecspi_intctrl, + .prepare_message = mx51_ecspi_prepare_message, .config = mx51_ecspi_config, .trigger = mx51_ecspi_trigger, .rx_available = mx51_ecspi_rx_available, @@ -872,6 +903,7 @@ static struct spi_imx_devtype_data imx51_ecspi_devtype_data = { static struct spi_imx_devtype_data imx53_ecspi_devtype_data = { .intctrl = mx51_ecspi_intctrl, + .prepare_message = mx51_ecspi_prepare_message, .config = mx51_ecspi_config, .trigger = mx51_ecspi_trigger, .rx_available = mx51_ecspi_rx_available, @@ -1310,7 +1342,13 @@ spi_imx_prepare_message(struct spi_master *master, struct spi_message *msg) return ret; } - return 0; + ret = spi_imx->devtype_data->prepare_message(spi_imx, msg); + if (ret) { + clk_disable(spi_imx->clk_ipg); + clk_disable(spi_imx->clk_per); + } + + return ret; } static int From 089651ef038eb4af77b0847615f909d2edce1bc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Dec 2018 22:21:22 +0100 Subject: [PATCH 1170/2608] spi: imx: mx51-ecspi: Move some initialisation to prepare_message hook. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The relevant difference between prepare_message and config is that the former is run before the CS signal is asserted. So the polarity of the CLK line must be configured in prepare_message as an edge generated by config might already result in a latch of the MOSI line. Signed-off-by: Uwe Kleine-König Signed-off-by: Mark Brown [ukleinek: backport to v4.14.x] Signed-off-by: Uwe Kleine-König Signed-off-by: Sasha Levin --- drivers/spi/spi-imx.c | 59 ++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 3fdb0652429b..df18d07d544d 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -443,14 +443,9 @@ static void mx51_ecspi_trigger(struct spi_imx_data *spi_imx) static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, struct spi_message *msg) { - return 0; -} - -static int mx51_ecspi_config(struct spi_device *spi) -{ - struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); + struct spi_device *spi = msg->spi; u32 ctrl = MX51_ECSPI_CTRL_ENABLE; - u32 clk = spi_imx->speed_hz, delay, reg; + u32 testreg; u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG); /* @@ -468,14 +463,21 @@ static int mx51_ecspi_config(struct spi_device *spi) if (spi->mode & SPI_READY) ctrl |= MX51_ECSPI_CTRL_DRCTL(spi_imx->spi_drctl); - /* set clock speed */ - ctrl |= mx51_ecspi_clkdiv(spi_imx, spi_imx->speed_hz, &clk); - spi_imx->spi_bus_clk = clk; - /* set chip select to use */ ctrl |= MX51_ECSPI_CTRL_CS(spi->chip_select); - ctrl |= (spi_imx->bits_per_word - 1) << MX51_ECSPI_CTRL_BL_OFFSET; + /* + * The ctrl register must be written first, with the EN bit set other + * registers must not be written to. + */ + writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL); + + testreg = readl(spi_imx->base + MX51_ECSPI_TESTREG); + if (spi->mode & SPI_LOOP) + testreg |= MX51_ECSPI_TESTREG_LBC; + else + testreg &= ~MX51_ECSPI_TESTREG_LBC; + writel(testreg, spi_imx->base + MX51_ECSPI_TESTREG); cfg |= MX51_ECSPI_CONFIG_SBBCTRL(spi->chip_select); @@ -491,26 +493,38 @@ static int mx51_ecspi_config(struct spi_device *spi) cfg &= ~MX51_ECSPI_CONFIG_SCLKPOL(spi->chip_select); cfg &= ~MX51_ECSPI_CONFIG_SCLKCTL(spi->chip_select); } + if (spi->mode & SPI_CS_HIGH) cfg |= MX51_ECSPI_CONFIG_SSBPOL(spi->chip_select); else cfg &= ~MX51_ECSPI_CONFIG_SSBPOL(spi->chip_select); + writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG); + + return 0; +} + +static int mx51_ecspi_config(struct spi_device *spi) +{ + struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master); + u32 ctrl = readl(spi_imx->base + MX51_ECSPI_CTRL); + u32 clk = spi_imx->speed_hz, delay; + + /* Clear BL field and set the right value */ + ctrl &= ~MX51_ECSPI_CTRL_BL_MASK; + ctrl |= (spi_imx->bits_per_word - 1) << MX51_ECSPI_CTRL_BL_OFFSET; + + /* set clock speed */ + ctrl &= ~(0xf << MX51_ECSPI_CTRL_POSTDIV_OFFSET | + 0xf << MX51_ECSPI_CTRL_PREDIV_OFFSET); + ctrl |= mx51_ecspi_clkdiv(spi_imx, spi_imx->speed_hz, &clk); + spi_imx->spi_bus_clk = clk; + if (spi_imx->usedma) ctrl |= MX51_ECSPI_CTRL_SMC; - /* CTRL register always go first to bring out controller from reset */ writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL); - reg = readl(spi_imx->base + MX51_ECSPI_TESTREG); - if (spi->mode & SPI_LOOP) - reg |= MX51_ECSPI_TESTREG_LBC; - else - reg &= ~MX51_ECSPI_TESTREG_LBC; - writel(reg, spi_imx->base + MX51_ECSPI_TESTREG); - - writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG); - /* * Wait until the changes in the configuration register CONFIGREG * propagate into the hardware. It takes exactly one tick of the @@ -532,7 +546,6 @@ static int mx51_ecspi_config(struct spi_device *spi) * Configure the DMA register: setup the watermark * and enable DMA request. */ - writel(MX51_ECSPI_DMA_RX_WML(spi_imx->wml) | MX51_ECSPI_DMA_TX_WML(spi_imx->wml) | MX51_ECSPI_DMA_RXT_WML(spi_imx->wml) | From ed0d232df97fcba2ae621c4eb1283b1c790b7989 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 26 Dec 2018 13:32:11 +0100 Subject: [PATCH 1171/2608] ubifs: Handle re-linking of inodes correctly while recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e58725d51fa8da9133f3f1c54170aa2e43056b91 upstream. UBIFS's recovery code strictly assumes that a deleted inode will never come back, therefore it removes all data which belongs to that inode as soon it faces an inode with link count 0 in the replay list. Before O_TMPFILE this assumption was perfectly fine. With O_TMPFILE it can lead to data loss upon a power-cut. Consider a journal with entries like: 0: inode X (nlink = 0) /* O_TMPFILE was created */ 1: data for inode X /* Someone writes to the temp file */ 2: inode X (nlink = 0) /* inode was changed, xattr, chmod, … */ 3: inode X (nlink = 1) /* inode was re-linked via linkat() */ Upon replay of entry #2 UBIFS will drop all data that belongs to inode X, this will lead to an empty file after mounting. As solution for this problem, scan the replay list for a re-link entry before dropping data. Fixes: 474b93704f32 ("ubifs: Implement O_TMPFILE") Cc: stable@vger.kernel.org # 4.9-4.18 Cc: Russell Senior Cc: Rafał Miłecki Reported-by: Russell Senior Reported-by: Rafał Miłecki Tested-by: Rafał Miłecki Signed-off-by: Richard Weinberger [rmilecki: update ubifs_assert() calls to compile with 4.18 and older] Signed-off-by: Rafał Miłecki (cherry picked from commit e58725d51fa8da9133f3f1c54170aa2e43056b91) Signed-off-by: Sasha Levin --- fs/ubifs/replay.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ae5c02f22f3e..d998fbf7de30 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -209,6 +209,38 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) return ubifs_tnc_remove_range(c, &min_key, &max_key); } +/** + * inode_still_linked - check whether inode in question will be re-linked. + * @c: UBIFS file-system description object + * @rino: replay entry to test + * + * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1. + * This case needs special care, otherwise all references to the inode will + * be removed upon the first replay entry of an inode with link count 0 + * is found. + */ +static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino) +{ + struct replay_entry *r; + + ubifs_assert(rino->deletion); + ubifs_assert(key_type(c, &rino->key) == UBIFS_INO_KEY); + + /* + * Find the most recent entry for the inode behind @rino and check + * whether it is a deletion. + */ + list_for_each_entry_reverse(r, &c->replay_list, list) { + ubifs_assert(r->sqnum >= rino->sqnum); + if (key_inum(c, &r->key) == key_inum(c, &rino->key)) + return r->deletion == 0; + + } + + ubifs_assert(0); + return false; +} + /** * apply_replay_entry - apply a replay entry to the TNC. * @c: UBIFS file-system description object @@ -239,6 +271,11 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) { ino_t inum = key_inum(c, &r->key); + if (inode_still_linked(c, r)) { + err = 0; + break; + } + err = ubifs_tnc_remove_ino(c, inum); break; } From 36f93a2e7dce0a4f58b96a7ecb3af4e5897a60d4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Oct 2018 15:03:27 -0700 Subject: [PATCH 1172/2608] mm: don't miss the last page because of round-off error commit 68600f623d69da428c6163275f97ca126e1a8ec5 upstream. I've noticed, that dying memory cgroups are often pinned in memory by a single pagecache page. Even under moderate memory pressure they sometimes stayed in such state for a long time. That looked strange. My investigation showed that the problem is caused by applying the LRU pressure balancing math: scan = div64_u64(scan * fraction[lru], denominator), where denominator = fraction[anon] + fraction[file] + 1. Because fraction[lru] is always less than denominator, if the initial scan size is 1, the result is always 0. This means the last page is not scanned and has no chances to be reclaimed. Fix this by rounding up the result of the division. In practice this change significantly improves the speed of dying cgroups reclaim. [guro@fb.com: prevent double calculation of DIV64_U64_ROUND_UP() arguments] Link: http://lkml.kernel.org/r/20180829213311.GA13501@castle Link: http://lkml.kernel.org/r/20180827162621.30187-3-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Andrew Morton Cc: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Cc: Rik van Riel Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/math64.h | 3 +++ mm/vmscan.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/math64.h b/include/linux/math64.h index 082de345b73c..3a7a14062668 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -254,4 +254,7 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) } #endif /* mul_u64_u32_div */ +#define DIV64_U64_ROUND_UP(ll, d) \ + ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) + #endif /* _LINUX_MATH64_H */ diff --git a/mm/vmscan.c b/mm/vmscan.c index be56e2e1931e..9734e62654fa 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2367,9 +2367,11 @@ out: /* * Scan types proportional to swappiness and * their relative recent reclaim efficiency. + * Make sure we don't miss the last page + * because of a round-off error. */ - scan = div64_u64(scan * fraction[file], - denominator); + scan = DIV64_U64_ROUND_UP(scan * fraction[file], + denominator); break; case SCAN_FILE: case SCAN_ANON: From 5cebd962c97f3ee64555718169cc00c8fe7a228a Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 13 Dec 2018 15:20:52 -0800 Subject: [PATCH 1173/2608] proc/sysctl: don't return ENOMEM on lookup when a table is unregistering commit ea5751ccd665a2fd1b24f9af81f6167f0718c5f6 upstream. proc_sys_lookup can fail with ENOMEM instead of ENOENT when the corresponding sysctl table is being unregistered. In our case we see this upon opening /proc/sys/net/*/conf files while network interfaces are being deleted, which confuses our configuration daemon. The problem was successfully reproduced and this fix tested on v4.9.122 and v4.20-rc6. v2: return ERR_PTRs in all cases when proc_sys_make_inode fails instead of mixing them with NULL. Thanks Al Viro for the feedback. Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.") Cc: stable@vger.kernel.org Signed-off-by: Ivan Delalande Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_sysctl.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 82ac5f682b73..f69c545f5868 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -464,7 +464,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode = new_inode(sb); if (!inode) - goto out; + return ERR_PTR(-ENOMEM); inode->i_ino = get_next_ino(); @@ -474,8 +474,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (unlikely(head->unregistering)) { spin_unlock(&sysctl_lock); iput(inode); - inode = NULL; - goto out; + return ERR_PTR(-ENOENT); } ei->sysctl = head; ei->sysctl_entry = table; @@ -500,7 +499,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (root->set_ownership) root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); -out: return inode; } @@ -549,10 +547,11 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; } - err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); - if (!inode) + if (IS_ERR(inode)) { + err = ERR_CAST(inode); goto out; + } err = NULL; d_set_d_op(dentry, &proc_sys_dentry_operations); @@ -685,7 +684,7 @@ static bool proc_sys_fill_cache(struct file *file, return false; if (d_in_lookup(child)) { inode = proc_sys_make_inode(dir->d_sb, head, table); - if (!inode) { + if (IS_ERR(inode)) { d_lookup_done(child); dput(child); return false; From d04e6ea0cec9e7d6cba806508f657d2d0dc6cacf Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 19 Dec 2018 18:00:15 -0600 Subject: [PATCH 1174/2608] drm/ioctl: Fix Spectre v1 vulnerabilities commit 505b5240329b922f21f91d5b5d1e535c805eca6d upstream. nr is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/gpu/drm/drm_ioctl.c:805 drm_ioctl() warn: potential spectre issue 'dev->driver->ioctls' [r] drivers/gpu/drm/drm_ioctl.c:810 drm_ioctl() warn: potential spectre issue 'drm_ioctls' [r] (local cap) drivers/gpu/drm/drm_ioctl.c:892 drm_ioctl_flags() warn: potential spectre issue 'drm_ioctls' [r] (local cap) Fix this by sanitizing nr before using it to index dev->driver->ioctls and drm_ioctls. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181220000015.GA18973@embeddedor Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_ioctl.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index a9ae6dd2d593..53f319369de5 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -37,6 +37,7 @@ #include #include +#include /** * DOC: getunique and setversion story @@ -778,13 +779,17 @@ long drm_ioctl(struct file *filp, if (is_driver_ioctl) { /* driver ioctl */ - if (nr - DRM_COMMAND_BASE >= dev->driver->num_ioctls) + unsigned int index = nr - DRM_COMMAND_BASE; + + if (index >= dev->driver->num_ioctls) goto err_i1; - ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE]; + index = array_index_nospec(index, dev->driver->num_ioctls); + ioctl = &dev->driver->ioctls[index]; } else { /* core ioctl */ if (nr >= DRM_CORE_IOCTL_COUNT) goto err_i1; + nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT); ioctl = &drm_ioctls[nr]; } @@ -866,6 +871,7 @@ bool drm_ioctl_flags(unsigned int nr, unsigned int *flags) if (nr >= DRM_CORE_IOCTL_COUNT) return false; + nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT); *flags = drm_ioctls[nr].flags; return true; From d2dd9f1593dc4d5ceb5cf4a973ed2c6e3a49d799 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 29 Dec 2018 13:39:11 +0100 Subject: [PATCH 1175/2608] Linux 4.14.91 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 280c7193e246..a6fb3b158a19 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 90 +SUBLEVEL = 91 EXTRAVERSION = NAME = Petit Gorille From 26a697a9a56ccba99f5ed900dcb0973ff828e75f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 10 Dec 2018 12:41:24 -0600 Subject: [PATCH 1176/2608] ipv4: Fix potential Spectre v1 vulnerability [ Upstream commit 5648451e30a0d13d11796574919a359025d52cce ] vr.vifi is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/ipv4/ipmr.c:1616 ipmr_ioctl() warn: potential spectre issue 'mrt->vif_table' [r] (local cap) net/ipv4/ipmr.c:1690 ipmr_compat_ioctl() warn: potential spectre issue 'mrt->vif_table' [r] (local cap) Fix this by sanitizing vr.vifi before using it to index mrt->vif_table' Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9f314a5e9f27..ce3d5f734fdb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -68,6 +68,8 @@ #include #include +#include + struct ipmr_rule { struct fib_rule common; }; @@ -1620,6 +1622,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return -EFAULT; if (vr.vifi >= mrt->maxvif) return -EINVAL; + vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { From 18e260fd2a4c7433e2daa861678f2dd58501904b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Dec 2018 14:10:08 -0600 Subject: [PATCH 1177/2608] ip6mr: Fix potential Spectre v1 vulnerability [ Upstream commit 69d2c86766da2ded2b70281f1bf242cb0d58a778 ] vr.mifi is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/ipv6/ip6mr.c:1845 ip6mr_ioctl() warn: potential spectre issue 'mrt->vif_table' [r] (local cap) net/ipv6/ip6mr.c:1919 ip6mr_compat_ioctl() warn: potential spectre issue 'mrt->vif_table' [r] (local cap) Fix this by sanitizing vr.mifi before using it to index mrt->vif_table' Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8015e74fd7d9..b2fdb3fdd217 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -72,6 +72,8 @@ struct mr6_table { #endif }; +#include + struct ip6mr_rule { struct fib_rule common; }; @@ -1883,6 +1885,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; if (vr.mifi >= mrt->maxvif) return -EINVAL; + vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif6_table[vr.mifi]; if (MIF_EXISTS(mrt, vr.mifi)) { @@ -1957,6 +1960,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return -EFAULT; if (vr.mifi >= mrt->maxvif) return -EINVAL; + vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif6_table[vr.mifi]; if (MIF_EXISTS(mrt, vr.mifi)) { From 26a5adc8eb26d170058645c3cccd4d19165bec16 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Dec 2018 13:56:36 -0800 Subject: [PATCH 1178/2608] ax25: fix a use-after-free in ax25_fillin_cb() [ Upstream commit c433570458e49bccea5c551df628d058b3526289 ] There are multiple issues here: 1. After freeing dev->ax25_ptr, we need to set it to NULL otherwise we may use a dangling pointer. 2. There is a race between ax25_setsockopt() and device notifier as reported by syzbot. Close it by holding RTNL lock. 3. We need to test if dev->ax25_ptr is NULL before using it. Reported-and-tested-by: syzbot+ae6bb869cbed29b29040@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ax25/af_ax25.c | 11 +++++++++-- net/ax25/ax25_dev.c | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index f3f9d18891de..d783d90c20f1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -654,15 +654,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } - dev = dev_get_by_name(&init_net, devname); + rtnl_lock(); + dev = __dev_get_by_name(&init_net, devname); if (!dev) { + rtnl_unlock(); res = -ENODEV; break; } ax25->ax25_dev = ax25_dev_ax25dev(dev); + if (!ax25->ax25_dev) { + rtnl_unlock(); + res = -ENODEV; + break; + } ax25_fillin_cb(ax25, ax25->ax25_dev); - dev_put(dev); + rtnl_unlock(); break; default: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 9a3a301e1e2f..d92195cd7834 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -116,6 +116,7 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; @@ -125,6 +126,7 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; From 79d222d8d2595b2bef609b19120fbc375d6b309d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 19 Dec 2018 23:23:00 +0100 Subject: [PATCH 1179/2608] gro_cell: add napi_disable in gro_cells_destroy [ Upstream commit 8e1da73acded4751a93d4166458a7e640f37d26c ] Add napi_disable routine in gro_cells_destroy since starting from commit c42858eaf492 ("gro_cells: remove spinlock protecting receive queues") gro_cell_poll and gro_cells_destroy can run concurrently on napi_skbs list producing a kernel Oops if the tunnel interface is removed while gro_cell_poll is running. The following Oops has been triggered removing a vxlan device while the interface is receiving traffic [ 5628.948853] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 5628.949981] PGD 0 P4D 0 [ 5628.950308] Oops: 0002 [#1] SMP PTI [ 5628.950748] CPU: 0 PID: 9 Comm: ksoftirqd/0 Not tainted 4.20.0-rc6+ #41 [ 5628.952940] RIP: 0010:gro_cell_poll+0x49/0x80 [ 5628.955615] RSP: 0018:ffffc9000004fdd8 EFLAGS: 00010202 [ 5628.956250] RAX: 0000000000000000 RBX: ffffe8ffffc08150 RCX: 0000000000000000 [ 5628.957102] RDX: 0000000000000000 RSI: ffff88802356bf00 RDI: ffffe8ffffc08150 [ 5628.957940] RBP: 0000000000000026 R08: 0000000000000000 R09: 0000000000000000 [ 5628.958803] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000040 [ 5628.959661] R13: ffffe8ffffc08100 R14: 0000000000000000 R15: 0000000000000040 [ 5628.960682] FS: 0000000000000000(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000 [ 5628.961616] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5628.962359] CR2: 0000000000000008 CR3: 000000000221c000 CR4: 00000000000006b0 [ 5628.963188] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 5628.964034] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 5628.964871] Call Trace: [ 5628.965179] net_rx_action+0xf0/0x380 [ 5628.965637] __do_softirq+0xc7/0x431 [ 5628.966510] run_ksoftirqd+0x24/0x30 [ 5628.966957] smpboot_thread_fn+0xc5/0x160 [ 5628.967436] kthread+0x113/0x130 [ 5628.968283] ret_from_fork+0x3a/0x50 [ 5628.968721] Modules linked in: [ 5628.969099] CR2: 0000000000000008 [ 5628.969510] ---[ end trace 9d9dedc7181661fe ]--- [ 5628.970073] RIP: 0010:gro_cell_poll+0x49/0x80 [ 5628.972965] RSP: 0018:ffffc9000004fdd8 EFLAGS: 00010202 [ 5628.973611] RAX: 0000000000000000 RBX: ffffe8ffffc08150 RCX: 0000000000000000 [ 5628.974504] RDX: 0000000000000000 RSI: ffff88802356bf00 RDI: ffffe8ffffc08150 [ 5628.975462] RBP: 0000000000000026 R08: 0000000000000000 R09: 0000000000000000 [ 5628.976413] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000040 [ 5628.977375] R13: ffffe8ffffc08100 R14: 0000000000000000 R15: 0000000000000040 [ 5628.978296] FS: 0000000000000000(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000 [ 5628.979327] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5628.980044] CR2: 0000000000000008 CR3: 000000000221c000 CR4: 00000000000006b0 [ 5628.980929] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 5628.981736] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 5628.982409] Kernel panic - not syncing: Fatal exception in interrupt [ 5628.983307] Kernel Offset: disabled Fixes: c42858eaf492 ("gro_cells: remove spinlock protecting receive queues") Signed-off-by: Lorenzo Bianconi Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/gro_cells.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 4b54e5f107c6..acf45ddbe924 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -84,6 +84,7 @@ void gro_cells_destroy(struct gro_cells *gcells) for_each_possible_cpu(i) { struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + napi_disable(&cell->napi); netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } From 8d349348cb2eb2e913716b929c871932bd7ac38d Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 31 Dec 2018 15:43:01 -0600 Subject: [PATCH 1180/2608] ibmveth: fix DMA unmap error in ibmveth_xmit_start error path [ Upstream commit 756af9c642329d54f048bac2a62f829b391f6944 ] Commit 33a48ab105a7 ("ibmveth: Fix DMA unmap error") fixed an issue in the normal code path of ibmveth_xmit_start() that was originally introduced by Commit 6e8ab30ec677 ("ibmveth: Add scatter-gather support"). This original fix missed the error path where dma_unmap_page is wrongly called on the header portion in descs[0] which was mapped with dma_map_single. As a result a failure to DMA map any of the frags results in a dmesg warning when CONFIG_DMA_API_DEBUG is enabled. ------------[ cut here ]------------ DMA-API: ibmveth 30000002: device driver frees DMA memory with wrong function [device address=0x000000000a430000] [size=172 bytes] [mapped as page] [unmapped as single] WARNING: CPU: 1 PID: 8426 at kernel/dma/debug.c:1085 check_unmap+0x4fc/0xe10 ... ... DMA-API: Mapped at: ibmveth_start_xmit+0x30c/0xb60 dev_hard_start_xmit+0x100/0x450 sch_direct_xmit+0x224/0x490 __qdisc_run+0x20c/0x980 __dev_queue_xmit+0x1bc/0xf20 This fixes the API misuse by unampping descs[0] with dma_unmap_single. Fixes: 6e8ab30ec677 ("ibmveth: Add scatter-gather support") Signed-off-by: Tyrel Datwyler Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmveth.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index f210398200ec..6c05819d995e 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1172,11 +1172,15 @@ out: map_failed_frags: last = i+1; - for (i = 0; i < last; i++) + for (i = 1; i < last; i++) dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); + dma_unmap_single(&adapter->vdev->dev, + descs[0].fields.address, + descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); map_failed: if (!firmware_has_feature(FW_FEATURE_CMO)) netdev_err(netdev, "tx: unable to map xmit buffer\n"); From 59a3d2dc594ae27edbdb164b770ab287020dce93 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 23 Dec 2018 12:52:18 -0500 Subject: [PATCH 1181/2608] ieee802154: lowpan_header_create check must check daddr [ Upstream commit 40c3ff6d5e0809505a067dd423c110c5658c478c ] Packet sockets may call dev_header_parse with NULL daddr. Make lowpan_header_ops.create fail. Fixes: 87a93e4eceb4 ("ieee802154: change needed headroom/tailroom") Signed-off-by: Willem de Bruijn Acked-by: Alexander Aring Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index ca53efa17be1..8bec827081cd 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -48,6 +48,9 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev, const struct ipv6hdr *hdr = ipv6_hdr(skb); struct neighbour *n; + if (!daddr) + return -EINVAL; + /* TODO: * if this package isn't ipv6 one, where should it be routed? */ From c68815c120a91f02a77b894ed45ad10fa9d5e5ff Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 18 Dec 2018 21:17:44 -0800 Subject: [PATCH 1182/2608] ipv6: explicitly initialize udp6_addr in udp_sock_create6() [ Upstream commit fb24274546310872eeeaf3d1d53799d8414aa0f2 ] syzbot reported the use of uninitialized udp6_addr::sin6_scope_id. We can just set ::sin6_scope_id to zero, as tunnels are unlikely to use an IPv6 address that needs a scope id and there is no interface to bind in this context. For net-next, it looks different as we have cfg->bind_ifindex there so we can probably call ipv6_iface_scope_id(). Same for ::sin6_flowinfo, tunnels don't use it. Fixes: 8024e02879dd ("udp: Add udp_sock_create for UDP tunnels to open listener socket") Reported-by: syzbot+c56449ed3652e6720f30@syzkaller.appspotmail.com Cc: Jon Maloy Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_udp_tunnel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index b283f293ee4a..caad40d6e74d 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -15,7 +15,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { - struct sockaddr_in6 udp6_addr; + struct sockaddr_in6 udp6_addr = {}; int err; struct socket *sock = NULL; @@ -42,6 +42,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->peer_udp_port) { + memset(&udp6_addr, 0, sizeof(udp6_addr)); udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, sizeof(udp6_addr.sin6_addr)); From 64d22404999601afdd6117e0140d7dbe5ddd9057 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Dec 2018 07:47:51 -0800 Subject: [PATCH 1183/2608] ipv6: tunnels: fix two use-after-free [ Upstream commit cbb49697d5512ce9e61b45ce75d3ee43d7ea5524 ] xfrm6_policy_check() might have re-allocated skb->head, we need to reload ipv6 header pointer. sysbot reported : BUG: KASAN: use-after-free in __ipv6_addr_type+0x302/0x32f net/ipv6/addrconf_core.c:40 Read of size 4 at addr ffff888191b8cb70 by task syz-executor2/1304 CPU: 0 PID: 1304 Comm: syz-executor2 Not tainted 4.20.0-rc7+ #356 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x244/0x39d lib/dump_stack.c:113 print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.8+0x242/0x309 mm/kasan/report.c:412 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:432 __ipv6_addr_type+0x302/0x32f net/ipv6/addrconf_core.c:40 ipv6_addr_type include/net/ipv6.h:403 [inline] ip6_tnl_get_cap+0x27/0x190 net/ipv6/ip6_tunnel.c:727 ip6_tnl_rcv_ctl+0xdb/0x2a0 net/ipv6/ip6_tunnel.c:757 vti6_rcv+0x336/0x8f3 net/ipv6/ip6_vti.c:321 xfrm6_ipcomp_rcv+0x1a5/0x3a0 net/ipv6/xfrm6_protocol.c:132 ip6_protocol_deliver_rcu+0x372/0x1940 net/ipv6/ip6_input.c:394 ip6_input_finish+0x84/0x170 net/ipv6/ip6_input.c:434 NF_HOOK include/linux/netfilter.h:289 [inline] ip6_input+0xe9/0x600 net/ipv6/ip6_input.c:443 IPVS: ftp: loaded support on port[0] = 21 ip6_mc_input+0x514/0x11c0 net/ipv6/ip6_input.c:537 dst_input include/net/dst.h:450 [inline] ip6_rcv_finish+0x17a/0x330 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:289 [inline] ipv6_rcv+0x115/0x640 net/ipv6/ip6_input.c:272 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4973 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5083 process_backlog+0x24e/0x7a0 net/core/dev.c:5923 napi_poll net/core/dev.c:6346 [inline] net_rx_action+0x7fa/0x19b0 net/core/dev.c:6412 __do_softirq+0x308/0xb7e kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1027 do_softirq.part.14+0x126/0x160 kernel/softirq.c:337 do_softirq+0x19/0x20 kernel/softirq.c:340 netif_rx_ni+0x521/0x860 net/core/dev.c:4569 dev_loopback_xmit+0x287/0x8c0 net/core/dev.c:3576 NF_HOOK include/linux/netfilter.h:289 [inline] ip6_finish_output2+0x193a/0x2930 net/ipv6/ip6_output.c:84 ip6_fragment+0x2b06/0x3850 net/ipv6/ip6_output.c:727 ip6_finish_output+0x6b7/0xc50 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:278 [inline] ip6_output+0x232/0x9d0 net/ipv6/ip6_output.c:171 dst_output include/net/dst.h:444 [inline] ip6_local_out+0xc5/0x1b0 net/ipv6/output_core.c:176 ip6_send_skb+0xbc/0x340 net/ipv6/ip6_output.c:1727 ip6_push_pending_frames+0xc5/0xf0 net/ipv6/ip6_output.c:1747 rawv6_push_pending_frames net/ipv6/raw.c:615 [inline] rawv6_sendmsg+0x3a3e/0x4b40 net/ipv6/raw.c:945 kobject: 'queues' (0000000089e6eea2): kobject_add_internal: parent: 'tunl0', set: '' kobject: 'queues' (0000000089e6eea2): kobject_uevent_env inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 kobject: 'queues' (0000000089e6eea2): kobject_uevent_env: filter function caused the event to drop! sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 sock_write_iter+0x35e/0x5c0 net/socket.c:900 call_write_iter include/linux/fs.h:1857 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6b8/0x9f0 fs/read_write.c:487 kobject: 'rx-0' (00000000e2d902d9): kobject_add_internal: parent: 'queues', set: 'queues' kobject: 'rx-0' (00000000e2d902d9): kobject_uevent_env vfs_write+0x1fc/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 kobject: 'rx-0' (00000000e2d902d9): fill_kobj_path: path = '/devices/virtual/net/tunl0/queues/rx-0' __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 kobject: 'tx-0' (00000000443b70ac): kobject_add_internal: parent: 'queues', set: 'queues' entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457669 Code: fd b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f9bd200bc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457669 RDX: 000000000000058f RSI: 00000000200033c0 RDI: 0000000000000003 kobject: 'tx-0' (00000000443b70ac): kobject_uevent_env RBP: 000000000072bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9bd200c6d4 R13: 00000000004c2dcc R14: 00000000004da398 R15: 00000000ffffffff Allocated by task 1304: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc_node mm/slab.c:3684 [inline] __kmalloc_node_track_caller+0x50/0x70 mm/slab.c:3698 __kmalloc_reserve.isra.41+0x41/0xe0 net/core/skbuff.c:140 __alloc_skb+0x155/0x760 net/core/skbuff.c:208 kobject: 'tx-0' (00000000443b70ac): fill_kobj_path: path = '/devices/virtual/net/tunl0/queues/tx-0' alloc_skb include/linux/skbuff.h:1011 [inline] __ip6_append_data.isra.49+0x2f1a/0x3f50 net/ipv6/ip6_output.c:1450 ip6_append_data+0x1bc/0x2d0 net/ipv6/ip6_output.c:1619 rawv6_sendmsg+0x15ab/0x4b40 net/ipv6/raw.c:938 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2116 __sys_sendmsg+0x11d/0x280 net/socket.c:2154 __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg net/socket.c:2161 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2161 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe kobject: 'gre0' (00000000cb1b2d7b): kobject_add_internal: parent: 'net', set: 'devices' Freed by task 1304: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3817 skb_free_head+0x93/0xb0 net/core/skbuff.c:553 pskb_expand_head+0x3b2/0x10d0 net/core/skbuff.c:1498 __pskb_pull_tail+0x156/0x18a0 net/core/skbuff.c:1896 pskb_may_pull include/linux/skbuff.h:2188 [inline] _decode_session6+0xd11/0x14d0 net/ipv6/xfrm6_policy.c:150 __xfrm_decode_session+0x71/0x140 net/xfrm/xfrm_policy.c:3272 kobject: 'gre0' (00000000cb1b2d7b): kobject_uevent_env __xfrm_policy_check+0x380/0x2c40 net/xfrm/xfrm_policy.c:3322 __xfrm_policy_check2 include/net/xfrm.h:1170 [inline] xfrm_policy_check include/net/xfrm.h:1175 [inline] xfrm6_policy_check include/net/xfrm.h:1185 [inline] vti6_rcv+0x4bd/0x8f3 net/ipv6/ip6_vti.c:316 xfrm6_ipcomp_rcv+0x1a5/0x3a0 net/ipv6/xfrm6_protocol.c:132 ip6_protocol_deliver_rcu+0x372/0x1940 net/ipv6/ip6_input.c:394 ip6_input_finish+0x84/0x170 net/ipv6/ip6_input.c:434 NF_HOOK include/linux/netfilter.h:289 [inline] ip6_input+0xe9/0x600 net/ipv6/ip6_input.c:443 ip6_mc_input+0x514/0x11c0 net/ipv6/ip6_input.c:537 dst_input include/net/dst.h:450 [inline] ip6_rcv_finish+0x17a/0x330 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:289 [inline] ipv6_rcv+0x115/0x640 net/ipv6/ip6_input.c:272 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4973 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5083 process_backlog+0x24e/0x7a0 net/core/dev.c:5923 kobject: 'gre0' (00000000cb1b2d7b): fill_kobj_path: path = '/devices/virtual/net/gre0' napi_poll net/core/dev.c:6346 [inline] net_rx_action+0x7fa/0x19b0 net/core/dev.c:6412 __do_softirq+0x308/0xb7e kernel/softirq.c:292 The buggy address belongs to the object at ffff888191b8cac0 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 176 bytes inside of 512-byte region [ffff888191b8cac0, ffff888191b8ccc0) The buggy address belongs to the page: page:ffffea000646e300 count:1 mapcount:0 mapping:ffff8881da800940 index:0x0 flags: 0x2fffc0000000200(slab) raw: 02fffc0000000200 ffffea0006eaaa48 ffffea00065356c8 ffff8881da800940 raw: 0000000000000000 ffff888191b8c0c0 0000000100000006 0000000000000000 page dumped because: kasan: bad access detected kobject: 'queues' (000000005fd6226e): kobject_add_internal: parent: 'gre0', set: '' Memory state around the buggy address: ffff888191b8ca00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888191b8ca80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb >ffff888191b8cb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888191b8cb80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888191b8cc00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 0d3c703a9d17 ("ipv6: Cleanup IPv6 tunnel receive path") Fixes: ed1efb2aefbb ("ipv6: Add support for IPsec virtual tunnel interfaces") Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 1 + net/ipv6/ip6_vti.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 948f304db0a3..1812c2a748ff 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -908,6 +908,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + ipv6h = ipv6_hdr(skb); if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) goto drop; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index db5a24f09335..6b2416b4a53e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -318,6 +318,7 @@ static int vti6_rcv(struct sk_buff *skb) return 0; } + ipv6h = ipv6_hdr(skb); if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { t->dev->stats.rx_dropped++; rcu_read_unlock(); From 1e9f6b66b2f2633cd2cb2a66843f9c5e6f42e615 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jan 2019 09:20:27 -0800 Subject: [PATCH 1184/2608] isdn: fix kernel-infoleak in capi_unlocked_ioctl [ Upstream commit d63967e475ae10f286dbd35e189cb241e0b1f284 ] Since capi_ioctl() copies 64 bytes after calling capi20_get_manufacturer() we need to ensure to not leak information to user. BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 CPU: 0 PID: 11245 Comm: syz-executor633 Not tainted 4.20.0-rc7+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 kmsan_internal_check_memory+0x9d4/0xb00 mm/kmsan/kmsan.c:704 kmsan_copy_to_user+0xab/0xc0 mm/kmsan/kmsan_hooks.c:601 _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 capi_ioctl include/linux/uaccess.h:177 [inline] capi_unlocked_ioctl+0x1a0b/0x1bf0 drivers/isdn/capi/capi.c:939 do_vfs_ioctl+0xebd/0x2bf0 fs/ioctl.c:46 ksys_ioctl fs/ioctl.c:713 [inline] __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl+0x1da/0x270 fs/ioctl.c:718 __x64_sys_ioctl+0x4a/0x70 fs/ioctl.c:718 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440019 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffdd4659fb8 EFLAGS: 00000213 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440019 RDX: 0000000020000080 RSI: 00000000c0044306 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8 R10: 0000000000000000 R11: 0000000000000213 R12: 00000000004018a0 R13: 0000000000401930 R14: 0000000000000000 R15: 0000000000000000 Local variable description: ----data.i@capi_unlocked_ioctl Variable was created at: capi_ioctl drivers/isdn/capi/capi.c:747 [inline] capi_unlocked_ioctl+0x82/0x1bf0 drivers/isdn/capi/capi.c:939 do_vfs_ioctl+0xebd/0x2bf0 fs/ioctl.c:46 Bytes 12-63 of 64 are uninitialized Memory access of size 64 starts at ffff88807ac5fce8 Data copied to user address 0000000020000080 Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Karsten Keil Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/capi/kcapi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 46c189ad8d94..28a1c6b5095d 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -851,7 +851,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) u16 ret; if (contr == 0) { - strlcpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN); + strncpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN); return CAPI_NOERROR; } @@ -859,7 +859,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) ctr = get_capi_ctr_by_nr(contr); if (ctr && ctr->state == CAPI_CTR_RUNNING) { - strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN); + strncpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN); ret = CAPI_NOERROR; } else ret = CAPI_REGNOTINSTALLED; From 95b4b711444a4414bccfc0b1fe3c5096675ab8f7 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 13 Dec 2018 17:23:32 +0100 Subject: [PATCH 1185/2608] net: ipv4: do not handle duplicate fragments as overlapping [ Upstream commit ade446403bfb79d3528d56071a84b15351a139ad ] Since commit 7969e5c40dfd ("ip: discard IPv4 datagrams with overlapping segments.") IPv4 reassembly code drops the whole queue whenever an overlapping fragment is received. However, the test is written in a way which detects duplicate fragments as overlapping so that in environments with many duplicate packets, fragmented packets may be undeliverable. Add an extra test and for (potentially) duplicate fragment, only drop the new fragment rather than the whole queue. Only starting offset and length are checked, not the contents of the fragments as that would be too expensive. For similar reason, linear list ("run") of a rbtree node is not iterated, we only check if the new fragment is a subset of the interval covered by existing consecutive fragments. v2: instead of an exact check iterating through linear list of an rbtree node, only check if the new fragment is subset of the "run" (suggested by Eric Dumazet) Fixes: 7969e5c40dfd ("ip: discard IPv4 datagrams with overlapping segments.") Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index f686d7761acb..f8bbd693c19c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -347,10 +347,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct rb_node **rbn, *parent; struct sk_buff *skb1, *prev_tail; + int ihl, end, skb1_run_end; struct net_device *dev; unsigned int fragsize; int flags, offset; - int ihl, end; int err = -ENOENT; u8 ecn; @@ -420,7 +420,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * overlapping fragment, the entire datagram (and any constituent * fragments) MUST be silently discarded. * - * We do the same here for IPv4 (and increment an snmp counter). + * We do the same here for IPv4 (and increment an snmp counter) but + * we do not want to drop the whole queue in response to a duplicate + * fragment. */ /* Find out where to put this fragment. */ @@ -444,13 +446,17 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) do { parent = *rbn; skb1 = rb_to_skb(parent); + skb1_run_end = skb1->ip_defrag_offset + + FRAG_CB(skb1)->frag_run_len; if (end <= skb1->ip_defrag_offset) rbn = &parent->rb_left; - else if (offset >= skb1->ip_defrag_offset + - FRAG_CB(skb1)->frag_run_len) + else if (offset >= skb1_run_end) rbn = &parent->rb_right; - else /* Found an overlap with skb1. */ - goto discard_qp; + else if (offset >= skb1->ip_defrag_offset && + end <= skb1_run_end) + goto err; /* No new data, potential duplicate */ + else + goto discard_qp; /* Found an overlap */ } while (*rbn); /* Here we have parent properly set, and rbn pointing to * one of its NULL left/right children. Insert skb. From 6e12baae0001e0dee2c349b2d5cd8f3f463e3856 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 17 Dec 2018 10:02:42 +0000 Subject: [PATCH 1186/2608] net: macb: restart tx after tx used bit read [ Upstream commit 4298388574dae6168fa8940b3edc7ba965e8a7ab ] On some platforms (currently detected only on SAMA5D4) TX might stuck even the pachets are still present in DMA memories and TX start was issued for them. This happens due to race condition between MACB driver updating next TX buffer descriptor to be used and IP reading the same descriptor. In such a case, the "TX USED BIT READ" interrupt is asserted. GEM/MACB user guide specifies that if a "TX USED BIT READ" interrupt is asserted TX must be restarted. Restart TX if used bit is read and packets are present in software TX queue. Packets are removed from software TX queue if TX was successful for them (see macb_tx_interrupt()). Signed-off-by: Claudiu Beznea Acked-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index d6f8d6c8b0f1..0b2f9ddfb1c4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -60,7 +60,8 @@ #define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ | MACB_BIT(ISR_RLE) \ | MACB_BIT(TXERR)) -#define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP)) +#define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP) \ + | MACB_BIT(TXUBR)) /* Max length of transmit frame must be a multiple of 8 bytes */ #define MACB_TX_LEN_ALIGN 8 @@ -1243,6 +1244,21 @@ static int macb_poll(struct napi_struct *napi, int budget) return work_done; } +static void macb_tx_restart(struct macb_queue *queue) +{ + unsigned int head = queue->tx_head; + unsigned int tail = queue->tx_tail; + struct macb *bp = queue->bp; + + if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) + queue_writel(queue, ISR, MACB_BIT(TXUBR)); + + if (head == tail) + return; + + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); +} + static irqreturn_t macb_interrupt(int irq, void *dev_id) { struct macb_queue *queue = dev_id; @@ -1300,6 +1316,9 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) if (status & MACB_BIT(TCOMP)) macb_tx_interrupt(queue); + if (status & MACB_BIT(TXUBR)) + macb_tx_restart(queue); + /* Link change detection isn't possible with RMII, so we'll * add that if/when we get our hands on a full-blown MII PHY. */ From 309bc341e54e20ba41d8f88979d182661be97e35 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 18 Dec 2018 16:57:04 +0900 Subject: [PATCH 1187/2608] net: phy: Fix the issue that netif always links up after resuming [ Upstream commit 8742beb50f2db903d3b6d69ddd81d67ce9914453 ] Even though the link is down before entering hibernation, there is an issue that the network interface always links up after resuming from hibernation. If the link is still down before enabling the network interface, and after resuming from hibernation, the phydev->state is forcibly set to PHY_UP in mdio_bus_phy_restore(), and the link becomes up. In suspend sequence, only if the PHY is attached, mdio_bus_phy_suspend() calls phy_stop_machine(), and mdio_bus_phy_resume() calls phy_start_machine(). In resume sequence, it's enough to do the same as mdio_bus_phy_resume() because the state has been preserved. This patch fixes the issue by calling phy_start_machine() in mdio_bus_phy_restore() in the same way as mdio_bus_phy_resume(). Fixes: bc87922ff59d ("phy: Move PHY PM operations into phy_device") Suggested-by: Heiner Kallweit Signed-off-by: Kunihiko Hayashi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 5b56a86e88ff..c433be573e0d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -159,11 +159,8 @@ static int mdio_bus_phy_restore(struct device *dev) if (ret < 0) return ret; - /* The PHY needs to renegotiate. */ - phydev->link = 0; - phydev->state = PHY_UP; - - phy_start_machine(phydev); + if (phydev->attached_dev && phydev->adjust_link) + phy_start_machine(phydev); return 0; } From bb98e55e8bb62627973732a3966d687977fcacb6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Dec 2018 13:56:38 -0800 Subject: [PATCH 1188/2608] netrom: fix locking in nr_find_socket() [ Upstream commit 7314f5480f3e37e570104dc5e0f28823ef849e72 ] nr_find_socket(), nr_find_peer() and nr_find_listener() lock the sock after finding it in the global list. However, the call path requires BH disabled for the sock lock consistently. Actually the locking is unnecessary at this point, we can just hold the sock refcnt to make sure it is not gone after we unlock the global list, and lock it later only when needed. Reported-and-tested-by: syzbot+f621cda8b7e598908efa@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netrom/af_netrom.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ebf16f7f9089..fc876b0c3e06 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -153,7 +153,7 @@ static struct sock *nr_find_listener(ax25_address *addr) sk_for_each(s, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { - bh_lock_sock(s); + sock_hold(s); goto found; } s = NULL; @@ -174,7 +174,7 @@ static struct sock *nr_find_socket(unsigned char index, unsigned char id) struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -198,7 +198,7 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, if (nr->your_index == index && nr->your_id == id && !ax25cmp(&nr->dest_addr, dest)) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -224,7 +224,7 @@ static unsigned short nr_find_next_circuit(void) if (i != 0 && j != 0) { if ((sk=nr_find_socket(i, j)) == NULL) break; - bh_unlock_sock(sk); + sock_put(sk); } id++; @@ -919,6 +919,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) } if (sk != NULL) { + bh_lock_sock(sk); skb_reset_transport_header(skb); if (frametype == NR_CONNACK && skb->len == 22) @@ -928,6 +929,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) ret = nr_process_rx_frame(sk, skb); bh_unlock_sock(sk); + sock_put(sk); return ret; } @@ -959,10 +961,12 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) (make = nr_make_new(sk)) == NULL) { nr_transmit_refusal(skb, 0); if (sk) - bh_unlock_sock(sk); + sock_put(sk); return 0; } + bh_lock_sock(sk); + window = skb->data[20]; skb->sk = make; @@ -1015,6 +1019,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) sk->sk_data_ready(sk); bh_unlock_sock(sk); + sock_put(sk); nr_insert_socket(make); From 65b3480236d80d69792b1ac378139459fb79afee Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Dec 2018 13:56:37 -0800 Subject: [PATCH 1189/2608] net/wan: fix a double free in x25_asy_open_tty() [ Upstream commit d5c7c745f254c6cb98b3b3f15fe789b8bd770c72 ] When x25_asy_open() fails, it already cleans up by itself, so its caller doesn't need to free the memory again. It seems we still have to call x25_asy_free() to clear the SLF_INUSE bit, so just set these pointers to NULL after kfree(). Reported-and-tested-by: syzbot+5e5e969e525129229052@syzkaller.appspotmail.com Fixes: 3b780bed3138 ("x25_asy: Free x25_asy on x25_asy_open() failure.") Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wan/x25_asy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 40ee80c03c94..3eaefecd4448 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -485,8 +485,10 @@ static int x25_asy_open(struct net_device *dev) /* Cleanup */ kfree(sl->xbuff); + sl->xbuff = NULL; noxbuff: kfree(sl->rbuff); + sl->rbuff = NULL; norbuff: return -ENOMEM; } From 9eee85ed843fbec123e39a330495cfc4ca2b871f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 21 Dec 2018 12:06:59 -0500 Subject: [PATCH 1190/2608] packet: validate address length [ Upstream commit 99137b7888f4058087895d035d81c6b2d31015c5 ] Packet sockets with SOCK_DGRAM may pass an address for use in dev_hard_header. Ensure that it is of sufficient length. Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 88d5b2645bb0..e63d207047ce 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2665,6 +2665,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); + if (addr && dev && saddr->sll_halen < dev->addr_len) + goto out; } err = -ENXIO; @@ -2863,6 +2865,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); + if (addr && dev && saddr->sll_halen < dev->addr_len) + goto out; } err = -ENXIO; From 41783853bd1a62045c2474a3ab9f1682d5e470d8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 22 Dec 2018 16:53:45 -0500 Subject: [PATCH 1191/2608] packet: validate address length if non-zero [ Upstream commit 6b8d95f1795c42161dc0984b6863e95d6acf24ed ] Validate packet socket address length if a length is given. Zero length is equivalent to not setting an address. Fixes: 99137b7888f4 ("packet: validate address length") Reported-by: Ido Schimmel Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e63d207047ce..91a323f99d47 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2663,7 +2663,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_addr; + addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); if (addr && dev && saddr->sll_halen < dev->addr_len) goto out; @@ -2863,7 +2863,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_addr; + addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); if (addr && dev && saddr->sll_halen < dev->addr_len) goto out; From e553166251bfd189758f3da6ba540ebdc32ac917 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 30 Dec 2018 12:43:42 -0800 Subject: [PATCH 1192/2608] ptr_ring: wrap back ->producer in __ptr_ring_swap_queue() [ Upstream commit aff6db454599d62191aabc208930e891748e4322 ] __ptr_ring_swap_queue() tries to move pointers from the old ring to the new one, but it forgets to check if ->producer is beyond the new size at the end of the operation. This leads to an out-of-bound access in __ptr_ring_produce() as reported by syzbot. Reported-by: syzbot+8993c0fa96d57c399735@syzkaller.appspotmail.com Fixes: 5d49de532002 ("ptr_ring: resize support") Cc: "Michael S. Tsirkin" Cc: John Fastabend Cc: Jason Wang Signed-off-by: Cong Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ptr_ring.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index e8b12b79a0de..dc396196585a 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -551,6 +551,8 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, else if (destroy) destroy(ptr); + if (producer >= size) + producer = 0; __ptr_ring_set_size(r, size); r->producer = producer; r->consumer_head = 0; From e6625bf0ce1d9da98ba295bcf141236c417607de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rgen=20Storvist?= Date: Thu, 13 Dec 2018 17:00:35 +0100 Subject: [PATCH 1193/2608] qmi_wwan: Added support for Telit LN940 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1986af16e8ed355822600c24b3d2f0be46b573df ] Added support for the Telit LN940 series cellular modules QMI interface. QMI_QUIRK_SET_DTR quirk requied for Qualcomm MDM9x40 chipset. Signed-off-by: Jörgen Storvist Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 11a25cef113f..d88e6a15022b 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1221,6 +1221,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ From a98fbfc3e37e5ba11226ccf3c0aa15e0126d4ec7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 10 Dec 2018 18:00:52 +0800 Subject: [PATCH 1194/2608] sctp: initialize sin6_flowinfo for ipv6 addrs in sctp_inet6addr_event [ Upstream commit 4a2eb0c37b4759416996fbb4c45b932500cf06d3 ] syzbot reported a kernel-infoleak, which is caused by an uninitialized field(sin6_flowinfo) of addr->a.v6 in sctp_inet6addr_event(). The call trace is as below: BUG: KMSAN: kernel-infoleak in _copy_to_user+0x19a/0x230 lib/usercopy.c:33 CPU: 1 PID: 8164 Comm: syz-executor2 Not tainted 4.20.0-rc3+ #95 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x32d/0x480 lib/dump_stack.c:113 kmsan_report+0x12c/0x290 mm/kmsan/kmsan.c:683 kmsan_internal_check_memory+0x32a/0xa50 mm/kmsan/kmsan.c:743 kmsan_copy_to_user+0x78/0xd0 mm/kmsan/kmsan_hooks.c:634 _copy_to_user+0x19a/0x230 lib/usercopy.c:33 copy_to_user include/linux/uaccess.h:183 [inline] sctp_getsockopt_local_addrs net/sctp/socket.c:5998 [inline] sctp_getsockopt+0x15248/0x186f0 net/sctp/socket.c:7477 sock_common_getsockopt+0x13f/0x180 net/core/sock.c:2937 __sys_getsockopt+0x489/0x550 net/socket.c:1939 __do_sys_getsockopt net/socket.c:1950 [inline] __se_sys_getsockopt+0xe1/0x100 net/socket.c:1947 __x64_sys_getsockopt+0x62/0x80 net/socket.c:1947 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 sin6_flowinfo is not really used by SCTP, so it will be fixed by simply setting it to 0. The issue exists since very beginning. Thanks Alexander for the reproducer provided. Reported-by: syzbot+ad5d327e6936a2e284be@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 853fecdf6374..8002a72aae1a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -101,6 +101,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; + addr->a.v6.sin6_flowinfo = 0; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; From 3bade4b76a4a36e07699f1c7efe765407ba61854 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Dec 2018 15:28:56 -0800 Subject: [PATCH 1195/2608] tcp: fix a race in inet_diag_dump_icsk() [ Upstream commit f0c928d878e7d01b613c9ae5c971a6b1e473a938 ] Alexei reported use after frees in inet_diag_dump_icsk() [1] Because we use refcount_set() when various sockets are setup and inserted into ehash, we also need to make sure inet_diag_dump_icsk() wont race with the refcount_set() operations. Jonathan Lemon sent a patch changing net_twsk_hashdance() but other spots would need risky changes. Instead, fix inet_diag_dump_icsk() as this bug came with linux-4.10 only. [1] Quoting Alexei : First something iterating over sockets finds already freed tw socket: refcount_t: increment on 0; use-after-free. WARNING: CPU: 2 PID: 2738 at lib/refcount.c:153 refcount_inc+0x26/0x30 RIP: 0010:refcount_inc+0x26/0x30 RSP: 0018:ffffc90004c8fbc0 EFLAGS: 00010282 RAX: 000000000000002b RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88085ee9d680 RSI: ffff88085ee954c8 RDI: ffff88085ee954c8 RBP: ffff88010ecbd2c0 R08: 0000000000000000 R09: 000000000000174c R10: ffffffff81e7c5a0 R11: 0000000000000000 R12: 0000000000000000 R13: ffff8806ba9bf210 R14: ffffffff82304600 R15: ffff88010ecbd328 FS: 00007f81f5a7d700(0000) GS:ffff88085ee80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f81e2a95000 CR3: 000000069b2eb006 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_diag_dump_icsk+0x2b3/0x4e0 [inet_diag] // sock_hold(sk); in net/ipv4/inet_diag.c:1002 ? kmalloc_large_node+0x37/0x70 ? __kmalloc_node_track_caller+0x1cb/0x260 ? __alloc_skb+0x72/0x1b0 ? __kmalloc_reserve.isra.40+0x2e/0x80 __inet_diag_dump+0x3b/0x80 [inet_diag] netlink_dump+0x116/0x2a0 netlink_recvmsg+0x205/0x3c0 sock_read_iter+0x89/0xd0 __vfs_read+0xf7/0x140 vfs_read+0x8a/0x140 SyS_read+0x3f/0xa0 do_syscall_64+0x5a/0x100 then a minute later twsk timer fires and hits two bad refcnts for this freed socket: refcount_t: decrement hit 0; leaking memory. WARNING: CPU: 31 PID: 0 at lib/refcount.c:228 refcount_dec+0x2e/0x40 Modules linked in: RIP: 0010:refcount_dec+0x2e/0x40 RSP: 0018:ffff88085f5c3ea8 EFLAGS: 00010296 RAX: 000000000000002c RBX: ffff88010ecbd2c0 RCX: 000000000000083f RDX: 0000000000000000 RSI: 00000000000000f6 RDI: 000000000000003f RBP: ffffc90003c77280 R08: 0000000000000000 R09: 00000000000017d3 R10: ffffffff81e7c5a0 R11: 0000000000000000 R12: ffffffff82ad2d80 R13: ffffffff8182de00 R14: ffff88085f5c3ef8 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88085f5c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbe42685250 CR3: 0000000002209001 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_twsk_kill+0x9d/0xc0 // inet_twsk_bind_unhash(tw, hashinfo); call_timer_fn+0x29/0x110 run_timer_softirq+0x36b/0x3a0 refcount_t: underflow; use-after-free. WARNING: CPU: 31 PID: 0 at lib/refcount.c:187 refcount_sub_and_test+0x46/0x50 RIP: 0010:refcount_sub_and_test+0x46/0x50 RSP: 0018:ffff88085f5c3eb8 EFLAGS: 00010296 RAX: 0000000000000026 RBX: ffff88010ecbd2c0 RCX: 000000000000083f RDX: 0000000000000000 RSI: 00000000000000f6 RDI: 000000000000003f RBP: ffff88010ecbd358 R08: 0000000000000000 R09: 000000000000185b R10: ffffffff81e7c5a0 R11: 0000000000000000 R12: ffff88010ecbd358 R13: ffffffff8182de00 R14: ffff88085f5c3ef8 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88085f5c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbe42685250 CR3: 0000000002209001 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_twsk_put+0x12/0x20 // inet_twsk_put(tw); call_timer_fn+0x29/0x110 run_timer_softirq+0x36b/0x3a0 Fixes: 67db3e4bfbc9 ("tcp: no longer hold ehash lock while calling tcp_get_info()") Signed-off-by: Eric Dumazet Reported-by: Alexei Starovoitov Cc: Jonathan Lemon Acked-by: Jonathan Lemon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_diag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c9c35b61a027..857ec3dbb742 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -991,7 +991,9 @@ next_chunk: if (!inet_diag_bc_sk(bc, sk)) goto next_normal; - sock_hold(sk); + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + goto next_normal; + num_arr[accum] = num; sk_arr[accum] = sk; if (++accum == SKARR_SZ) From 3009452fb260459deff51533f284dd0d004cbbac Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Dec 2018 12:45:45 -0800 Subject: [PATCH 1196/2608] tipc: fix a double kfree_skb() [ Upstream commit acb4a33e9856d5fa3384b87d3d8369229be06d31 ] tipc_udp_xmit() drops the packet on error, there is no need to drop it again. Fixes: ef20cd4dd163 ("tipc: introduce UDP replicast") Reported-and-tested-by: syzbot+eae585ba2cc2752d3704@syzkaller.appspotmail.com Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/udp_media.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 3deabcab4882..8bf40cd9fbf1 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -243,10 +243,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, } err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); - if (err) { - kfree_skb(_skb); + if (err) goto out; - } } err = 0; out: From 2510d91bdafe6ebe95864290866bb6c3e714b6e8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 13 Dec 2018 10:53:37 +0800 Subject: [PATCH 1197/2608] vhost: make sure used idx is seen before log in vhost_add_used_n() [ Upstream commit 841df922417eb82c835e93d4b93eb6a68c99d599 ] We miss a write barrier that guarantees used idx is updated and seen before log. This will let userspace sync and copy used ring before used idx is update. Fix this by adding a barrier before log_write(). Fixes: 8dd014adfea6f ("vhost-net: mergeable buffers support") Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ffdd4e937d1d..97518685ab58 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2231,6 +2231,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, return -EFAULT; } if (unlikely(vq->log_used)) { + /* Make sure used idx is seen before log. */ + smp_wmb(); /* Log used index update. */ log_write(vq->log_base, vq->log_addr + offsetof(struct vring_used, idx), From 80f098070a63222f68f7f92262abfbdf5c4bc4a1 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Tue, 18 Dec 2018 00:34:06 -0800 Subject: [PATCH 1198/2608] VSOCK: Send reset control packet when socket is partially bound [ Upstream commit a915b982d8f5e4295f64b8dd37ce753874867e88 ] If a server side socket is bound to an address, but not in the listening state yet, incoming connection requests should receive a reset control packet in response. However, the function used to send the reset silently drops the reset packet if the sending socket isn't bound to a remote address (as is the case for a bound socket not yet in the listening state). This change fixes this by using the src of the incoming packet as destination for the reset packet in this case. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reviewed-by: Adit Ranadive Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/vmci_transport.c | 67 +++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index d5be519b0271..bf7c51644446 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -264,6 +264,31 @@ vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, false); } +static int +vmci_transport_alloc_send_control_pkt(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + int err; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, src, dst, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + static int vmci_transport_send_control_pkt(struct sock *sk, enum vmci_transport_packet_type type, @@ -273,9 +298,7 @@ vmci_transport_send_control_pkt(struct sock *sk, u16 proto, struct vmci_handle handle) { - struct vmci_transport_packet *pkt; struct vsock_sock *vsk; - int err; vsk = vsock_sk(sk); @@ -285,17 +308,10 @@ vmci_transport_send_control_pkt(struct sock *sk, if (!vsock_addr_bound(&vsk->remote_addr)) return -EINVAL; - pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return -ENOMEM; - - err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, - &vsk->remote_addr, type, size, - mode, wait, proto, handle, - true); - kfree(pkt); - - return err; + return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, + &vsk->remote_addr, + type, size, mode, + wait, proto, handle); } static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, @@ -313,12 +329,29 @@ static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, static int vmci_transport_send_reset(struct sock *sk, struct vmci_transport_packet *pkt) { + struct sockaddr_vm *dst_ptr; + struct sockaddr_vm dst; + struct vsock_sock *vsk; + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) return 0; - return vmci_transport_send_control_pkt(sk, - VMCI_TRANSPORT_PACKET_TYPE_RST, - 0, 0, NULL, VSOCK_PROTO_INVALID, - VMCI_INVALID_HANDLE); + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (vsock_addr_bound(&vsk->remote_addr)) { + dst_ptr = &vsk->remote_addr; + } else { + vsock_addr_init(&dst, pkt->dg.src.context, + pkt->src_port); + dst_ptr = &dst; + } + return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, dst_ptr, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); } static int vmci_transport_send_negotiate(struct sock *sk, size_t size) From f033fbac9f0ac57ac0d4c3bbd74a0c2c9e4342fc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 18 Dec 2018 16:06:19 +0100 Subject: [PATCH 1199/2608] xen/netfront: tolerate frags with no data [ Upstream commit d81c5054a5d1d4999c7cdead7636b6cd4af83d36 ] At least old Xen net backends seem to send frags with no real data sometimes. In case such a fragment happens to occur with the frag limit already reached the frontend will BUG currently even if this situation is easily recoverable. Modify the BUG_ON() condition accordingly. Tested-by: Dietmar Hahn Signed-off-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 6ea95b316256..4af4e5c12d53 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -904,7 +904,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; - BUG_ON(pull_to <= skb_headlen(skb)); + BUG_ON(pull_to < skb_headlen(skb)); __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { From d5d0496e9456c1e16ea136bd85086ef50fd7c2ac Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Thu, 13 Dec 2018 02:26:46 +0200 Subject: [PATCH 1200/2608] net/mlx5: Typo fix in del_sw_hw_rule [ Upstream commit f0337889147c956721696553ffcc97212b0948fe ] Expression terminated with "," instead of ";", resulted in set_fte getting bad value for modify_enable_mask field. Fixes: bd5251dbf156 ("net/mlx5_core: Introduce flow steering destination of type counter") Signed-off-by: Yuval Avnery Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index dd05cf148845..6538b7b943f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -425,7 +425,7 @@ static void del_rule(struct fs_node *node) if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST), + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); update_fte = true; } out: From f84ce33bf347ab52bf7bd1099526e7067640c6b8 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 2 Dec 2018 15:45:53 +0200 Subject: [PATCH 1201/2608] net/mlx5e: RX, Fix wrong early return in receive queue poll [ Upstream commit bfc698254ba97b3e3e4ebbfae0ffa1f7e2fa0717 ] When the completion queue of the RQ is empty, do not immediately return. If left-over decompressed CQEs (from the previous cycle) were processed, need to go to the finalization part of the poll function. Bug exists only when CQE compression is turned ON. This solves the following issue: mlx5_core 0000:82:00.1: mlx5_eq_int:544:(pid 0): CQ error on CQN 0xc08, syndrome 0x1 mlx5_core 0000:82:00.1 p4p2: mlx5e_cq_error_event: cqn=0x000c08 event=0x04 Fixes: 4b7dfc992514 ("net/mlx5e: Early-return on empty completion queues") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3d3fd03fa450..8b7b52c7512e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1072,7 +1072,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq; + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; struct mlx5_cqe64 *cqe; int work_done = 0; @@ -1083,10 +1083,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) + if (!cqe) { + if (unlikely(work_done)) + goto out; return 0; - - xdpsq = &rq->xdpsq; + } do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { @@ -1101,6 +1102,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); +out: if (xdpsq->db.doorbell) { mlx5e_xmit_xdp_doorbell(xdpsq); xdpsq->db.doorbell = false; From c207568b20b10ca609c852542a06865328fede4a Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Tue, 18 Dec 2018 15:59:20 +0000 Subject: [PATCH 1202/2608] mlxsw: core: Increase timeout during firmware flash process [ Upstream commit cf0b70e71b32137ccf9c1f3dd9fb30cbf89b4322 ] During the firmware flash process, some of the EMADs get timed out, which causes the driver to send them again with a limit of 5 retries. There are some situations in which 5 retries is not enough and the EMAD access fails. If the failed EMAD was related to the flashing process, the driver fails the flashing. The reason for these timeouts during firmware flashing is cache misses in the CPU running the firmware. In case the CPU needs to fetch instructions from the flash when a firmware is flashed, it needs to wait for the flashing to complete. Since flashing takes time, it is possible for pending EMADs to timeout. Fix by increasing EMADs' timeout while flashing firmware. Fixes: ce6ef68f433f ("mlxsw: spectrum: Implement the ethtool flash_device callback") Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 19 ++++++++++++++++++- drivers/net/ethernet/mellanox/mlxsw/core.h | 3 +++ .../net/ethernet/mellanox/mlxsw/spectrum.c | 7 ++++++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f3315bc874ad..cced009da869 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -113,6 +113,7 @@ struct mlxsw_core { struct mlxsw_thermal *thermal; struct mlxsw_core_port *ports; unsigned int max_ports; + bool fw_flash_in_progress; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; @@ -460,12 +461,16 @@ struct mlxsw_reg_trans { struct rcu_head rcu; }; -#define MLXSW_EMAD_TIMEOUT_MS 200 +#define MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS 3000 +#define MLXSW_EMAD_TIMEOUT_MS 200 static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) { unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); + if (trans->core->fw_flash_in_progress) + timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS); + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); } @@ -1791,6 +1796,18 @@ void mlxsw_core_flush_owq(void) } EXPORT_SYMBOL(mlxsw_core_flush_owq); +void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core->fw_flash_in_progress = true; +} +EXPORT_SYMBOL(mlxsw_core_fw_flash_start); + +void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core->fw_flash_in_progress = false; +} +EXPORT_SYMBOL(mlxsw_core_fw_flash_end); + static int __init mlxsw_core_module_init(void) { int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 6e966af72fc4..3fa04da38fd0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -312,6 +312,9 @@ struct mlxsw_driver { const struct mlxsw_config_profile *profile; }; +void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core); +void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core); + bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, enum mlxsw_res_id res_id); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 18bb6798937b..84864fdcb0e8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -333,8 +333,13 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, }, .mlxsw_sp = mlxsw_sp }; + int err; - return mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); + mlxsw_core_fw_flash_start(mlxsw_sp->core); + err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); + mlxsw_core_fw_flash_end(mlxsw_sp->core); + + return err; } static bool mlxsw_sp_fw_rev_ge(const struct mlxsw_fw_rev *a, From 08264146ae5989f301dd8efb67aad950e3d0c319 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Sun, 25 Nov 2018 11:46:09 +0200 Subject: [PATCH 1203/2608] net/mlx5e: Remove the false indication of software timestamping support [ Upstream commit 4765420439e758bfa4808392d18b0a4cb6f06065 ] mlx5 driver falsely advertises support of software timestamping. Fix it by removing the false indication. Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support") Signed-off-by: Alaa Hleihel Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d12e9fc0d76b..d9db3ad3d765 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1417,21 +1417,15 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info) { - int ret; - - ret = ethtool_op_get_ts_info(priv->netdev, info); - if (ret) - return ret; - info->phc_index = priv->tstamp.ptp ? ptp_clock_index(priv->tstamp.ptp) : -1; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return 0; - info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); From f943aeb0a7d429ed4a6acf28b910e8955371e4e8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Dec 2018 11:49:55 -0800 Subject: [PATCH 1204/2608] tipc: use lock_sock() in tipc_sk_reinit() [ Upstream commit 15ef70e286176165d28b0b8a969b422561a68dfc ] lock_sock() must be used in process context to be race-free with other lock_sock() callers, for example, tipc_release(). Otherwise using the spinlock directly can't serialize a parallel tipc_release(). As it is blocking, we have to hold the sock refcnt before rhashtable_walk_stop() and release it after rhashtable_walk_start(). Fixes: 07f6c4bc048a ("tipc: convert tipc reference table to use generic rhashtable") Reported-by: Dmitry Vyukov Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4d2125d258fe..e5f9f43ff15b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2261,11 +2261,15 @@ void tipc_sk_reinit(struct net *net) goto walk_stop; while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { - spin_lock_bh(&tsk->sk.sk_lock.slock); + sock_hold(&tsk->sk); + rhashtable_walk_stop(&iter); + lock_sock(&tsk->sk); msg = &tsk->phdr; msg_set_prevnode(msg, tn->own_addr); msg_set_orignode(msg, tn->own_addr); - spin_unlock_bh(&tsk->sk.sk_lock.slock); + release_sock(&tsk->sk); + rhashtable_walk_start(&iter); + sock_put(&tsk->sk); } walk_stop: rhashtable_walk_stop(&iter); From 417483631811b7d642523f51e1b9d522edf897ad Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Dec 2018 15:23:30 -0800 Subject: [PATCH 1205/2608] tipc: compare remote and local protocols in tipc_udp_enable() [ Upstream commit fb83ed496b9a654f60cd1d58a0e1e79ec5694808 ] When TIPC_NLA_UDP_REMOTE is an IPv6 mcast address but TIPC_NLA_UDP_LOCAL is an IPv4 address, a NULL-ptr deref is triggered as the UDP tunnel sock is initialized to IPv4 or IPv6 sock merely based on the protocol in local address. We should just error out when the remote address and local address have different protocols. Reported-by: syzbot+eb4da3a20fad2e52555d@syzkaller.appspotmail.com Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/udp_media.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 8bf40cd9fbf1..e3cff9d6c092 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -675,6 +675,11 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (err) goto err; + if (remote.proto != local.proto) { + err = -EINVAL; + goto err; + } + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; rcu_assign_pointer(b->media_ptr, ub); From 1283f1a4513029203936aecefd096e0914108830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rgen=20Storvist?= Date: Wed, 12 Dec 2018 22:45:34 +0100 Subject: [PATCH 1206/2608] qmi_wwan: Added support for Fibocom NL668 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 110a1cc28bc383adb4885eff27e18c61ddebffb4 ] Added support for Fibocom NL668 series QMI interface. Using QMI_QUIRK_SET_DTR required for Qualcomm MDM9x07 chipsets. Signed-off-by: Jörgen Storvist Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d88e6a15022b..1e7f2e5b74cd 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1109,6 +1109,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */ + {QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ {QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */ {QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */ From 256e0b8a98a9fc541c3920c4e91a545bd2158f56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rgen=20Storvist?= Date: Fri, 21 Dec 2018 15:38:52 +0100 Subject: [PATCH 1207/2608] qmi_wwan: Add support for Fibocom NL678 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7c3db4105ce8d69bcb5c04bfa9acd1e9119af8d5 ] Added support for Fibocom NL678 series cellular module QMI interface. Using QMI_QUIRK_SET_DTR required for Qualcomm MDM9x40 series chipsets. Signed-off-by: Jörgen Storvist Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 1e7f2e5b74cd..7e8f3b417530 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1258,6 +1258,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ + {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From 952dea5bc0b43bd26412455f32651f34f8e7ccff Mon Sep 17 00:00:00 2001 From: Myungho Jung Date: Tue, 18 Dec 2018 09:02:25 -0800 Subject: [PATCH 1208/2608] net/smc: fix TCP fallback socket release [ Upstream commit 78abe3d0dfad196959b1246003366e2610775ea6 ] clcsock can be released while kernel_accept() references it in TCP listen worker. Also, clcsock needs to wake up before released if TCP fallback is used and the clcsock is blocked by accept. Add a lock to safely release clcsock and call kernel_sock_shutdown() to wake up clcsock from accept in smc_release(). Reported-by: syzbot+0bf2e01269f1274b4b03@syzkaller.appspotmail.com Reported-by: syzbot+e3132895630f957306bc@syzkaller.appspotmail.com Signed-off-by: Myungho Jung Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/af_smc.c | 14 ++++++++++++-- net/smc/smc.h | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 43ef7be69428..8c71f0929fbb 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -133,8 +133,14 @@ static int smc_release(struct socket *sock) sk->sk_shutdown |= SHUTDOWN_MASK; } if (smc->clcsock) { + if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { + /* wake up clcsock accept */ + rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); + } + mutex_lock(&smc->clcsock_release_lock); sock_release(smc->clcsock); smc->clcsock = NULL; + mutex_unlock(&smc->clcsock_release_lock); } /* detach socket */ @@ -184,6 +190,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work); sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); + mutex_init(&smc->clcsock_release_lock); return sk; } @@ -577,7 +584,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) struct sock *sk = &lsmc->sk; struct socket *new_clcsock; struct sock *new_sk; - int rc; + int rc = -EINVAL; release_sock(&lsmc->sk); new_sk = smc_sock_alloc(sock_net(sk), NULL); @@ -590,7 +597,10 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) } *new_smc = smc_sk(new_sk); - rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); + mutex_lock(&lsmc->clcsock_release_lock); + if (lsmc->clcsock) + rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); + mutex_unlock(&lsmc->clcsock_release_lock); lock_sock(&lsmc->sk); if (rc < 0) { lsmc->sk.sk_err = -rc; diff --git a/net/smc/smc.h b/net/smc/smc.h index 0bee9d16cf29..926a97cc511a 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -185,6 +185,10 @@ struct smc_sock { /* smc sock container */ * started, waiting for unsent * data to be sent */ + struct mutex clcsock_release_lock; + /* protects clcsock of a listen + * socket + * */ }; static inline struct smc_sock *smc_sk(const struct sock *sk) From e5af70e98abbdbf7a22f897344d806494715cfb3 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Thu, 27 Dec 2018 18:55:09 -0800 Subject: [PATCH 1209/2608] sock: Make sock->sk_stamp thread-safe [ Upstream commit 3a0ed3e9619738067214871e9cb826fa23b2ddb9 ] Al Viro mentioned (Message-ID <20170626041334.GZ10672@ZenIV.linux.org.uk>) that there is probably a race condition lurking in accesses of sk_stamp on 32-bit machines. sock->sk_stamp is of type ktime_t which is always an s64. On a 32 bit architecture, we might run into situations of unsafe access as the access to the field becomes non atomic. Use seqlocks for synchronization. This allows us to avoid using spinlocks for readers as readers do not need mutual exclusion. Another approach to solve this is to require sk_lock for all modifications of the timestamps. The current approach allows for timestamps to have their own lock: sk_stamp_lock. This allows for the patch to not compete with already existing critical sections, and side effects are limited to the paths in the patch. The addition of the new field maintains the data locality optimizations from commit 9115e8cd2a0c ("net: reorganize struct sock for better data locality") Note that all the instances of the sk_stamp accesses are either through the ioctl or the syscall recvmsg. Signed-off-by: Deepa Dinamani Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 38 +++++++++++++++++++++++++++++++++++--- net/compat.c | 15 +++++++++------ net/core/sock.c | 3 +++ net/sunrpc/svcsock.c | 2 +- 4 files changed, 48 insertions(+), 10 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 64a330544dad..4280e96d4b46 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -292,6 +292,7 @@ struct sock_common { * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received + * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications @@ -457,6 +458,9 @@ struct sock { const struct cred *sk_peer_cred; long sk_rcvtimeo; ktime_t sk_stamp; +#if BITS_PER_LONG==32 + seqlock_t sk_stamp_seq; +#endif u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; @@ -2201,6 +2205,34 @@ static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) atomic_add(segs, &sk->sk_drops); } +static inline ktime_t sock_read_timestamp(struct sock *sk) +{ +#if BITS_PER_LONG==32 + unsigned int seq; + ktime_t kt; + + do { + seq = read_seqbegin(&sk->sk_stamp_seq); + kt = sk->sk_stamp; + } while (read_seqretry(&sk->sk_stamp_seq, seq)); + + return kt; +#else + return sk->sk_stamp; +#endif +} + +static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) +{ +#if BITS_PER_LONG==32 + write_seqlock(&sk->sk_stamp_seq); + sk->sk_stamp = kt; + write_sequnlock(&sk->sk_stamp_seq); +#else + sk->sk_stamp = kt; +#endif +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, @@ -2225,7 +2257,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else - sk->sk_stamp = kt; + sock_write_timestamp(sk, kt); if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) __sock_recv_wifi_status(msg, sk, skb); @@ -2246,9 +2278,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) - sk->sk_stamp = skb->tstamp; + sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) - sk->sk_stamp = 0; + sock_write_timestamp(sk, 0); } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); diff --git a/net/compat.c b/net/compat.c index 32ed993588d6..790851e70dab 100644 --- a/net/compat.c +++ b/net/compat.c @@ -462,12 +462,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) err = -ENOENT; if (!sock_flag(sk, SOCK_TIMESTAMP)) sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sk->sk_stamp); + tv = ktime_to_timeval(sock_read_timestamp(sk)); + if (tv.tv_sec == -1) return err; if (tv.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - tv = ktime_to_timeval(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + tv = ktime_to_timeval(kt); } err = 0; if (put_user(tv.tv_sec, &ctv->tv_sec) || @@ -490,12 +492,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta err = -ENOENT; if (!sock_flag(sk, SOCK_TIMESTAMP)) sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sk->sk_stamp); + ts = ktime_to_timespec(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return err; if (ts.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - ts = ktime_to_timespec(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + ts = ktime_to_timespec(kt); } err = 0; if (put_user(ts.tv_sec, &ctv->tv_sec) || diff --git a/net/core/sock.c b/net/core/sock.c index 36f19458e2fe..01cae48d6eef 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2730,6 +2730,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; +#if BITS_PER_LONG==32 + seqlock_init(&sk->sk_stamp_seq); +#endif atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ff8e06cd067e..c83df30e9655 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -585,7 +585,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - svsk->sk_sk->sk_stamp = skb->tstamp; + sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ len = skb->len; From 9eb8b278cb191e8eaeb83eee17cff8cd37fcf0af Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Wed, 28 Nov 2018 10:19:36 -0800 Subject: [PATCH 1210/2608] IB/hfi1: Incorrect sizing of sge for PIO will OOPs commit dbc2970caef74e8ff41923d302aa6fb5a4812d0e upstream. An incorrect sge sizing in the HFI PIO path will cause an OOPs similar to this: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] hfi1_verbs_send_pio+0x3d8/0x530 [hfi1] PGD 0 Oops: 0000 1 SMP Call Trace: ? hfi1_verbs_send_dma+0xad0/0xad0 [hfi1] hfi1_verbs_send+0xdf/0x250 [hfi1] ? make_rc_ack+0xa80/0xa80 [hfi1] hfi1_do_send+0x192/0x430 [hfi1] hfi1_do_send_from_rvt+0x10/0x20 [hfi1] rvt_post_send+0x369/0x820 [rdmavt] ib_uverbs_post_send+0x317/0x570 [ib_uverbs] ib_uverbs_write+0x26f/0x420 [ib_uverbs] ? security_file_permission+0x21/0xa0 vfs_write+0xbd/0x1e0 ? mntput+0x24/0x40 SyS_write+0x7f/0xe0 system_call_fastpath+0x16/0x1b Fix by adding the missing sizing check to correctly determine the sge length. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 12cf0f7ca7bb..2e8854ba18cf 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1123,6 +1123,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (slen > len) slen = len; + if (slen > ss->sge.sge_length) + slen = ss->sge.sge_length; rvt_update_sge(ss, slen, false); seg_pio_copy_mid(pbuf, addr, slen); len -= slen; From 076097b2847be84be34e70ae6f19690b28b2d122 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 18 Dec 2018 11:18:34 -0600 Subject: [PATCH 1211/2608] ALSA: rme9652: Fix potential Spectre v1 vulnerability commit 0b84304ef5da92add8dc75a1b07879c5374cdb05 upstream. info->channel is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: sound/pci/rme9652/hdsp.c:4100 snd_hdsp_channel_info() warn: potential spectre issue 'hdsp->channel_map' [r] (local cap) Fix this by sanitizing info->channel before using it to index hdsp->channel_map Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. Also, notice that I refactored the code a bit in order to get rid of the following checkpatch warning: ERROR: do not use assignment in if condition FILE: sound/pci/rme9652/hdsp.c:4103: if ((mapped_channel = hdsp->channel_map[info->channel]) < 0) [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/rme9652/hdsp.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 9f0f73875f01..e41bb4100306 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -4092,15 +4093,16 @@ static int snd_hdsp_channel_info(struct snd_pcm_substream *substream, struct snd_pcm_channel_info *info) { struct hdsp *hdsp = snd_pcm_substream_chip(substream); - int mapped_channel; + unsigned int channel = info->channel; - if (snd_BUG_ON(info->channel >= hdsp->max_channels)) + if (snd_BUG_ON(channel >= hdsp->max_channels)) + return -EINVAL; + channel = array_index_nospec(channel, hdsp->max_channels); + + if (hdsp->channel_map[channel] < 0) return -EINVAL; - if ((mapped_channel = hdsp->channel_map[info->channel]) < 0) - return -EINVAL; - - info->offset = mapped_channel * HDSP_CHANNEL_BUFFER_BYTES; + info->offset = hdsp->channel_map[channel] * HDSP_CHANNEL_BUFFER_BYTES; info->first = 0; info->step = 32; return 0; From c4d65a3afd071608e1b9b86112a4f0cf01932e4d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 18 Dec 2018 11:52:16 -0600 Subject: [PATCH 1212/2608] ALSA: emu10k1: Fix potential Spectre v1 vulnerabilities commit 5ae4f61f012a097df93de2285070ec8e34716d29 upstream. ipcm->substream is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: sound/pci/emu10k1/emufx.c:1031 snd_emu10k1_ipcm_poke() warn: potential spectre issue 'emu->fx8010.pcm' [r] (local cap) sound/pci/emu10k1/emufx.c:1075 snd_emu10k1_ipcm_peek() warn: potential spectre issue 'emu->fx8010.pcm' [r] (local cap) Fix this by sanitizing ipcm->substream before using it to index emu->fx8010.pcm Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/emu10k1/emufx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index d68bb40d3676..5c00e3536767 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -1033,6 +1034,8 @@ static int snd_emu10k1_ipcm_poke(struct snd_emu10k1 *emu, if (ipcm->substream >= EMU10K1_FX8010_PCM_COUNT) return -EINVAL; + ipcm->substream = array_index_nospec(ipcm->substream, + EMU10K1_FX8010_PCM_COUNT); if (ipcm->channels > 32) return -EINVAL; pcm = &emu->fx8010.pcm[ipcm->substream]; @@ -1079,6 +1082,8 @@ static int snd_emu10k1_ipcm_peek(struct snd_emu10k1 *emu, if (ipcm->substream >= EMU10K1_FX8010_PCM_COUNT) return -EINVAL; + ipcm->substream = array_index_nospec(ipcm->substream, + EMU10K1_FX8010_PCM_COUNT); pcm = &emu->fx8010.pcm[ipcm->substream]; mutex_lock(&emu->fx8010.lock); spin_lock_irq(&emu->reg_lock); From bbf036122f6400396febc4e545dc24ebf35fa341 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 12 Dec 2018 15:36:28 -0600 Subject: [PATCH 1213/2608] ALSA: pcm: Fix potential Spectre v1 vulnerability commit 94ffb030b6d31ec840bb811be455dd2e26a4f43e upstream. stream is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: sound/core/pcm.c:140 snd_pcm_control_ioctl() warn: potential spectre issue 'pcm->streams' [r] (local cap) Fix this by sanitizing stream before using it to index pcm->streams Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index e8dc1a5afe66..2b5caa8dea2e 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ static int snd_pcm_control_ioctl(struct snd_card *card, return -EFAULT; if (stream < 0 || stream > 1) return -EINVAL; + stream = array_index_nospec(stream, 2); if (get_user(subdevice, &info->subdevice)) return -EFAULT; mutex_lock(®ister_mutex); From a53ba068e2db23e44df4020ef6e7eaf33743e24a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 12 Dec 2018 11:20:49 -0600 Subject: [PATCH 1214/2608] ALSA: emux: Fix potential Spectre v1 vulnerabilities commit 4aea96f4237cea0c51a8bc87c0db31f0f932f1f0 upstream. info.mode and info.port are indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. These issues were detected with the help of Smatch: sound/synth/emux/emux_hwdep.c:72 snd_emux_hwdep_misc_mode() warn: potential spectre issue 'emu->portptrs[i]->ctrls' [w] (local cap) sound/synth/emux/emux_hwdep.c:75 snd_emux_hwdep_misc_mode() warn: potential spectre issue 'emu->portptrs' [w] (local cap) sound/synth/emux/emux_hwdep.c:75 snd_emux_hwdep_misc_mode() warn: potential spectre issue 'emu->portptrs[info.port]->ctrls' [w] (local cap) Fix this by sanitizing both info.mode and info.port before using them to index emu->portptrs[i]->ctrls, emu->portptrs[info.port]->ctrls and emu->portptrs. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/synth/emux/emux_hwdep.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/synth/emux/emux_hwdep.c b/sound/synth/emux/emux_hwdep.c index e557946718a9..d9fcae071b47 100644 --- a/sound/synth/emux/emux_hwdep.c +++ b/sound/synth/emux/emux_hwdep.c @@ -22,9 +22,9 @@ #include #include #include +#include #include "emux_voice.h" - #define TMP_CLIENT_ID 0x1001 /* @@ -66,13 +66,16 @@ snd_emux_hwdep_misc_mode(struct snd_emux *emu, void __user *arg) return -EFAULT; if (info.mode < 0 || info.mode >= EMUX_MD_END) return -EINVAL; + info.mode = array_index_nospec(info.mode, EMUX_MD_END); if (info.port < 0) { for (i = 0; i < emu->num_ports; i++) emu->portptrs[i]->ctrls[info.mode] = info.value; } else { - if (info.port < emu->num_ports) + if (info.port < emu->num_ports) { + info.port = array_index_nospec(info.port, emu->num_ports); emu->portptrs[info.port]->ctrls[info.mode] = info.value; + } } return 0; } From fb7ccf9daebbe021794eeae08c79538b6471c090 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Dec 2018 21:38:16 +0100 Subject: [PATCH 1215/2608] mtd: atmel-quadspi: disallow building on ebsa110 commit 2a9d92fb3a1282a4659f1bb6d5684018846537b7 upstream. I ran into a link-time error with the atmel-quadspi driver on the EBSA110 platform: drivers/mtd/built-in.o: In function `atmel_qspi_run_command': :(.text+0x1ee3c): undefined reference to `_memcpy_toio' :(.text+0x1ee48): undefined reference to `_memcpy_fromio' The problem is that _memcpy_toio/_memcpy_fromio are not available on that platform, and we have to prevent building the driver there. In case we want to backport this to older kernels: between linux-4.8 and linux-4.20, the Kconfig entry was in drivers/mtd/spi-nor/Kconfig but had the same problem. Link: https://lore.kernel.org/patchwork/patch/812860/ Fixes: 161aaab8a067 ("mtd: atmel-quadspi: add driver for Atmel QSPI controller") Signed-off-by: Arnd Bergmann Reviewed-by: Boris Brezillon Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig index 69c638dd0484..14e8967c1dea 100644 --- a/drivers/mtd/spi-nor/Kconfig +++ b/drivers/mtd/spi-nor/Kconfig @@ -41,7 +41,7 @@ config SPI_ASPEED_SMC config SPI_ATMEL_QUADSPI tristate "Atmel Quad SPI Controller" - depends on ARCH_AT91 || (ARM && COMPILE_TEST) + depends on ARCH_AT91 || (ARM && COMPILE_TEST && !ARCH_EBSA110) depends on OF && HAS_IOMEM help This enables support for the Quad SPI controller in master mode. From 91056ae41b44e84057457aee0c1bf30dcc565c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mantas=20Mikul=C4=97nas?= Date: Sun, 16 Dec 2018 15:44:47 +0200 Subject: [PATCH 1216/2608] ALSA: hda: add mute LED support for HP EliteBook 840 G4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 40906ebe3af6a48457151b3c6726b480f6a6cb13 upstream. Tested with 4.19.9. v2: Changed from CXT_FIXUP_MUTE_LED_GPIO to CXT_FIXUP_HP_DOCK because that's what the existing fixups for EliteBooks use. Signed-off-by: Mantas Mikulėnas Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 0cc0ced1f2ed..0a225dc85044 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -961,6 +961,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK), + SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83b3, "HP EliteBook 830 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83d3, "HP ProBook 640 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), From 47cafb1329697c3217b99b8f7373ac93b1c2d80b Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 15 Dec 2018 19:06:48 +0900 Subject: [PATCH 1217/2608] ALSA: fireface: fix for state to fetch PCM frames commit 3d16200a3e55a39caa1c88419cb559c00316f721 upstream. According to my memo at hand and saved records, writing 0x00000001 to SND_FF_REG_FETCH_PCM_FRAMES disables fetching PCM frames in corresponding channel, however current implement uses reversed logic. This results in muted volume in device side during playback. This commit corrects the bug. Cc: # v4.12+ Fixes: 76fdb3a9e13a ('ALSA: fireface: add support for Fireface 400') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/fireface/ff-protocol-ff400.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/fireface/ff-protocol-ff400.c b/sound/firewire/fireface/ff-protocol-ff400.c index b47954a6b8ab..bb0c1abf4718 100644 --- a/sound/firewire/fireface/ff-protocol-ff400.c +++ b/sound/firewire/fireface/ff-protocol-ff400.c @@ -152,7 +152,7 @@ static int ff400_switch_fetching_mode(struct snd_ff *ff, bool enable) if (reg == NULL) return -ENOMEM; - if (enable) { + if (!enable) { /* * Each quadlet is corresponding to data channels in a data * blocks in reverse order. Precisely, quadlets for available From deec542ba516f655bcde3f05b3ba867264457889 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 15 Dec 2018 19:03:19 +0900 Subject: [PATCH 1218/2608] ALSA: firewire-lib: fix wrong handling payload_length as payload_quadlet commit ada79fa5a0b374dd2c2262137c734da7524a8263 upstream. In IEC 61883-1/6 engine of ALSA firewire stack, a packet handler has a second argument for 'the number of bytes in payload of isochronous packet'. However, an incoming packet handler without CIP header uses the value as 'the number of quadlets in the payload'. This brings userspace applications to receive the number of PCM frames as four times against real time. This commit fixes the bug. Cc: # v4.12+ Fixes: 3b196c394dd ('ALSA: firewire-lib: add no-header packet processing') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/amdtp-stream.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index d7d47dc8b5f1..bca1ce8f1c54 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -629,15 +629,17 @@ end: } static int handle_in_packet_without_header(struct amdtp_stream *s, - unsigned int payload_quadlets, unsigned int cycle, + unsigned int payload_length, unsigned int cycle, unsigned int index) { __be32 *buffer; + unsigned int payload_quadlets; unsigned int data_blocks; struct snd_pcm_substream *pcm; unsigned int pcm_frames; buffer = s->buffer.packets[s->packet_index].buffer; + payload_quadlets = payload_length / 4; data_blocks = payload_quadlets / s->data_block_quadlets; trace_in_packet_without_header(s, cycle, payload_quadlets, data_blocks, From cb3343d81495c82b36b907a0430c5cc230d15d55 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 15 Dec 2018 19:03:20 +0900 Subject: [PATCH 1219/2608] ALSA: firewire-lib: fix wrong assignment for 'out_packet_without_header' tracepoint commit aa9a9e39b4f65733bf19d90cbd026e85a74efb99 upstream. An initial commit to add tracepoints for packets without CIP headers introduces a wrong assignment to 'data_blocks' value of 'out_packet_without_header' tracepoint. This commit fixes the bug. Cc: # v4.12+ Fixes: b164d2fd6e49 ('ALSA: firewire_lib: add tracepoints for packets without CIP headers') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/amdtp-stream-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h index ea0d486652c8..636a9e725205 100644 --- a/sound/firewire/amdtp-stream-trace.h +++ b/sound/firewire/amdtp-stream-trace.h @@ -169,7 +169,7 @@ TRACE_EVENT(out_packet_without_header, __entry->dest = fw_parent_device(s->unit)->node_id; __entry->payload_quadlets = payload_length / 4; __entry->data_blocks = data_blocks, - __entry->data_blocks = s->data_block_counter, + __entry->data_block_counter = s->data_block_counter, __entry->packet_index = s->packet_index; __entry->irq = !!in_interrupt(); __entry->index = index; From 1b7682a02161371a0cd85096da932c864165c552 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 15 Dec 2018 19:03:21 +0900 Subject: [PATCH 1220/2608] ALSA: firewire-lib: use the same print format for 'without_header' tracepoints commit 5ef108c53e6efd695e32aad969638ccbc35b4be9 upstream. An initial commit to add tracepoints for packets without CIP headers uses different print formats for added tracepoints. However this is not convenient for users/developers to prepare debug tools. This commit uses the same format for the two tracepoints. Cc: # v4.12+ Fixes: b164d2fd6e49 ('ALSA: firewire_lib: add tracepoints for packets without CIP headers') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/amdtp-stream-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h index 636a9e725205..ed704cd37b5f 100644 --- a/sound/firewire/amdtp-stream-trace.h +++ b/sound/firewire/amdtp-stream-trace.h @@ -131,7 +131,7 @@ TRACE_EVENT(in_packet_without_header, __entry->index = index; ), TP_printk( - "%02u %04u %04x %04x %02d %03u %3u %3u %02u %01u %02u", + "%02u %04u %04x %04x %02d %03u %02u %03u %02u %01u %02u", __entry->second, __entry->cycle, __entry->src, From 30930698ba795914c602adbe52e4106db92d6ae6 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Wed, 26 Dec 2018 16:04:49 +0530 Subject: [PATCH 1221/2608] ALSA: hda/tegra: clear pending irq handlers commit 63d2a9ec310d8bcc955574220d4631aa55c1a80c upstream. Even after disabling interrupts on the module, it could be possible that irq handlers are still running. System hang is seen during suspend path. It was found that, there were pending writes on the HDA bus and clock was disabled by that time. Above mentioned issue is fixed by clearing any pending irq handlers before disabling clocks and returning from hda suspend. Suggested-by: Mohan Kumar Suggested-by: Dara Ramesh Signed-off-by: Sameer Pujar Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_tegra.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 0621920f7617..e85fb04ec7be 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -249,10 +249,12 @@ static int hda_tegra_suspend(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; struct hda_tegra *hda = container_of(chip, struct hda_tegra, chip); + struct hdac_bus *bus = azx_bus(chip); snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); azx_stop_chip(chip); + synchronize_irq(bus->irq); azx_enter_link_reset(chip); hda_tegra_disable_clocks(hda); From e7d9ee97e44181f1019366be4a55cb23e7150d6d Mon Sep 17 00:00:00 2001 From: Scott Chen Date: Thu, 13 Dec 2018 06:01:47 -0500 Subject: [PATCH 1222/2608] USB: serial: pl2303: add ids for Hewlett-Packard HP POS pole displays commit 8d503f206c336677954160ac62f0c7d9c219cd89 upstream. Add device ids to pl2303 for the HP POS pole displays: LM920: 03f0:026b TD620: 03f0:0956 LD960TA: 03f0:4439 LD220TA: 03f0:4349 LM940: 03f0:5039 Signed-off-by: Scott Chen Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 5 +++++ drivers/usb/serial/pl2303.h | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 2153e67eeeee..5fa1e6fb49a6 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -94,9 +94,14 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(YCCABLE_VENDOR_ID, YCCABLE_PRODUCT_ID) }, { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, { USB_DEVICE(ZEAGLE_VENDOR_ID, ZEAGLE_N2ITION3_PRODUCT_ID) }, { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index cec7141245ef..b46e74a90af2 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -124,10 +124,15 @@ /* Hewlett-Packard POS Pole Displays */ #define HP_VENDOR_ID 0x03f0 +#define HP_LM920_PRODUCT_ID 0x026b +#define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 +#define HP_LD220TA_PRODUCT_ID 0x4349 +#define HP_LD960TA_PRODUCT_ID 0x4439 +#define HP_LM940_PRODUCT_ID 0x5039 /* Cressi Edy (diving computer) PC interface */ #define CRESSI_VENDOR_ID 0x04b8 From f99dfabbd7bd92e5cde239d284ce2ec09c51b658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rgen=20Storvist?= Date: Fri, 21 Dec 2018 14:40:44 +0100 Subject: [PATCH 1223/2608] USB: serial: option: add Fibocom NL678 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4b2c01ad902ec02fa962b233decd2f14be3714ba upstream. Added USB serial option driver support for Fibocom NL678 series cellular module: VID 2cb7 and PIDs 0x0104 and 0x0105. Reserved network and ADB interfaces. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0104 Rev=03.10 S: Manufacturer=Fibocom S: Product=Fibocom NL678-E Modem S: SerialNumber=12345678 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0105 Rev=03.10 S: Manufacturer=Fibocom S: Product=Fibocom NL678-E Modem S: SerialNumber=12345678 C: #Ifs= 7 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether I: If#= 5 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) Signed-off-by: Jörgen Storvist Cc: stable Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 988be9ca2b4f..8cdca3f7acaa 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1957,6 +1957,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, + { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ + .driver_info = RSVD(6) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From 55b3d640c3d1e455318f3cbdce184ededa4f32e9 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 18 Dec 2018 20:04:25 +0800 Subject: [PATCH 1224/2608] usb: r8a66597: Fix a possible concurrency use-after-free bug in r8a66597_endpoint_disable() commit c85400f886e3d41e69966470879f635a2b50084c upstream. The function r8a66597_endpoint_disable() and r8a66597_urb_enqueue() may be concurrently executed. The two functions both access a possible shared variable "hep->hcpriv". This shared variable is freed by r8a66597_endpoint_disable() via the call path: r8a66597_endpoint_disable kfree(hep->hcpriv) (line 1995 in Linux-4.19) This variable is read by r8a66597_urb_enqueue() via the call path: r8a66597_urb_enqueue spin_lock_irqsave(&r8a66597->lock) init_pipe_info enable_r8a66597_pipe pipe = hep->hcpriv (line 802 in Linux-4.19) The read operation is protected by a spinlock, but the free operation is not protected by this spinlock, thus a concurrency use-after-free bug may occur. To fix this bug, the spin-lock and spin-unlock function calls in r8a66597_endpoint_disable() are moved to protect the free operation. Signed-off-by: Jia-Ju Bai Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/r8a66597-hcd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 5e5fc9d7d533..4c6b31a4a263 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1990,6 +1990,8 @@ static int r8a66597_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, static void r8a66597_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep) +__acquires(r8a66597->lock) +__releases(r8a66597->lock) { struct r8a66597 *r8a66597 = hcd_to_r8a66597(hcd); struct r8a66597_pipe *pipe = (struct r8a66597_pipe *)hep->hcpriv; @@ -2002,13 +2004,14 @@ static void r8a66597_endpoint_disable(struct usb_hcd *hcd, return; pipenum = pipe->info.pipenum; + spin_lock_irqsave(&r8a66597->lock, flags); if (pipenum == 0) { kfree(hep->hcpriv); hep->hcpriv = NULL; + spin_unlock_irqrestore(&r8a66597->lock, flags); return; } - spin_lock_irqsave(&r8a66597->lock, flags); pipe_stop(r8a66597, pipe); pipe_irq_disable(r8a66597, pipenum); disable_irq_empty(r8a66597, pipenum); From 5b48bec5f6c818b7e4ca7c7ae7d7adfa742e9913 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Dec 2018 16:30:07 +0000 Subject: [PATCH 1225/2608] staging: wilc1000: fix missing read_write setting when reading data commit c58eef061dda7d843dcc0ad6fea7e597d4c377c0 upstream. Currently the cmd.read_write setting is not initialized so it contains garbage from the stack. Fix this by setting it to 0 to indicate a read is required. Detected by CoverityScan, CID#1357925 ("Uninitialized scalar variable") Fixes: c5c77ba18ea6 ("staging: wilc1000: Add SDIO/SPI 802.11 driver") Signed-off-by: Colin Ian King Cc: stable Acked-by: Ajay Singh Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/wilc_sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/wilc1000/wilc_sdio.c b/drivers/staging/wilc1000/wilc_sdio.c index 0189e3edbbbe..a4b2bfc31503 100644 --- a/drivers/staging/wilc1000/wilc_sdio.c +++ b/drivers/staging/wilc1000/wilc_sdio.c @@ -823,6 +823,7 @@ static int sdio_read_int(struct wilc *wilc, u32 *int_status) if (!g_sdio.irq_gpio) { int i; + cmd.read_write = 0; cmd.function = 1; cmd.address = 0x04; cmd.data = 0; From df088bbe01e1234b14d8366416d98fde99bbd81e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 25 May 2018 15:00:20 +0200 Subject: [PATCH 1226/2608] qmi_wwan: apply SET_DTR quirk to the SIMCOM shared device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 102cd909635612c0be784a519651954a7924c786 upstream. SIMCOM are reusing a single device ID for many (all of their?) different modems, based on different chipsets and firmwares. Newer Qualcomm chipset generations require setting DTR to wake the QMI function. The SIM7600E modem is using such a chipset, making it fail to work with this driver despite the device ID match. Fix by unconditionally enabling the SET_DTR quirk for all SIMCOM modems using this specific device ID. This is similar to what we already have done for another case of device IDs recycled over multiple chipset generations: 14cf4a771b30 ("drivers: net: usb: qmi_wwan: add QMI_QUIRK_SET_DTR for Telit PID 0x1201") Initial testing on an older SIM7100 modem shows no immediate side effects. Reported-by: Sebastian Sjoholm Cc: Reinhard Speyerer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 7e8f3b417530..969474c9d297 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1252,7 +1252,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ - {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */ + {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ From e1f2a8b377b0578c7dff162c084059f915900aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 18 Oct 2018 11:11:08 +0200 Subject: [PATCH 1227/2608] s390/pci: fix sleeping in atomic during hotplug commit 98dfd32620e970eb576ebce5ea39d905cb005e72 upstream. When triggered by pci hotplug (PEC 0x306) clp_get_state is called with spinlocks held resulting in the following warning: zpci: n/a: Event 0x306 reconfigured PCI function 0x0 BUG: sleeping function called from invalid context at mm/page_alloc.c:4324 in_atomic(): 1, irqs_disabled(): 0, pid: 98, name: kmcheck 2 locks held by kmcheck/98: Change the allocation to use GFP_ATOMIC. Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci_clp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 93cd0f1ca12b..d8dfd645bf02 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -437,7 +437,7 @@ int clp_get_state(u32 fid, enum zpci_state *state) struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED}; int rc; - rrb = clp_alloc_block(GFP_KERNEL); + rrb = clp_alloc_block(GFP_ATOMIC); if (!rrb) return -ENOMEM; From 9023f52fd329a6c2fe3ecd7fe206299e89b949c6 Mon Sep 17 00:00:00 2001 From: Patrick Dreyer Date: Sun, 23 Dec 2018 10:06:35 -0800 Subject: [PATCH 1228/2608] Input: elan_i2c - add ACPI ID for touchpad in ASUS Aspire F5-573G commit 7db54c89f0b30a101584e09d3729144e6170059d upstream. This adds ELAN0501 to the ACPI table to support Elan touchpad found in ASUS Aspire F5-573G. Signed-off-by: Patrick Dreyer Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 368871a398a5..f2bf8fa1ab04 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1251,6 +1251,7 @@ MODULE_DEVICE_TABLE(i2c, elan_id); static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, + { "ELAN0501", 0 }, { "ELAN0600", 0 }, { "ELAN0602", 0 }, { "ELAN0605", 0 }, From 8c34b07190ad0ce2e8b792b922b940bef1aef1e1 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 13 Nov 2018 19:49:10 +0100 Subject: [PATCH 1229/2608] x86/speculation/l1tf: Drop the swap storage limit restriction when l1tf=off commit 5b5e4d623ec8a34689df98e42d038a3b594d2ff9 upstream. Swap storage is restricted to max_swapfile_size (~16TB on x86_64) whenever the system is deemed affected by L1TF vulnerability. Even though the limit is quite high for most deployments it seems to be too restrictive for deployments which are willing to live with the mitigation disabled. We have a customer to deploy 8x 6,4TB PCIe/NVMe SSD swap devices which is clearly out of the limit. Drop the swap restriction when l1tf=off is specified. It also doesn't make much sense to warn about too much memory for the l1tf mitigation when it is forcefully disabled by the administrator. [ tglx: Folded the documentation delta change ] Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2") Signed-off-by: Michal Hocko Signed-off-by: Thomas Gleixner Reviewed-by: Pavel Tatashin Reviewed-by: Andi Kleen Acked-by: Jiri Kosina Cc: Linus Torvalds Cc: Dave Hansen Cc: Andi Kleen Cc: Borislav Petkov Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181113184910.26697-1-mhocko@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ Documentation/admin-guide/l1tf.rst | 6 +++++- arch/x86/kernel/cpu/bugs.c | 3 ++- arch/x86/mm/init.c | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5f3d58142600..7d8b17ce8804 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1965,6 +1965,9 @@ off Disables hypervisor mitigations and doesn't emit any warnings. + It also drops the swap size and available + RAM limit restriction on both hypervisor and + bare metal. Default is 'flush'. diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst index bae52b845de0..9f5924f81f89 100644 --- a/Documentation/admin-guide/l1tf.rst +++ b/Documentation/admin-guide/l1tf.rst @@ -405,6 +405,9 @@ time with the option "l1tf=". The valid arguments for this option are: off Disables hypervisor mitigations and doesn't emit any warnings. + It also drops the swap size and available RAM limit restrictions + on both hypervisor and bare metal. + ============ ============================================================= The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. @@ -576,7 +579,8 @@ Default mitigations The kernel default mitigations for vulnerable processors are: - PTE inversion to protect against malicious user space. This is done - unconditionally and cannot be controlled. + unconditionally and cannot be controlled. The swap storage is limited + to ~16TB. - L1D conditional flushing on VMENTER when EPT is enabled for a guest. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f7a6d6203e13..98b24d668b08 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -999,7 +999,8 @@ static void __init l1tf_select_mitigation(void) #endif half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; - if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { + if (l1tf_mitigation != L1TF_MITIGATION_OFF && + e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 94b8d90830d1..32bb38f6fc18 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -890,7 +890,7 @@ unsigned long max_swapfile_size(void) pages = generic_max_swapfile_size(); - if (boot_cpu_has_bug(X86_BUG_L1TF)) { + if (boot_cpu_has_bug(X86_BUG_L1TF) && l1tf_mitigation != L1TF_MITIGATION_OFF) { /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ unsigned long long l1tf_limit = l1tf_pfn_limit(); /* From 9d95653347e990cc516ebaed288893b9bfb3a288 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 4 Dec 2018 13:37:27 -0800 Subject: [PATCH 1230/2608] x86/mm: Drop usage of __flush_tlb_all() in kernel_physical_mapping_init() commit ba6f508d0ec4adb09f0a939af6d5e19cdfa8667d upstream. Commit: f77084d96355 "x86/mm/pat: Disable preemption around __flush_tlb_all()" addressed a case where __flush_tlb_all() is called without preemption being disabled. It also left a warning to catch other cases where preemption is not disabled. That warning triggers for the memory hotplug path which is also used for persistent memory enabling: WARNING: CPU: 35 PID: 911 at ./arch/x86/include/asm/tlbflush.h:460 RIP: 0010:__flush_tlb_all+0x1b/0x3a [..] Call Trace: phys_pud_init+0x29c/0x2bb kernel_physical_mapping_init+0xfc/0x219 init_memory_mapping+0x1a5/0x3b0 arch_add_memory+0x2c/0x50 devm_memremap_pages+0x3aa/0x610 pmem_attach_disk+0x585/0x700 [nd_pmem] Andy wondered why a path that can sleep was using __flush_tlb_all() [1] and Dave confirmed the expectation for TLB flush is for modifying / invalidating existing PTE entries, but not initial population [2]. Drop the usage of __flush_tlb_all() in phys_{p4d,pud,pmd}_init() on the expectation that this path is only ever populating empty entries for the linear map. Note, at linear map teardown time there is a call to the all-cpu flush_tlb_all() to invalidate the removed mappings. [1]: https://lkml.kernel.org/r/9DFD717D-857D-493D-A606-B635D72BAC21@amacapital.net [2]: https://lkml.kernel.org/r/749919a4-cdb1-48a3-adb4-adb81a5fa0b5@intel.com [ mingo: Minor readability edits. ] Suggested-by: Dave Hansen Reported-by: Andy Lutomirski Signed-off-by: Dan Williams Acked-by: Peter Zijlstra (Intel) Acked-by: Kirill A. Shutemov Cc: Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: dave.hansen@intel.com Fixes: f77084d96355 ("x86/mm/pat: Disable preemption around __flush_tlb_all()") Link: http://lkml.kernel.org/r/154395944713.32119.15611079023837132638.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init_64.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 642357aff216..624edfbff02d 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -574,7 +574,6 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, paddr_end, page_size_mask, prot); - __flush_tlb_all(); continue; } /* @@ -617,7 +616,6 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, pud_populate(&init_mm, pud, pmd); spin_unlock(&init_mm.page_table_lock); } - __flush_tlb_all(); update_page_count(PG_LEVEL_1G, pages); @@ -658,7 +656,6 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, paddr_last = phys_pud_init(pud, paddr, paddr_end, page_size_mask); - __flush_tlb_all(); continue; } @@ -670,7 +667,6 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, p4d_populate(&init_mm, p4d, pud); spin_unlock(&init_mm.page_table_lock); } - __flush_tlb_all(); return paddr_last; } @@ -723,8 +719,6 @@ kernel_physical_mapping_init(unsigned long paddr_start, if (pgd_changed) sync_global_pgds(vaddr_start, vaddr_end - 1); - __flush_tlb_all(); - return paddr_last; } From a90e097943507c6bc2f102bb9233f3760b4966f8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Dec 2018 14:21:08 -0800 Subject: [PATCH 1231/2608] KVM: x86: Use jmp to invoke kvm_spurious_fault() from .fixup commit e81434995081fd7efb755fd75576b35dbb0850b1 upstream. ____kvm_handle_fault_on_reboot() provides a generic exception fixup handler that is used to cleanly handle faults on VMX/SVM instructions during reboot (or at least try to). If there isn't a reboot in progress, ____kvm_handle_fault_on_reboot() treats any exception as fatal to KVM and invokes kvm_spurious_fault(), which in turn generates a BUG() to get a stack trace and die. When it was originally added by commit 4ecac3fd6dc2 ("KVM: Handle virtualization instruction #UD faults during reboot"), the "call" to kvm_spurious_fault() was handcoded as PUSH+JMP, where the PUSH'd value is the RIP of the faulting instructing. The PUSH+JMP trickery is necessary because the exception fixup handler code lies outside of its associated function, e.g. right after the function. An actual CALL from the .fixup code would show a slightly bogus stack trace, e.g. an extra "random" function would be inserted into the trace, as the return RIP on the stack would point to no known function (and the unwinder will likely try to guess who owns the RIP). Unfortunately, the JMP was replaced with a CALL when the macro was reworked to not spin indefinitely during reboot (commit b7c4145ba2eb "KVM: Don't spin on virt instruction faults during reboot"). This causes the aforementioned behavior where a bogus function is inserted into the stack trace, e.g. my builds like to blame free_kvm_area(). Revert the CALL back to a JMP. The changelog for commit b7c4145ba2eb ("KVM: Don't spin on virt instruction faults during reboot") contains nothing that indicates the switch to CALL was deliberate. This is backed up by the fact that the PUSH was left intact. Note that an alternative to the PUSH+JMP magic would be to JMP back to the "real" code and CALL from there, but that would require adding a JMP in the non-faulting path to avoid calling kvm_spurious_fault() and would add no value, i.e. the stack trace would be the same. Using CALL: ------------[ cut here ]------------ kernel BUG at /home/sean/go/src/kernel.org/linux/arch/x86/kvm/x86.c:356! invalid opcode: 0000 [#1] SMP CPU: 4 PID: 1057 Comm: qemu-system-x86 Not tainted 4.20.0-rc6+ #75 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kvm_spurious_fault+0x5/0x10 [kvm] Code: <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 55 49 89 fd 41 RSP: 0018:ffffc900004bbcc8 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffffffffffff RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff888273fd8000 R08: 00000000000003e8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000784 R12: ffffc90000371fb0 R13: 0000000000000000 R14: 000000026d763cf4 R15: ffff888273fd8000 FS: 00007f3d69691700(0000) GS:ffff888277800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f89bc56fe0 CR3: 0000000271a5a001 CR4: 0000000000362ee0 Call Trace: free_kvm_area+0x1044/0x43ea [kvm_intel] ? vmx_vcpu_run+0x156/0x630 [kvm_intel] ? kvm_arch_vcpu_ioctl_run+0x447/0x1a40 [kvm] ? kvm_vcpu_ioctl+0x368/0x5c0 [kvm] ? kvm_vcpu_ioctl+0x368/0x5c0 [kvm] ? __set_task_blocked+0x38/0x90 ? __set_current_blocked+0x50/0x60 ? __fpu__restore_sig+0x97/0x490 ? do_vfs_ioctl+0xa1/0x620 ? __x64_sys_futex+0x89/0x180 ? ksys_ioctl+0x66/0x70 ? __x64_sys_ioctl+0x16/0x20 ? do_syscall_64+0x4f/0x100 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 Modules linked in: vhost_net vhost tap kvm_intel kvm irqbypass bridge stp llc ---[ end trace 9775b14b123b1713 ]--- Using JMP: ------------[ cut here ]------------ kernel BUG at /home/sean/go/src/kernel.org/linux/arch/x86/kvm/x86.c:356! invalid opcode: 0000 [#1] SMP CPU: 6 PID: 1067 Comm: qemu-system-x86 Not tainted 4.20.0-rc6+ #75 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kvm_spurious_fault+0x5/0x10 [kvm] Code: <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 55 49 89 fd 41 RSP: 0018:ffffc90000497cd0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffffffffffff RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88827058bd40 R08: 00000000000003e8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000784 R12: ffffc90000369fb0 R13: 0000000000000000 R14: 00000003c8fc6642 R15: ffff88827058bd40 FS: 00007f3d7219e700(0000) GS:ffff888277900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3d64001000 CR3: 0000000271c6b004 CR4: 0000000000362ee0 Call Trace: vmx_vcpu_run+0x156/0x630 [kvm_intel] ? kvm_arch_vcpu_ioctl_run+0x447/0x1a40 [kvm] ? kvm_vcpu_ioctl+0x368/0x5c0 [kvm] ? kvm_vcpu_ioctl+0x368/0x5c0 [kvm] ? __set_task_blocked+0x38/0x90 ? __set_current_blocked+0x50/0x60 ? __fpu__restore_sig+0x97/0x490 ? do_vfs_ioctl+0xa1/0x620 ? __x64_sys_futex+0x89/0x180 ? ksys_ioctl+0x66/0x70 ? __x64_sys_ioctl+0x16/0x20 ? do_syscall_64+0x4f/0x100 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 Modules linked in: vhost_net vhost tap kvm_intel kvm irqbypass bridge stp llc ---[ end trace f9daedb85ab3ddba ]--- Fixes: b7c4145ba2eb ("KVM: Don't spin on virt instruction faults during reboot") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 367cdd263a5c..523308d030d2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1355,7 +1355,7 @@ asmlinkage void kvm_spurious_fault(void); "cmpb $0, kvm_rebooting \n\t" \ "jne 668b \n\t" \ __ASM_SIZE(push) " $666b \n\t" \ - "call kvm_spurious_fault \n\t" \ + "jmp kvm_spurious_fault \n\t" \ ".popsection \n\t" \ _ASM_EXTABLE(666b, 667b) From f7ed75e1bad6a6cb143d5f97cc0a5505df28ab47 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 3 Dec 2018 13:52:51 -0800 Subject: [PATCH 1232/2608] KVM: nVMX: Free the VMREAD/VMWRITE bitmaps if alloc_kvm_area() fails commit 1b3ab5ad1b8ad99bae76ec583809c5f5a31c707c upstream. Fixes: 34a1cd60d17f ("kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8eec37d37c3d..16bb8e35605e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7275,13 +7275,16 @@ static __init int hardware_setup(void) kvm_mce_cap_supported |= MCG_LMCE_P; - return alloc_kvm_area(); + r = alloc_kvm_area(); + if (r) + goto out; + return 0; out: for (i = 0; i < VMX_BITMAP_NR; i++) free_page((unsigned long)vmx_bitmap[i]); - return r; + return r; } static __exit void hardware_unsetup(void) From 69beeb1c0f0b1fc662af78097d9a80cefa1f0090 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 11 Oct 2018 11:12:34 +0200 Subject: [PATCH 1233/2608] platform-msi: Free descriptors in platform_msi_domain_free() commit 81b1e6e6a8590a19257e37a1633bec098d499c57 upstream. Since the addition of platform MSI support, there were two helpers supposed to allocate/free IRQs for a device: platform_msi_domain_alloc_irqs() platform_msi_domain_free_irqs() In these helpers, IRQ descriptors are allocated in the "alloc" routine while they are freed in the "free" one. Later, two other helpers have been added to handle IRQ domains on top of MSI domains: platform_msi_domain_alloc() platform_msi_domain_free() Seen from the outside, the logic is pretty close with the former helpers and people used it with the same logic as before: a platform_msi_domain_alloc() call should be balanced with a platform_msi_domain_free() call. While this is probably what was intended to do, the platform_msi_domain_free() does not remove/free the IRQ descriptor(s) created/inserted in platform_msi_domain_alloc(). One effect of such situation is that removing a module that requested an IRQ will let one orphaned IRQ descriptor (with an allocated MSI entry) in the device descriptors list. Next time the module will be inserted back, one will observe that the allocation will happen twice in the MSI domain, one time for the remaining descriptor, one time for the new one. It also has the side effect to quickly overshoot the maximum number of allocated MSI and then prevent any module requesting an interrupt in the same domain to be inserted anymore. This situation has been met with loops of insertion/removal of the mvpp2.ko module (requesting 15 MSIs each time). Fixes: 552c494a7666 ("platform-msi: Allow creation of a MSI-based stacked irq domain") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform-msi.c | 6 ++++-- include/linux/msi.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index e5473525e7b2..ae9b2f568879 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -374,14 +374,16 @@ void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec) { struct platform_msi_priv_data *data = domain->host_data; - struct msi_desc *desc; - for_each_msi_entry(desc, data->dev) { + struct msi_desc *desc, *tmp; + for_each_msi_entry_safe(desc, tmp, data->dev) { if (WARN_ON(!desc->irq || desc->nvec_used != 1)) return; if (!(desc->irq >= virq && desc->irq < (virq + nvec))) continue; irq_domain_free_irqs_common(domain, desc->irq, 1); + list_del(&desc->list); + free_msi_entry(desc); } } diff --git a/include/linux/msi.h b/include/linux/msi.h index cdd069cf9ed8..a89cdea8795a 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -116,6 +116,8 @@ struct msi_desc { list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list) #define for_each_msi_entry(desc, dev) \ list_for_each_entry((desc), dev_to_msi_list((dev)), list) +#define for_each_msi_entry_safe(desc, tmp, dev) \ + list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) #ifdef CONFIG_PCI_MSI #define first_pci_msi_entry(pdev) first_msi_entry(&(pdev)->dev) From 9f0fc584b6f83d7cb0ec562e44e9ce98bd33b200 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 11 Nov 2018 18:45:24 +0000 Subject: [PATCH 1234/2608] perf pmu: Suppress potential format-truncation warning commit 11a64a05dc649815670b1be9fe63d205cb076401 upstream. Depending on which functions are inlined in util/pmu.c, the snprintf() calls in perf_pmu__parse_{scale,unit,per_pkg,snapshot}() might trigger a warning: util/pmu.c: In function 'pmu_aliases': util/pmu.c:178:31: error: '%s' directive output may be truncated writing up to 255 bytes into a region of size between 0 and 4095 [-Werror=format-truncation=] snprintf(path, PATH_MAX, "%s/%s.unit", dir, name); ^~ I found this when trying to build perf from Linux 3.16 with gcc 8. However I can reproduce the problem in mainline if I force __perf_pmu__new_alias() to be inlined. Suppress this by using scnprintf() as has been done elsewhere in perf. Signed-off-by: Ben Hutchings Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20181111184524.fux4taownc6ndbx6@decadent.org.uk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index dceef4725d33..2deffc234932 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -143,7 +143,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * int fd, ret = -1; char path[PATH_MAX]; - snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.scale", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -173,7 +173,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n ssize_t sret; int fd; - snprintf(path, PATH_MAX, "%s/%s.unit", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.unit", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -203,7 +203,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name) char path[PATH_MAX]; int fd; - snprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -221,7 +221,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, char path[PATH_MAX]; int fd; - snprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name); fd = open(path, O_RDONLY); if (fd == -1) From 9da1f6d06b7a6d068e68fcfd7cbbf6b586d888e1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 25 Nov 2018 17:20:31 -0500 Subject: [PATCH 1235/2608] ext4: add ext4_sb_bread() to disambiguate ENOMEM cases commit fb265c9cb49e2074ddcdd4de99728aefdd3b3592 upstream. Today, when sb_bread() returns NULL, this can either be because of an I/O error or because the system failed to allocate the buffer. Since it's an old interface, changing would require changing many call sites. So instead we create our own ext4_sb_bread(), which also allows us to set the REQ_META flag. Also fixed a problem in the xattr code where a NULL return in a function could also mean that the xattr was not found, which could lead to the wrong error getting returned to userspace. Fixes: ac27a0ec112a ("ext4: initial copy of files from ext3") Cc: stable@kernel.org # 2.6.19 Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 2 ++ fs/ext4/migrate.c | 36 +++++++++++----------- fs/ext4/resize.c | 72 ++++++++++++++++++++++---------------------- fs/ext4/super.c | 23 ++++++++++++++ fs/ext4/xattr.c | 76 ++++++++++++++++++++++------------------------- 5 files changed, 115 insertions(+), 94 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c0c6562b3c44..02970a2e86a3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2568,6 +2568,8 @@ extern int ext4_group_extend(struct super_block *sb, extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ +extern struct buffer_head *ext4_sb_bread(struct super_block *sb, + sector_t block, int op_flags); extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_calculate_overhead(struct super_block *sb); extern void ext4_superblock_csum_set(struct super_block *sb); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index cf5181b62df1..78d45c7d3fa7 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -123,9 +123,9 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode, int i, retval = 0; unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - bh = sb_bread(inode->i_sb, pblock); - if (!bh) - return -EIO; + bh = ext4_sb_bread(inode->i_sb, pblock, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); i_data = (__le32 *)bh->b_data; for (i = 0; i < max_entries; i++) { @@ -152,9 +152,9 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode, int i, retval = 0; unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - bh = sb_bread(inode->i_sb, pblock); - if (!bh) - return -EIO; + bh = ext4_sb_bread(inode->i_sb, pblock, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); i_data = (__le32 *)bh->b_data; for (i = 0; i < max_entries; i++) { @@ -182,9 +182,9 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode, int i, retval = 0; unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - bh = sb_bread(inode->i_sb, pblock); - if (!bh) - return -EIO; + bh = ext4_sb_bread(inode->i_sb, pblock, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); i_data = (__le32 *)bh->b_data; for (i = 0; i < max_entries; i++) { @@ -231,9 +231,9 @@ static int free_dind_blocks(handle_t *handle, struct buffer_head *bh; unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); - if (!bh) - return -EIO; + bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); tmp_idata = (__le32 *)bh->b_data; for (i = 0; i < max_entries; i++) { @@ -261,9 +261,9 @@ static int free_tind_blocks(handle_t *handle, struct buffer_head *bh; unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); - if (!bh) - return -EIO; + bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); tmp_idata = (__le32 *)bh->b_data; for (i = 0; i < max_entries; i++) { @@ -389,9 +389,9 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, struct ext4_extent_header *eh; block = ext4_idx_pblock(ix); - bh = sb_bread(inode->i_sb, block); - if (!bh) - return -EIO; + bh = ext4_sb_bread(inode->i_sb, block, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); eh = (struct ext4_extent_header *)bh->b_data; if (eh->eh_depth != 0) { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index deebb8842c82..a5cd9e8e5e09 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -127,10 +127,12 @@ static int verify_group_input(struct super_block *sb, else if (free_blocks_count < 0) ext4_warning(sb, "Bad blocks count %u", input->blocks_count); - else if (!(bh = sb_bread(sb, end - 1))) + else if (IS_ERR(bh = ext4_sb_bread(sb, end - 1, 0))) { + err = PTR_ERR(bh); + bh = NULL; ext4_warning(sb, "Cannot read last block (%llu)", end - 1); - else if (outside(input->block_bitmap, start, end)) + } else if (outside(input->block_bitmap, start, end)) ext4_warning(sb, "Block bitmap not in group (block %llu)", (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) @@ -757,11 +759,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, struct ext4_super_block *es = EXT4_SB(sb)->s_es; unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; - struct buffer_head **o_group_desc, **n_group_desc; - struct buffer_head *dind; - struct buffer_head *gdb_bh; + struct buffer_head **o_group_desc, **n_group_desc = NULL; + struct buffer_head *dind = NULL; + struct buffer_head *gdb_bh = NULL; int gdbackups; - struct ext4_iloc iloc; + struct ext4_iloc iloc = { .bh = NULL }; __le32 *data; int err; @@ -770,21 +772,22 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); - gdb_bh = sb_bread(sb, gdblock); - if (!gdb_bh) - return -EIO; + gdb_bh = ext4_sb_bread(sb, gdblock, 0); + if (IS_ERR(gdb_bh)) + return PTR_ERR(gdb_bh); gdbackups = verify_reserved_gdb(sb, group, gdb_bh); if (gdbackups < 0) { err = gdbackups; - goto exit_bh; + goto errout; } data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; - dind = sb_bread(sb, le32_to_cpu(*data)); - if (!dind) { - err = -EIO; - goto exit_bh; + dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0); + if (IS_ERR(dind)) { + err = PTR_ERR(dind); + dind = NULL; + goto errout; } data = (__le32 *)dind->b_data; @@ -792,18 +795,18 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, ext4_warning(sb, "new group %u GDT block %llu not reserved", group, gdblock); err = -EINVAL; - goto exit_dind; + goto errout; } BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (unlikely(err)) - goto exit_dind; + goto errout; BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); if (unlikely(err)) - goto exit_dind; + goto errout; BUFFER_TRACE(dind, "get_write_access"); err = ext4_journal_get_write_access(handle, dind); @@ -813,7 +816,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, /* ext4_reserve_inode_write() gets a reference on the iloc */ err = ext4_reserve_inode_write(handle, inode, &iloc); if (unlikely(err)) - goto exit_dind; + goto errout; n_group_desc = ext4_kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *), @@ -822,7 +825,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); - goto exit_inode; + goto errout; } /* @@ -838,7 +841,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_metadata(handle, NULL, dind); if (unlikely(err)) { ext4_std_error(sb, err); - goto exit_inode; + goto errout; } inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; ext4_mark_iloc_dirty(handle, inode, &iloc); @@ -846,8 +849,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); - iloc.bh = NULL; - goto exit_inode; + goto errout; } brelse(dind); @@ -863,15 +865,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_super(handle, sb); if (err) ext4_std_error(sb, err); - return err; - -exit_inode: +errout: kvfree(n_group_desc); brelse(iloc.bh); -exit_dind: brelse(dind); -exit_bh: brelse(gdb_bh); ext4_debug("leaving with error %d\n", err); @@ -891,9 +889,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb, gdblock = ext4_meta_bg_first_block_no(sb, group) + ext4_bg_has_super(sb, group); - gdb_bh = sb_bread(sb, gdblock); - if (!gdb_bh) - return -EIO; + gdb_bh = ext4_sb_bread(sb, gdblock, 0); + if (IS_ERR(gdb_bh)) + return PTR_ERR(gdb_bh); n_group_desc = ext4_kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *), GFP_NOFS); @@ -949,9 +947,10 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, return -ENOMEM; data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; - dind = sb_bread(sb, le32_to_cpu(*data)); - if (!dind) { - err = -EIO; + dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0); + if (IS_ERR(dind)) { + err = PTR_ERR(dind); + dind = NULL; goto exit_free; } @@ -970,9 +969,10 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, err = -EINVAL; goto exit_bh; } - primary[res] = sb_bread(sb, blk); - if (!primary[res]) { - err = -EIO; + primary[res] = ext4_sb_bread(sb, blk, 0); + if (IS_ERR(primary[res])) { + err = PTR_ERR(primary[res]); + primary[res] = NULL; goto exit_bh; } gdbackups = verify_reserved_gdb(sb, group, primary[res]); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7fd64f5f70f0..675f4257597e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -140,6 +140,29 @@ MODULE_ALIAS_FS("ext3"); MODULE_ALIAS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) +/* + * This works like sb_bread() except it uses ERR_PTR for error + * returns. Currently with sb_bread it's impossible to distinguish + * between ENOMEM and EIO situations (since both result in a NULL + * return. + */ +struct buffer_head * +ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags) +{ + struct buffer_head *bh = sb_getblk(sb, block); + + if (bh == NULL) + return ERR_PTR(-ENOMEM); + if (buffer_uptodate(bh)) + return bh; + ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh); + wait_on_buffer(bh); + if (buffer_uptodate(bh)) + return bh; + put_bh(bh); + return ERR_PTR(-EIO); +} + static int ext4_verify_csum_type(struct super_block *sb, struct ext4_super_block *es) { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index a5923a1d0ff4..d4fd6ed0d371 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -521,14 +521,13 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); - error = -ENODATA; if (!EXT4_I(inode)->i_file_acl) - goto cleanup; + return -ENODATA; ea_idebug(inode, "reading block %llu", (unsigned long long)EXT4_I(inode)->i_file_acl); - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - if (!bh) - goto cleanup; + bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); + if (IS_ERR(bh)) + return PTR_ERR(bh); ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); error = ext4_xattr_check_block(inode, bh); @@ -695,26 +694,23 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); - error = 0; if (!EXT4_I(inode)->i_file_acl) - goto cleanup; + return 0; ea_idebug(inode, "reading block %llu", (unsigned long long)EXT4_I(inode)->i_file_acl); - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - error = -EIO; - if (!bh) - goto cleanup; + bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); + if (IS_ERR(bh)) + return PTR_ERR(bh); ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); error = ext4_xattr_check_block(inode, bh); if (error) goto cleanup; ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh); - error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); - + error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, + buffer_size); cleanup: brelse(bh); - return error; } @@ -829,9 +825,9 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) } if (EXT4_I(inode)->i_file_acl) { - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - if (!bh) { - ret = -EIO; + bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); + if (IS_ERR(bh)) { + ret = PTR_ERR(bh); goto out; } @@ -1824,16 +1820,15 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, if (EXT4_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. */ - bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl); - error = -EIO; - if (!bs->bh) - goto cleanup; + bs->bh = ext4_sb_bread(sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); + if (IS_ERR(bs->bh)) + return PTR_ERR(bs->bh); ea_bdebug(bs->bh, "b_count=%d, refcount=%d", atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); error = ext4_xattr_check_block(inode, bs->bh); if (error) - goto cleanup; + return error; /* Find the named attribute. */ bs->s.base = BHDR(bs->bh); bs->s.first = BFIRST(bs->bh); @@ -1842,13 +1837,10 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, error = xattr_find_entry(inode, &bs->s.here, bs->s.end, i->name_index, i->name, 1); if (error && error != -ENODATA) - goto cleanup; + return error; bs->s.not_found = error; } - error = 0; - -cleanup: - return error; + return 0; } static int @@ -2277,9 +2269,9 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode) if (!EXT4_I(inode)->i_file_acl) return NULL; - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - if (!bh) - return ERR_PTR(-EIO); + bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); + if (IS_ERR(bh)) + return bh; error = ext4_xattr_check_block(inode, bh); if (error) { brelse(bh); @@ -2749,10 +2741,11 @@ retry: if (EXT4_I(inode)->i_file_acl) { struct buffer_head *bh; - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - error = -EIO; - if (!bh) + bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); + if (IS_ERR(bh)) { + error = PTR_ERR(bh); goto cleanup; + } error = ext4_xattr_check_block(inode, bh); if (error) { brelse(bh); @@ -2906,11 +2899,12 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, } if (EXT4_I(inode)->i_file_acl) { - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - if (!bh) { - EXT4_ERROR_INODE(inode, "block %llu read error", - EXT4_I(inode)->i_file_acl); - error = -EIO; + bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); + if (IS_ERR(bh)) { + error = PTR_ERR(bh); + if (error == -EIO) + EXT4_ERROR_INODE(inode, "block %llu read error", + EXT4_I(inode)->i_file_acl); goto cleanup; } error = ext4_xattr_check_block(inode, bh); @@ -3063,8 +3057,10 @@ ext4_xattr_block_cache_find(struct inode *inode, while (ce) { struct buffer_head *bh; - bh = sb_bread(inode->i_sb, ce->e_value); - if (!bh) { + bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO); + if (IS_ERR(bh)) { + if (PTR_ERR(bh) == -ENOMEM) + return NULL; EXT4_ERROR_INODE(inode, "block %lu read error", (unsigned long)ce->e_value); } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { From 34bba27d0399bee3cf65b030f2f43dde77d92248 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Mon, 3 Dec 2018 23:28:02 -0500 Subject: [PATCH 1236/2608] ext4: fix possible use after free in ext4_quota_enable commit 61157b24e60fb3cd1f85f2c76a7b1d628f970144 upstream. The function frees qf_inode via iput but then pass qf_inode to lockdep_set_quota_inode on the failure path. This may result in a use-after-free bug. The patch frees df_inode only when it is never used. Fixes: daf647d2dd5 ("ext4: add lockdep annotations for i_data_sem") Cc: stable@kernel.org # 4.6 Reviewed-by: Jan Kara Signed-off-by: Pan Bian Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 675f4257597e..fb5ed9435e42 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5636,9 +5636,9 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, qf_inode->i_flags |= S_NOQUOTA; lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA); err = dquot_enable(qf_inode, type, format_id, flags); - iput(qf_inode); if (err) lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL); + iput(qf_inode); return err; } From 92bb9b067bda14377d44c86ae5af06143e436245 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 4 Dec 2018 00:06:53 -0500 Subject: [PATCH 1237/2608] ext4: missing unlock/put_page() in ext4_try_to_write_inline_data() commit 132d00becb31e88469334e1e62751c81345280e0 upstream. In case of error, ext4_try_to_write_inline_data() should unlock and release the page it holds. Fixes: f19d5870cbf7 ("ext4: add normal write support for inline data") Cc: stable@kernel.org # 3.8 Signed-off-by: Maurizio Lombardi Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index ac2e0516c16f..40a2d1a428c2 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -711,8 +711,11 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, if (!PageUptodate(page)) { ret = ext4_read_inline_page(inode, page); - if (ret < 0) + if (ret < 0) { + unlock_page(page); + put_page(page); goto out_up_read; + } } ret = 1; From 0bf8b3fdac08bc0bc15f854faab6f8c3b607a430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ruippan=20=28=E6=BD=98=E7=9D=BF=29?= Date: Tue, 4 Dec 2018 01:04:12 -0500 Subject: [PATCH 1238/2608] ext4: fix EXT4_IOC_GROUP_ADD ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e647e29196b7f802f8242c39ecb7cc937f5ef217 upstream. Commit e2b911c53584 ("ext4: clean up feature test macros with predicate functions") broke the EXT4_IOC_GROUP_ADD ioctl. This was not noticed since only very old versions of resize2fs (before e2fsprogs 1.42) use this ioctl. However, using a new kernel with an enterprise Linux userspace will cause attempts to use online resize to fail with "No reserved GDT blocks". Fixes: e2b911c53584 ("ext4: clean up feature test macros with predicate...") Cc: stable@kernel.org # v4.4 Signed-off-by: Theodore Ts'o Signed-off-by: ruippan (潘睿) Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index a5cd9e8e5e09..703b516366fd 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1602,7 +1602,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } if (reserved_gdb || gdb_off == 0) { - if (ext4_has_feature_resize_inode(sb) || + if (!ext4_has_feature_resize_inode(sb) || !le16_to_cpu(es->s_reserved_gdt_blocks)) { ext4_warning(sb, "No reserved GDT blocks, can't resize"); From 6ef63893e78fd20aeb12257cf3259fc50c86f7da Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Dec 2018 12:28:13 -0500 Subject: [PATCH 1239/2608] ext4: include terminating u32 in size of xattr entries when expanding inodes commit a805622a757b6d7f65def4141d29317d8e37b8a1 upstream. In ext4_expand_extra_isize_ea(), we calculate the total size of the xattr header, plus the xattr entries so we know how much of the beginning part of the xattrs to move when expanding the inode extra size. We need to include the terminating u32 at the end of the xattr entries, or else if there is uninitialized, non-zero bytes after the xattr entries and before the xattr values, the list of xattr entries won't be properly terminated. Reported-by: Steve Graham Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index d4fd6ed0d371..311761a6ef6d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2724,7 +2724,7 @@ retry: base = IFIRST(header); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; min_offs = end - base; - total_ino = sizeof(struct ext4_xattr_ibody_header); + total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32); error = xattr_check_inode(inode, header, end); if (error) From dbffc914157c45a3d2157aa09deb9307c9e2b078 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Dec 2018 14:07:58 -0500 Subject: [PATCH 1240/2608] ext4: force inode writes when nfsd calls commit_metadata() commit fde872682e175743e0c3ef939c89e3c6008a1529 upstream. Some time back, nfsd switched from calling vfs_fsync() to using a new commit_metadata() hook in export_operations(). If the file system did not provide a commit_metadata() hook, it fell back to using sync_inode_metadata(). Unfortunately doesn't work on all file systems. In particular, it doesn't work on ext4 due to how the inode gets journalled --- the VFS writeback code will not always call ext4_write_inode(). So we need to provide our own ext4_nfs_commit_metdata() method which calls ext4_write_inode() directly. Google-Bug-Id: 121195940 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 11 +++++++++++ include/trace/events/ext4.h | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb5ed9435e42..77300b8ca211 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1153,6 +1153,16 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, ext4_nfs_get_inode); } +static int ext4_nfs_commit_metadata(struct inode *inode) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL + }; + + trace_ext4_nfs_commit_metadata(inode); + return ext4_write_inode(inode, &wbc); +} + /* * Try to release metadata pages (indirect blocks, directories) which are * mapped via the block device. Since these pages could have journal heads @@ -1361,6 +1371,7 @@ static const struct export_operations ext4_export_ops = { .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, .get_parent = ext4_get_parent, + .commit_metadata = ext4_nfs_commit_metadata, }; enum { diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 4d0e3af4e561..3902b6960db2 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -225,6 +225,26 @@ TRACE_EVENT(ext4_drop_inode, (unsigned long) __entry->ino, __entry->drop) ); +TRACE_EVENT(ext4_nfs_commit_metadata, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + ), + + TP_printk("dev %d,%d ino %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino) +); + TRACE_EVENT(ext4_mark_inode_dirty, TP_PROTO(struct inode *inode, unsigned long IP), From 7f3901d80c6a688c28638f5cb53ab0d12506cc42 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Dec 2018 14:36:58 -0500 Subject: [PATCH 1241/2608] ext4: check for shutdown and r/o file system in ext4_write_inode() commit 18f2c4fcebf2582f96cbd5f2238f4f354a0e4847 upstream. If the file system has been shut down or is read-only, then ext4_write_inode() needs to bail out early. Also use jbd2_complete_transaction() instead of ext4_force_commit() so we only force a commit if it is needed. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bd2bf83b1a1f..22c9bb8c671f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5218,9 +5218,13 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) || + sb_rdonly(inode->i_sb)) return 0; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + if (EXT4_SB(inode->i_sb)->s_journal) { if (ext4_journal_current_handle()) { jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); @@ -5236,7 +5240,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) return 0; - err = ext4_force_commit(inode->i_sb); + err = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal, + EXT4_I(inode)->i_sync_tid); } else { struct ext4_iloc iloc; From 24fc3cc20b7c39a6148d1495962b6433dd13c727 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 8 Nov 2018 08:06:10 +0100 Subject: [PATCH 1242/2608] spi: bcm2835: Fix race on DMA termination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e82b0b3828451c1cd331d9f304c6078fcd43b62e upstream. If a DMA transfer finishes orderly right when spi_transfer_one_message() determines that it has timed out, the callbacks bcm2835_spi_dma_done() and bcm2835_spi_handle_err() race to call dmaengine_terminate_all(), potentially leading to double termination. Prevent by atomically changing the dma_pending flag before calling dmaengine_terminate_all(). Signed-off-by: Lukas Wunner Fixes: 3ecd37edaa2a ("spi: bcm2835: enable dma modes for transfers meeting certain conditions") Cc: stable@vger.kernel.org # v4.2+ Cc: Mathias Duckeck Cc: Frank Pavlic Cc: Martin Sperl Cc: Noralf Trønnes Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm2835.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index f35cc10772f6..feb921005382 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -233,10 +233,9 @@ static void bcm2835_spi_dma_done(void *data) * is called the tx-dma must have finished - can't get to this * situation otherwise... */ - dmaengine_terminate_all(master->dma_tx); - - /* mark as no longer pending */ - bs->dma_pending = 0; + if (cmpxchg(&bs->dma_pending, true, false)) { + dmaengine_terminate_all(master->dma_tx); + } /* and mark as completed */; complete(&master->xfer_completion); @@ -617,10 +616,9 @@ static void bcm2835_spi_handle_err(struct spi_master *master, struct bcm2835_spi *bs = spi_master_get_devdata(master); /* if an error occurred and we have an active dma, then terminate */ - if (bs->dma_pending) { + if (cmpxchg(&bs->dma_pending, true, false)) { dmaengine_terminate_all(master->dma_tx); dmaengine_terminate_all(master->dma_rx); - bs->dma_pending = 0; } /* and reset */ bcm2835_spi_reset_hw(master); From fef1fb1f498752db3a8c28ee24932d349617d6e4 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 8 Nov 2018 08:06:10 +0100 Subject: [PATCH 1243/2608] spi: bcm2835: Fix book-keeping of DMA termination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dbc944115eed48af110646992893dc43321368d8 upstream. If submission of a DMA TX transfer succeeds but submission of the corresponding RX transfer does not, the BCM2835 SPI driver terminates the TX transfer but neglects to reset the dma_pending flag to false. Thus, if the next transfer uses interrupt mode (because it is shorter than BCM2835_SPI_DMA_MIN_LENGTH) and runs into a timeout, dmaengine_terminate_all() will be called both for TX (once more) and for RX (which was never started in the first place). Fix it. Signed-off-by: Lukas Wunner Fixes: 3ecd37edaa2a ("spi: bcm2835: enable dma modes for transfers meeting certain conditions") Cc: stable@vger.kernel.org # v4.2+ Cc: Mathias Duckeck Cc: Frank Pavlic Cc: Martin Sperl Cc: Noralf Trønnes Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm2835.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index feb921005382..a011189b698c 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -341,6 +341,7 @@ static int bcm2835_spi_transfer_one_dma(struct spi_master *master, if (ret) { /* need to reset on errors */ dmaengine_terminate_all(master->dma_tx); + bs->dma_pending = false; bcm2835_spi_reset_hw(master); return ret; } From 7c6ac785e623612b04a6f2806832a6969ff8bb53 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 8 Nov 2018 08:06:10 +0100 Subject: [PATCH 1244/2608] spi: bcm2835: Avoid finishing transfer prematurely in IRQ mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 56c1723426d3cfd4723bfbfce531d7b38bae6266 upstream. The IRQ handler bcm2835_spi_interrupt() first reads as much as possible from the RX FIFO, then writes as much as possible to the TX FIFO. Afterwards it decides whether the transfer is finished by checking if the TX FIFO is empty. If very few bytes were written to the TX FIFO, they may already have been transmitted by the time the FIFO's emptiness is checked. As a result, the transfer will be declared finished and the chip will be reset without reading the corresponding received bytes from the RX FIFO. The odds of this happening increase with a high clock frequency (such that the TX FIFO drains quickly) and either passing "threadirqs" on the command line or enabling CONFIG_PREEMPT_RT_BASE (such that the IRQ handler may be preempted between filling the TX FIFO and checking its emptiness). Fix by instead checking whether rx_len has reached zero, which means that the transfer has been received in full. This is also more efficient as it avoids one bus read access per interrupt. Note that bcm2835_spi_transfer_one_poll() likewise uses rx_len to determine whether the transfer has finished. Signed-off-by: Lukas Wunner Fixes: e34ff011c70e ("spi: bcm2835: move to the transfer_one driver model") Cc: stable@vger.kernel.org # v4.1+ Cc: Mathias Duckeck Cc: Frank Pavlic Cc: Martin Sperl Cc: Noralf Trønnes Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm2835.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index a011189b698c..774161bbcb2e 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -155,8 +155,7 @@ static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id) /* Write as many bytes as possible to FIFO */ bcm2835_wr_fifo(bs); - /* based on flags decide if we can finish the transfer */ - if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) { + if (!bs->rx_len) { /* Transfer complete - reset SPI HW */ bcm2835_spi_reset_hw(master); /* wake up the framework */ From 3f844ac99f3430c04e93d72f3f243e2ffb89e52b Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Sat, 3 Nov 2018 23:54:13 +0100 Subject: [PATCH 1245/2608] clk: rockchip: fix typo in rk3188 spdif_frac parent commit 8b19faf6fae2867e2c177212c541e8ae36aa4d32 upstream. Fix typo in common_clk_branches. Make spdif_pre parent of spdif_frac. Fixes: 667464208989 ("clk: rockchip: include downstream muxes into fractional dividers") Cc: stable@vger.kernel.org Signed-off-by: Johan Jonker Acked-by: Elaine Zhang Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3188.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index 00ad0e5f8d66..2b0d772b4f43 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -382,7 +382,7 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { COMPOSITE_NOMUX(0, "spdif_pre", "i2s_src", 0, RK2928_CLKSEL_CON(5), 0, 7, DFLAGS, RK2928_CLKGATE_CON(0), 13, GFLAGS), - COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_pll", CLK_SET_RATE_PARENT, + COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_pre", CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(9), 0, RK2928_CLKGATE_CON(0), 14, GFLAGS, &common_spdif_fracmux), From cf3168c56419289d940098eec809b793338159ef Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 18 Oct 2018 19:50:43 -0500 Subject: [PATCH 1246/2608] crypto: cavium/nitrox - fix a DMA pool free failure commit 7172122be6a4712d699da4d261f92aa5ab3a78b8 upstream. In crypto_alloc_context(), a DMA pool is allocated through dma_pool_alloc() to hold the crypto context. The meta data of the DMA pool, including the pool used for the allocation 'ndev->ctx_pool' and the base address of the DMA pool used by the device 'dma', are then stored to the beginning of the pool. These meta data are eventually used in crypto_free_context() to free the DMA pool through dma_pool_free(). However, given that the DMA pool can also be accessed by the device, a malicious device can modify these meta data, especially when the device is controlled to deploy an attack. This can cause an unexpected DMA pool free failure. To avoid the above issue, this patch introduces a new structure crypto_ctx_hdr and a new field chdr in the structure nitrox_crypto_ctx hold the meta data information of the DMA pool after the allocation. Note that the original structure ctx_hdr is not changed to ensure the compatibility. Cc: Signed-off-by: Wenwen Wang Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/cavium/nitrox/nitrox_algs.c | 12 ++++++----- drivers/crypto/cavium/nitrox/nitrox_lib.c | 24 ++++++++++++++++------ drivers/crypto/cavium/nitrox/nitrox_req.h | 7 +++++++ 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_algs.c b/drivers/crypto/cavium/nitrox/nitrox_algs.c index 2ae6124e5da6..5d54ebc20cb3 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_algs.c +++ b/drivers/crypto/cavium/nitrox/nitrox_algs.c @@ -73,7 +73,7 @@ static int flexi_aes_keylen(int keylen) static int nitrox_skcipher_init(struct crypto_skcipher *tfm) { struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm); - void *fctx; + struct crypto_ctx_hdr *chdr; /* get the first device */ nctx->ndev = nitrox_get_first_device(); @@ -81,12 +81,14 @@ static int nitrox_skcipher_init(struct crypto_skcipher *tfm) return -ENODEV; /* allocate nitrox crypto context */ - fctx = crypto_alloc_context(nctx->ndev); - if (!fctx) { + chdr = crypto_alloc_context(nctx->ndev); + if (!chdr) { nitrox_put_device(nctx->ndev); return -ENOMEM; } - nctx->u.ctx_handle = (uintptr_t)fctx; + nctx->chdr = chdr; + nctx->u.ctx_handle = (uintptr_t)((u8 *)chdr->vaddr + + sizeof(struct ctx_hdr)); crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(tfm) + sizeof(struct nitrox_kcrypt_request)); return 0; @@ -102,7 +104,7 @@ static void nitrox_skcipher_exit(struct crypto_skcipher *tfm) memset(&fctx->crypto, 0, sizeof(struct crypto_keys)); memset(&fctx->auth, 0, sizeof(struct auth_keys)); - crypto_free_context((void *)fctx); + crypto_free_context((void *)nctx->chdr); } nitrox_put_device(nctx->ndev); diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c index 9906c0086647..cea977e158fc 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_lib.c +++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c @@ -146,20 +146,31 @@ static void destroy_crypto_dma_pool(struct nitrox_device *ndev) void *crypto_alloc_context(struct nitrox_device *ndev) { struct ctx_hdr *ctx; + struct crypto_ctx_hdr *chdr; void *vaddr; dma_addr_t dma; - vaddr = dma_pool_alloc(ndev->ctx_pool, (GFP_ATOMIC | __GFP_ZERO), &dma); - if (!vaddr) + chdr = kmalloc(sizeof(*chdr), GFP_KERNEL); + if (!chdr) return NULL; + vaddr = dma_pool_alloc(ndev->ctx_pool, (GFP_ATOMIC | __GFP_ZERO), &dma); + if (!vaddr) { + kfree(chdr); + return NULL; + } + /* fill meta data */ ctx = vaddr; ctx->pool = ndev->ctx_pool; ctx->dma = dma; ctx->ctx_dma = dma + sizeof(struct ctx_hdr); - return ((u8 *)vaddr + sizeof(struct ctx_hdr)); + chdr->pool = ndev->ctx_pool; + chdr->dma = dma; + chdr->vaddr = vaddr; + + return chdr; } /** @@ -168,13 +179,14 @@ void *crypto_alloc_context(struct nitrox_device *ndev) */ void crypto_free_context(void *ctx) { - struct ctx_hdr *ctxp; + struct crypto_ctx_hdr *ctxp; if (!ctx) return; - ctxp = (struct ctx_hdr *)((u8 *)ctx - sizeof(struct ctx_hdr)); - dma_pool_free(ctxp->pool, ctxp, ctxp->dma); + ctxp = ctx; + dma_pool_free(ctxp->pool, ctxp->vaddr, ctxp->dma); + kfree(ctxp); } /** diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h index d091b6f5f5dd..19f0a20e3bb3 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_req.h +++ b/drivers/crypto/cavium/nitrox/nitrox_req.h @@ -181,12 +181,19 @@ struct flexi_crypto_context { struct auth_keys auth; }; +struct crypto_ctx_hdr { + struct dma_pool *pool; + dma_addr_t dma; + void *vaddr; +}; + struct nitrox_crypto_ctx { struct nitrox_device *ndev; union { u64 ctx_handle; struct flexi_crypto_context *fctx; } u; + struct crypto_ctx_hdr *chdr; }; struct nitrox_kcrypt_request { From 8769b27e0998fadba60754c7e53fb1d09f98d8ea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Nov 2018 12:15:15 -0800 Subject: [PATCH 1247/2608] cgroup: fix CSS_TASK_ITER_PROCS commit e9d81a1bc2c48ea9782e3e8b53875f419766ef47 upstream. CSS_TASK_ITER_PROCS implements process-only iteration by making css_task_iter_advance() skip tasks which aren't threadgroup leaders; however, when an iteration is started css_task_iter_start() calls the inner helper function css_task_iter_advance_css_set() instead of css_task_iter_advance(). As the helper doesn't have the skip logic, when the first task to visit is a non-leader thread, it doesn't get skipped correctly as shown in the following example. # ps -L 2030 PID LWP TTY STAT TIME COMMAND 2030 2030 pts/0 Sl+ 0:00 ./test-thread 2030 2031 pts/0 Sl+ 0:00 ./test-thread # mkdir -p /sys/fs/cgroup/x/a/b # echo threaded > /sys/fs/cgroup/x/a/cgroup.type # echo threaded > /sys/fs/cgroup/x/a/b/cgroup.type # echo 2030 > /sys/fs/cgroup/x/a/cgroup.procs # cat /sys/fs/cgroup/x/a/cgroup.threads 2030 2031 # cat /sys/fs/cgroup/x/cgroup.procs 2030 # echo 2030 > /sys/fs/cgroup/x/a/b/cgroup.threads # cat /sys/fs/cgroup/x/cgroup.procs 2031 2030 The last read of cgroup.procs is incorrectly showing non-leader 2031 in cgroup.procs output. This can be fixed by updating css_task_iter_advance() to handle the first advance and css_task_iters_tart() to call css_task_iter_advance() instead of the inner helper. After the fix, the same commands result in the following (correct) result: # ps -L 2062 PID LWP TTY STAT TIME COMMAND 2062 2062 pts/0 Sl+ 0:00 ./test-thread 2062 2063 pts/0 Sl+ 0:00 ./test-thread # mkdir -p /sys/fs/cgroup/x/a/b # echo threaded > /sys/fs/cgroup/x/a/cgroup.type # echo threaded > /sys/fs/cgroup/x/a/b/cgroup.type # echo 2062 > /sys/fs/cgroup/x/a/cgroup.procs # cat /sys/fs/cgroup/x/a/cgroup.threads 2062 2063 # cat /sys/fs/cgroup/x/cgroup.procs 2062 # echo 2062 > /sys/fs/cgroup/x/a/b/cgroup.threads # cat /sys/fs/cgroup/x/cgroup.procs 2062 Signed-off-by: Tejun Heo Reported-by: "Michael Kerrisk (man-pages)" Fixes: 8cfd8147df67 ("cgroup: implement cgroup v2 thread support") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3fc11b8851ac..109c32c56de7 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4080,20 +4080,25 @@ static void css_task_iter_advance(struct css_task_iter *it) lockdep_assert_held(&css_set_lock); repeat: - /* - * Advance iterator to find next entry. cset->tasks is consumed - * first and then ->mg_tasks. After ->mg_tasks, we move onto the - * next cset. - */ - next = it->task_pos->next; + if (it->task_pos) { + /* + * Advance iterator to find next entry. cset->tasks is + * consumed first and then ->mg_tasks. After ->mg_tasks, + * we move onto the next cset. + */ + next = it->task_pos->next; - if (next == it->tasks_head) - next = it->mg_tasks_head->next; + if (next == it->tasks_head) + next = it->mg_tasks_head->next; - if (next == it->mg_tasks_head) + if (next == it->mg_tasks_head) + css_task_iter_advance_css_set(it); + else + it->task_pos = next; + } else { + /* called from start, proceed to the first cset */ css_task_iter_advance_css_set(it); - else - it->task_pos = next; + } /* if PROCS, skip over tasks which aren't group leaders */ if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos && @@ -4133,7 +4138,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, it->cset_head = it->cset_pos; - css_task_iter_advance_css_set(it); + css_task_iter_advance(it); spin_unlock_irq(&css_set_lock); } From 4aac41de7860516799c11b8f2e7d47963ac59a1a Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Wed, 19 Dec 2018 12:11:03 +0800 Subject: [PATCH 1248/2608] cdc-acm: fix abnormal DATA RX issue for Mediatek Preloader. commit eafb27fa5283599ce6c5492ea18cf636a28222bb upstream. Mediatek Preloader is a proprietary embedded boot loader for loading Little Kernel and Linux into device DRAM. This boot loader also handle firmware update. Mediatek Preloader will be enumerated as a virtual COM port when the device is connected to Windows or Linux OS via CDC-ACM class driver. When the USB enumeration has been done, Mediatek Preloader will send out handshake command "READY" to PC actively instead of waiting command from the download tool. Since Linux 4.12, the commit "tty: reset termios state on device registration" (93857edd9829e144acb6c7e72d593f6e01aead66) causes Mediatek Preloader receiving some abnoraml command like "READYXX" as it sent. This will be recognized as an incorrect response. The behavior change also causes the download handshake fail. This change only affects subsequent connects if the reconnected device happens to get the same minor number. By disabling the ECHO termios flag could avoid this problem. However, it cannot be done by user space configuration when download tool open /dev/ttyACM0. This is because the device running Mediatek Preloader will send handshake command "READY" immediately once the CDC-ACM driver is ready. This patch wants to fix above problem by introducing "DISABLE_ECHO" property in driver_info. When Mediatek Preloader is connected, the CDC-ACM driver could disable ECHO flag in termios to avoid the problem. Signed-off-by: Macpaul Lin Cc: stable@vger.kernel.org Reviewed-by: Johan Hovold Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 10 ++++++++++ drivers/usb/class/cdc-acm.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 5a8ef83a5c5c..423a339e53bc 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -593,6 +593,13 @@ static int acm_tty_install(struct tty_driver *driver, struct tty_struct *tty) if (retval) goto error_init_termios; + /* + * Suppress initial echoing for some devices which might send data + * immediately after acm driver has been installed. + */ + if (acm->quirks & DISABLE_ECHO) + tty->termios.c_lflag &= ~ECHO; + tty->driver_data = acm; return 0; @@ -1685,6 +1692,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x0e8d, 0x2000), /* MediaTek Inc Preloader */ + .driver_info = DISABLE_ECHO, /* DISABLE ECHO in termios flag */ + }, { USB_DEVICE(0x0e8d, 0x3329), /* MediaTek Inc GPS */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index ca06b20d7af9..515aad0847ee 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -140,3 +140,4 @@ struct acm { #define QUIRK_CONTROL_LINE_STATE BIT(6) #define CLEAR_HALT_CONDITIONS BIT(7) #define SEND_ZERO_PACKET BIT(8) +#define DISABLE_ECHO BIT(9) From 20373d980e4543e524501d3c9d19ee717f5d2ebb Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 28 Nov 2018 14:54:28 +0000 Subject: [PATCH 1249/2608] Btrfs: fix fsync of files with multiple hard links in new directories commit 41bd60676923822de1df2c50b3f9a10171f4338a upstream. The log tree has a long standing problem that when a file is fsync'ed we only check for new ancestors, created in the current transaction, by following only the hard link for which the fsync was issued. We follow the ancestors using the VFS' dget_parent() API. This means that if we create a new link for a file in a directory that is new (or in an any other new ancestor directory) and then fsync the file using an old hard link, we end up not logging the new ancestor, and on log replay that new hard link and ancestor do not exist. In some cases, involving renames, the file will not exist at all. Example: mkfs.btrfs -f /dev/sdb mount /dev/sdb /mnt mkdir /mnt/A touch /mnt/foo ln /mnt/foo /mnt/A/bar xfs_io -c fsync /mnt/foo In this example after log replay only the hard link named 'foo' exists and directory A does not exist, which is unexpected. In other major linux filesystems, such as ext4, xfs and f2fs for example, both hard links exist and so does directory A after mounting again the filesystem. Checking if any new ancestors are new and need to be logged was added in 2009 by commit 12fcfd22fe5b ("Btrfs: tree logging unlink/rename fixes"), however only for the ancestors of the hard link (dentry) for which the fsync was issued, instead of checking for all ancestors for all of the inode's hard links. So fix this by tracking the id of the last transaction where a hard link was created for an inode and then on fsync fallback to a full transaction commit when an inode has more than one hard link and at least one new hard link was created in the current transaction. This is the simplest solution since this is not a common use case (adding frequently hard links for which there's an ancestor created in the current transaction and then fsync the file). In case it ever becomes a common use case, a solution that consists of iterating the fs/subvol btree for each hard link and check if any ancestor is new, could be implemented. This solves many unexpected scenarios reported by Jayashree Mohan and Vijay Chidambaram, and for which there is a new test case for fstests under review. Fixes: 12fcfd22fe5b ("Btrfs: tree logging unlink/rename fixes") CC: stable@vger.kernel.org # 4.4+ Reported-by: Vijay Chidambaram Reported-by: Jayashree Mohan Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/btrfs_inode.h | 6 ++++++ fs/btrfs/inode.c | 17 +++++++++++++++++ fs/btrfs/tree-log.c | 16 ++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index eccadb5f62a5..0f77ef303ae7 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -160,6 +160,12 @@ struct btrfs_inode { */ u64 last_unlink_trans; + /* + * Track the transaction id of the last transaction used to create a + * hard link for the inode. This is used by the log tree (fsync). + */ + u64 last_link_trans; + /* * Number of bytes outstanding that are going to need csums. This is * used in ENOSPC accounting. diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1c340d6c8568..09829e8d759e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3897,6 +3897,21 @@ cache_index: * inode is not a directory, logging its parent unnecessarily. */ BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans; + /* + * Similar reasoning for last_link_trans, needs to be set otherwise + * for a case like the following: + * + * mkdir A + * touch foo + * ln foo A/bar + * echo 2 > /proc/sys/vm/drop_caches + * fsync foo + * + * + * Would result in link bar and directory A not existing after the power + * failure. + */ + BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans; path->slots[0]++; if (inode->i_nlink != 1 || @@ -6813,6 +6828,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) goto fail; } + BTRFS_I(inode)->last_link_trans = trans->transid; d_instantiate(dentry, inode); btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); } @@ -9540,6 +9556,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->index_cnt = (u64)-1; ei->dir_index = 0; ei->last_unlink_trans = 0; + ei->last_link_trans = 0; ei->last_log_commit = 0; ei->delayed_iput_count = 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2109db196449..179a383a4aaa 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5795,6 +5795,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_trans; } + /* + * If a new hard link was added to the inode in the current transaction + * and its link count is now greater than 1, we need to fallback to a + * transaction commit, otherwise we can end up not logging all its new + * parents for all the hard links. Here just from the dentry used to + * fsync, we can not visit the ancestor inodes for all the other hard + * links to figure out if any is new, so we fallback to a transaction + * commit (instead of adding a lot of complexity of scanning a btree, + * since this scenario is not a common use case). + */ + if (inode->vfs_inode.i_nlink > 1 && + inode->last_link_trans > last_committed) { + ret = -EMLINK; + goto end_trans; + } + while (1) { if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) break; From 5e614e212a6359af78b6034ceb12c56f71d5b423 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Jan 2019 18:42:00 -0200 Subject: [PATCH 1250/2608] netfilter: xt_connlimit: don't store address in the conn nodes commit ce49480dba8666cba0106e8e31a942c9ce4c438a upstream. Only stored, never read. This is a leftover from commit 7d08487777c8 ("netfilter: connlimit: use rbtree for per-host conntrack obj storage"), which added the rbtree node struct that stores the address instead. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso [mfo: backport: refresh context lines and use older symbol/file names: - nf_conncount.c -> xt_connlimit.c. - nf_conncount_rb -> xt_connlimit_rb - nf_conncount_tuple -> xt_connlimit_conn - additionally, remove the add_hlist() 'addr' parameter that isn't used and removed later upstream with commit 625c556118f3 ("netfilter: connlimit: split xt_connlimit into front and backend") in the rename from 'xt_connlimit.c' to 'nf_conncount.c', a big refactor, so do it here, while still here in this related patch.] Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Sasha Levin --- net/netfilter/xt_connlimit.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index ffa8eec980e9..79d41515dd2c 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -46,7 +46,6 @@ struct xt_connlimit_conn { struct hlist_node node; struct nf_conntrack_tuple tuple; - union nf_inet_addr addr; }; struct xt_connlimit_rb { @@ -116,8 +115,7 @@ same_source_net(const union nf_inet_addr *addr, } static bool add_hlist(struct hlist_head *head, - const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr) + const struct nf_conntrack_tuple *tuple) { struct xt_connlimit_conn *conn; @@ -125,7 +123,6 @@ static bool add_hlist(struct hlist_head *head, if (conn == NULL) return false; conn->tuple = *tuple; - conn->addr = *addr; hlist_add_head(&conn->node, head); return true; } @@ -231,7 +228,7 @@ count_tree(struct net *net, struct rb_root *root, if (!addit) return count; - if (!add_hlist(&rbconn->hhead, tuple, addr)) + if (!add_hlist(&rbconn->hhead, tuple)) return 0; /* hotdrop */ return count + 1; @@ -270,7 +267,6 @@ count_tree(struct net *net, struct rb_root *root, } conn->tuple = *tuple; - conn->addr = *addr; rbconn->addr = *addr; INIT_HLIST_HEAD(&rbconn->hhead); From 15ee3595d2ac6577f179dc688137d60ee92c0984 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 2 Jan 2019 18:42:01 -0200 Subject: [PATCH 1251/2608] netfilter: nf_conncount: expose connection list interface commit 5e5cbc7b23eaf13e18652c03efbad5be6995de6a upstream. This patch provides an interface to maintain the list of connections and the lookup function to obtain the number of connections in the list. Signed-off-by: Pablo Neira Ayuso [mfo: backport: refresh context lines and use older symbol/file names: - nf_conntrack_count.h: new file, add include guards. - nf_conncount.c -> xt_connlimit.c. - nf_conncount_rb -> xt_connlimit_rb - nf_conncount_tuple -> xt_connlimit_conn - conncount_rb_cachep -> connlimit_rb_cachep - conncount_conn_cachep -> connlimit_conn_cachep] Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Sasha Levin --- include/net/netfilter/nf_conntrack_count.h | 14 +++++++++ net/netfilter/xt_connlimit.c | 36 ++++++++++++++-------- 2 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 include/net/netfilter/nf_conntrack_count.h diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h new file mode 100644 index 000000000000..54e43b8a8da1 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_count.h @@ -0,0 +1,14 @@ +#ifndef _NF_CONNTRACK_COUNT_H +#define _NF_CONNTRACK_COUNT_H + +unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone, + bool *addit); + +bool nf_conncount_add(struct hlist_head *head, + const struct nf_conntrack_tuple *tuple); + +void nf_conncount_cache_free(struct hlist_head *hhead); + +#endif diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 79d41515dd2c..7af58750ab49 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -114,7 +114,7 @@ same_source_net(const union nf_inet_addr *addr, } } -static bool add_hlist(struct hlist_head *head, +bool nf_conncount_add(struct hlist_head *head, const struct nf_conntrack_tuple *tuple) { struct xt_connlimit_conn *conn; @@ -126,12 +126,12 @@ static bool add_hlist(struct hlist_head *head, hlist_add_head(&conn->node, head); return true; } +EXPORT_SYMBOL_GPL(nf_conncount_add); -static unsigned int check_hlist(struct net *net, - struct hlist_head *head, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit) +unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone, + bool *addit) { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; @@ -176,6 +176,7 @@ static unsigned int check_hlist(struct net *net, return length; } +EXPORT_SYMBOL_GPL(nf_conncount_lookup); static void tree_nodes_free(struct rb_root *root, struct xt_connlimit_rb *gc_nodes[], @@ -222,13 +223,15 @@ count_tree(struct net *net, struct rb_root *root, } else { /* same source network -> be counted! */ unsigned int count; - count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit); + + count = nf_conncount_lookup(net, &rbconn->hhead, tuple, + zone, &addit); tree_nodes_free(root, gc_nodes, gc_count); if (!addit) return count; - if (!add_hlist(&rbconn->hhead, tuple)) + if (!nf_conncount_add(&rbconn->hhead, tuple)) return 0; /* hotdrop */ return count + 1; @@ -238,7 +241,7 @@ count_tree(struct net *net, struct rb_root *root, continue; /* only used for GC on hhead, retval and 'addit' ignored */ - check_hlist(net, &rbconn->hhead, tuple, zone, &addit); + nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit); if (hlist_empty(&rbconn->hhead)) gc_nodes[gc_count++] = rbconn; } @@ -378,11 +381,19 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) return 0; } -static void destroy_tree(struct rb_root *r) +void nf_conncount_cache_free(struct hlist_head *hhead) { struct xt_connlimit_conn *conn; - struct xt_connlimit_rb *rbconn; struct hlist_node *n; + + hlist_for_each_entry_safe(conn, n, hhead, node) + kmem_cache_free(connlimit_conn_cachep, conn); +} +EXPORT_SYMBOL_GPL(nf_conncount_cache_free); + +static void destroy_tree(struct rb_root *r) +{ + struct xt_connlimit_rb *rbconn; struct rb_node *node; while ((node = rb_first(r)) != NULL) { @@ -390,8 +401,7 @@ static void destroy_tree(struct rb_root *r) rb_erase(node, r); - hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) - kmem_cache_free(connlimit_conn_cachep, conn); + nf_conncount_cache_free(&rbconn->hhead); kmem_cache_free(connlimit_rb_cachep, rbconn); } From 525e1dffed8711973f77412729621098a95238e5 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Wed, 2 Jan 2019 18:42:02 -0200 Subject: [PATCH 1252/2608] netfilter: nf_conncount: Fix garbage collection with zones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 21ba8847f857028dc83a0f341e16ecc616e34740 upstream. Currently, we use check_hlist() for garbage colleciton. However, we use the ‘zone’ from the counted entry to query the existence of existing entries in the hlist. This could be wrong when they are in different zones, and this patch fixes this issue. Fixes: e59ea3df3fc2 ("netfilter: xt_connlimit: honor conntrack zone if available") Signed-off-by: Yi-Hung Wei Signed-off-by: Pablo Neira Ayuso [mfo: backport: refresh context lines and use older symbol/file names, note hunk 5: - nf_conncount.c -> xt_connlimit.c - nf_conncount_rb -> xt_connlimit_rb - nf_conncount_tuple -> xt_connlimit_conn - hunk 5: remove check for non-NULL 'tuple', that isn't required as it's introduced by upstream commit 35d8deb80 ("netfilter: conncount: Support count only use case") which addresses nf_conncount_count() that does not exist yet -- it's introduced by upstream commit 625c556118f3 ("netfilter: connlimit: split xt_connlimit into front and backend"), a refactor change. - nft_connlimit.c -> removed, not used/doesn't exist yet.] Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Sasha Levin --- include/net/netfilter/nf_conntrack_count.h | 3 ++- net/netfilter/xt_connlimit.c | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 54e43b8a8da1..4b71a2f4c351 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -7,7 +7,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, bool *addit); bool nf_conncount_add(struct hlist_head *head, - const struct nf_conntrack_tuple *tuple); + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); void nf_conncount_cache_free(struct hlist_head *hhead); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 7af58750ab49..ab1f849464fa 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -46,6 +46,7 @@ struct xt_connlimit_conn { struct hlist_node node; struct nf_conntrack_tuple tuple; + struct nf_conntrack_zone zone; }; struct xt_connlimit_rb { @@ -115,7 +116,8 @@ same_source_net(const union nf_inet_addr *addr, } bool nf_conncount_add(struct hlist_head *head, - const struct nf_conntrack_tuple *tuple) + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) { struct xt_connlimit_conn *conn; @@ -123,6 +125,7 @@ bool nf_conncount_add(struct hlist_head *head, if (conn == NULL) return false; conn->tuple = *tuple; + conn->zone = *zone; hlist_add_head(&conn->node, head); return true; } @@ -143,7 +146,7 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, zone, &conn->tuple); + found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); if (found == NULL) { hlist_del(&conn->node); kmem_cache_free(connlimit_conn_cachep, conn); @@ -152,7 +155,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, found_ct = nf_ct_tuplehash_to_ctrack(found); - if (nf_ct_tuple_equal(&conn->tuple, tuple)) { + if (nf_ct_tuple_equal(&conn->tuple, tuple) && + nf_ct_zone_equal(found_ct, zone, zone->dir)) { /* * Just to be sure we have it only once in the list. * We should not see tuples twice unless someone hooks @@ -231,7 +235,7 @@ count_tree(struct net *net, struct rb_root *root, if (!addit) return count; - if (!nf_conncount_add(&rbconn->hhead, tuple)) + if (!nf_conncount_add(&rbconn->hhead, tuple, zone)) return 0; /* hotdrop */ return count + 1; @@ -270,6 +274,7 @@ count_tree(struct net *net, struct rb_root *root, } conn->tuple = *tuple; + conn->zone = *zone; rbconn->addr = *addr; INIT_HLIST_HEAD(&rbconn->hhead); From 75af3d78168e654a5cd8bbc4c774f97be836165f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Jan 2019 18:42:03 -0200 Subject: [PATCH 1253/2608] netfilter: nf_conncount: fix garbage collection confirm race commit b36e4523d4d56e2595e28f16f6ccf1cd6a9fc452 upstream. Yi-Hung Wei and Justin Pettit found a race in the garbage collection scheme used by nf_conncount. When doing list walk, we lookup the tuple in the conntrack table. If the lookup fails we remove this tuple from our list because the conntrack entry is gone. This is the common cause, but turns out its not the only one. The list entry could have been created just before by another cpu, i.e. the conntrack entry might not yet have been inserted into the global hash. The avoid this, we introduce a timestamp and the owning cpu. If the entry appears to be stale, evict only if: 1. The current cpu is the one that added the entry, or, 2. The timestamp is older than two jiffies The second constraint allows GC to be taken over by other cpu too (e.g. because a cpu was offlined or napi got moved to another cpu). We can't pretend the 'doubtful' entry wasn't in our list. Instead, when we don't find an entry indicate via IS_ERR that entry was removed ('did not exist' or withheld ('might-be-unconfirmed'). This most likely also fixes a xt_connlimit imbalance earlier reported by Dmitry Andrianov. Cc: Dmitry Andrianov Reported-by: Justin Pettit Reported-by: Yi-Hung Wei Signed-off-by: Florian Westphal Acked-by: Yi-Hung Wei Signed-off-by: Pablo Neira Ayuso [mfo: backport: refresh context lines and use older symbol/file names: - nf_conncount.c -> xt_connlimit.c. - nf_conncount_rb -> xt_connlimit_rb - nf_conncount_tuple -> xt_connlimit_conn - conncount_conn_cachep -> connlimit_conn_cachep] Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Sasha Levin --- net/netfilter/xt_connlimit.c | 52 ++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index ab1f849464fa..913b86ef3a8d 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -47,6 +47,8 @@ struct xt_connlimit_conn { struct hlist_node node; struct nf_conntrack_tuple tuple; struct nf_conntrack_zone zone; + int cpu; + u32 jiffies32; }; struct xt_connlimit_rb { @@ -126,11 +128,42 @@ bool nf_conncount_add(struct hlist_head *head, return false; conn->tuple = *tuple; conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; hlist_add_head(&conn->node, head); return true; } EXPORT_SYMBOL_GPL(nf_conncount_add); +static const struct nf_conntrack_tuple_hash * +find_or_evict(struct net *net, struct xt_connlimit_conn *conn) +{ + const struct nf_conntrack_tuple_hash *found; + unsigned long a, b; + int cpu = raw_smp_processor_id(); + __s32 age; + + found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); + if (found) + return found; + b = conn->jiffies32; + a = (u32)jiffies; + + /* conn might have been added just before by another cpu and + * might still be unconfirmed. In this case, nf_conntrack_find() + * returns no result. Thus only evict if this cpu added the + * stale entry or if the entry is older than two jiffies. + */ + age = a - b; + if (conn->cpu == cpu || age >= 2) { + hlist_del(&conn->node); + kmem_cache_free(connlimit_conn_cachep, conn); + return ERR_PTR(-ENOENT); + } + + return ERR_PTR(-EAGAIN); +} + unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone, @@ -138,18 +171,27 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; - struct hlist_node *n; struct nf_conn *found_ct; + struct hlist_node *n; unsigned int length = 0; *addit = true; /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); - if (found == NULL) { - hlist_del(&conn->node); - kmem_cache_free(connlimit_conn_cachep, conn); + found = find_or_evict(net, conn); + if (IS_ERR(found)) { + /* Not found, but might be about to be confirmed */ + if (PTR_ERR(found) == -EAGAIN) { + length++; + if (!tuple) + continue; + + if (nf_ct_tuple_equal(&conn->tuple, tuple) && + nf_ct_zone_id(&conn->zone, conn->zone.dir) == + nf_ct_zone_id(zone, zone->dir)) + *addit = false; + } continue; } From 052ccb86b31c184d8ed7d76d1c06d7ca0b8fd32e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Jan 2019 18:42:04 -0200 Subject: [PATCH 1254/2608] netfilter: nf_conncount: don't skip eviction when age is negative commit 4cd273bb91b3001f623f516ec726c49754571b1a upstream. (not in Linus's tree now, but in nf.git + linux-next.git already.) age is signed integer, so result can be negative when the timestamps have a large delta. In this case we want to discard the entry. Instead of using age >= 2 || age < 0, just make it unsigned. Fixes: b36e4523d4d56 ("netfilter: nf_conncount: fix garbage collection confirm race") Reviewed-by: Shawn Bohrer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso [mfo: backport: use older file name, nf_conncount.c -> xt_connlimit.c] Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Sasha Levin --- net/netfilter/xt_connlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 913b86ef3a8d..b1646c24a632 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -141,7 +141,7 @@ find_or_evict(struct net *net, struct xt_connlimit_conn *conn) const struct nf_conntrack_tuple_hash *found; unsigned long a, b; int cpu = raw_smp_processor_id(); - __s32 age; + u32 age; found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); if (found) From c5e4022f25d05429c53cafe84701b5136e628663 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 22 Dec 2018 11:22:26 +0100 Subject: [PATCH 1255/2608] f2fs: fix validation of the block count in sanity_check_raw_super commit 88960068f25fcc3759455d85460234dcc9d43fef upstream. Treat "block_count" from struct f2fs_super_block as 64-bit little endian value in sanity_check_raw_super() because struct f2fs_super_block declares "block_count" as "__le64". This fixes a bug where the superblock validation fails on big endian devices with the following error: F2FS-fs (sda1): Wrong segment_count / block_count (61439 > 0) F2FS-fs (sda1): Can't find valid F2FS filesystem in 1th superblock F2FS-fs (sda1): Wrong segment_count / block_count (61439 > 0) F2FS-fs (sda1): Can't find valid F2FS filesystem in 2th superblock As result of this the partition cannot be mounted. With this patch applied the superblock validation works fine and the partition can be mounted again: F2FS-fs (sda1): Mounted with checkpoint version = 7c84 My little endian x86-64 hardware was able to mount the partition without this fix. To confirm that mounting f2fs filesystems works on big endian machines again I tested this on a 32-bit MIPS big endian (lantiq) device. Fixes: 0cfe75c5b01199 ("f2fs: enhance sanity_check_raw_super() to avoid potential overflows") Cc: stable@vger.kernel.org Signed-off-by: Martin Blumenstingl Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index de4de4ebe64c..fc5c41257e68 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1897,10 +1897,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) { + if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { f2fs_msg(sb, KERN_INFO, - "Wrong segment_count / block_count (%u > %u)", - segment_count, le32_to_cpu(raw_super->block_count)); + "Wrong segment_count / block_count (%u > %llu)", + segment_count, le64_to_cpu(raw_super->block_count)); return 1; } From 5110d0b4c2319e9cc300271525b197af201df2bf Mon Sep 17 00:00:00 2001 From: Nava kishore Manne Date: Tue, 18 Dec 2018 13:18:42 +0100 Subject: [PATCH 1256/2608] serial: uartps: Fix interrupt mask issue to handle the RX interrupts properly commit 260683137ab5276113fc322fdbbc578024185fee upstream. This patch Correct the RX interrupt mask value to handle the RX interrupts properly. Fixes: c8dbdc842d30 ("serial: xuartps: Rewrite the interrupt handling logic") Signed-off-by: Nava kishore Manne Cc: stable Signed-off-by: Michal Simek Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 21c35ad72b99..897b1c515d00 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -130,7 +130,7 @@ MODULE_PARM_DESC(rx_timeout, "Rx timeout, 1-255"); #define CDNS_UART_IXR_RXTRIG 0x00000001 /* RX FIFO trigger interrupt */ #define CDNS_UART_IXR_RXFULL 0x00000004 /* RX FIFO full interrupt. */ #define CDNS_UART_IXR_RXEMPTY 0x00000002 /* RX FIFO empty interrupt. */ -#define CDNS_UART_IXR_MASK 0x00001FFF /* Valid bit mask */ +#define CDNS_UART_IXR_RXMASK 0x000021e7 /* Valid RX bit mask */ /* * Do not enable parity error interrupt for the following @@ -366,7 +366,7 @@ static irqreturn_t cdns_uart_isr(int irq, void *dev_id) cdns_uart_handle_tx(dev_id); isrstatus &= ~CDNS_UART_IXR_TXEMPTY; } - if (isrstatus & CDNS_UART_IXR_MASK) + if (isrstatus & CDNS_UART_IXR_RXMASK) cdns_uart_handle_rx(dev_id, isrstatus); spin_unlock(&port->lock); From cd1f0770d2772ebfab5678725a55b479465558a0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 9 Nov 2018 08:37:44 -0500 Subject: [PATCH 1257/2608] media: vivid: free bitmap_cap when updating std/timings/etc. commit 560ccb75c2caa6b1039dec1a53cd2ef526f5bf03 upstream. When vivid_update_format_cap() is called it should free any overlay bitmap since the compose size will change. Signed-off-by: Hans Verkuil Reported-by: syzbot+0cc8e3cc63ca373722c6@syzkaller.appspotmail.com Cc: # for v3.18 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vivid/vivid-vid-cap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c index 01419455e545..a7a366093524 100644 --- a/drivers/media/platform/vivid/vivid-vid-cap.c +++ b/drivers/media/platform/vivid/vivid-vid-cap.c @@ -455,6 +455,8 @@ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls) tpg_s_rgb_range(&dev->tpg, v4l2_ctrl_g_ctrl(dev->rgb_range_cap)); break; } + vfree(dev->bitmap_cap); + dev->bitmap_cap = NULL; vivid_update_quality(dev); tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap); dev->crop_cap = dev->src_rect; From 35323cb2ae4e6a83bcb961e3744a9278806854e5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 8 Nov 2018 11:12:47 -0500 Subject: [PATCH 1258/2608] media: v4l2-tpg: array index could become negative commit e5f71a27fa12c1a1b02ad478a568e76260f1815e upstream. text[s] is a signed char, so using that as index into the font8x16 array can result in negative indices. Cast it to u8 to be safe. Signed-off-by: Hans Verkuil Reported-by: syzbot+ccf0a61ed12f2a7313ee@syzkaller.appspotmail.com Cc: # for v4.7 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/common/v4l2-tpg/v4l2-tpg-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index a1aacd6fb96f..664c4b8b1a6d 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -1733,7 +1733,7 @@ typedef struct { u16 __; u8 _; } __packed x24; unsigned s; \ \ for (s = 0; s < len; s++) { \ - u8 chr = font8x16[text[s] * 16 + line]; \ + u8 chr = font8x16[(u8)text[s] * 16 + line]; \ \ if (hdiv == 2 && tpg->hflip) { \ pos[3] = (chr & (0x01 << 6) ? fg : bg); \ From 2713b8fd7ef6e8fc5048eee0837b837c7bd1adbc Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 20 Dec 2018 17:45:43 +0000 Subject: [PATCH 1259/2608] MIPS: math-emu: Write-protect delay slot emulation pages commit adcc81f148d733b7e8e641300c5590a2cdc13bf3 upstream. Mapping the delay slot emulation page as both writeable & executable presents a security risk, in that if an exploit can write to & jump into the page then it can be used as an easy way to execute arbitrary code. Prevent this by mapping the page read-only for userland, and using access_process_vm() with the FOLL_FORCE flag to write to it from mips_dsemul(). This will likely be less efficient due to copy_to_user_page() performing cache maintenance on a whole page, rather than a single line as in the previous use of flush_cache_sigtramp(). However this delay slot emulation code ought not to be running in any performance critical paths anyway so this isn't really a problem, and we can probably do better in copy_to_user_page() anyway in future. A major advantage of this approach is that the fix is small & simple to backport to stable kernels. Reported-by: Andy Lutomirski Signed-off-by: Paul Burton Fixes: 432c6bacbd0c ("MIPS: Use per-mm page to execute branch delay slot instructions") Cc: stable@vger.kernel.org # v4.8+ Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Rich Felker Cc: David Daney Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/vdso.c | 4 ++-- arch/mips/math-emu/dsemul.c | 38 +++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 48a9c6b90e07..9df3ebdc7b0f 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -126,8 +126,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* Map delay slot emulation page */ base = mmap_region(NULL, STACK_TOP, PAGE_SIZE, - VM_READ|VM_WRITE|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + VM_READ | VM_EXEC | + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, 0, NULL); if (IS_ERR_VALUE(base)) { ret = base; diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 5450f4d1c920..e2d46cb93ca9 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -214,8 +214,9 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, { int isa16 = get_isa16_mode(regs->cp0_epc); mips_instruction break_math; - struct emuframe __user *fr; - int err, fr_idx; + unsigned long fr_uaddr; + struct emuframe fr; + int fr_idx, ret; /* NOP is easy */ if (ir == 0) @@ -250,27 +251,31 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, fr_idx = alloc_emuframe(); if (fr_idx == BD_EMUFRAME_NONE) return SIGBUS; - fr = &dsemul_page()[fr_idx]; /* Retrieve the appropriately encoded break instruction */ break_math = BREAK_MATH(isa16); /* Write the instructions to the frame */ if (isa16) { - err = __put_user(ir >> 16, - (u16 __user *)(&fr->emul)); - err |= __put_user(ir & 0xffff, - (u16 __user *)((long)(&fr->emul) + 2)); - err |= __put_user(break_math >> 16, - (u16 __user *)(&fr->badinst)); - err |= __put_user(break_math & 0xffff, - (u16 __user *)((long)(&fr->badinst) + 2)); + union mips_instruction _emul = { + .halfword = { ir >> 16, ir } + }; + union mips_instruction _badinst = { + .halfword = { break_math >> 16, break_math } + }; + + fr.emul = _emul.word; + fr.badinst = _badinst.word; } else { - err = __put_user(ir, &fr->emul); - err |= __put_user(break_math, &fr->badinst); + fr.emul = ir; + fr.badinst = break_math; } - if (unlikely(err)) { + /* Write the frame to user memory */ + fr_uaddr = (unsigned long)&dsemul_page()[fr_idx]; + ret = access_process_vm(current, fr_uaddr, &fr, sizeof(fr), + FOLL_FORCE | FOLL_WRITE); + if (unlikely(ret != sizeof(fr))) { MIPS_FPU_EMU_INC_STATS(errors); free_emuframe(fr_idx, current->mm); return SIGBUS; @@ -282,10 +287,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, atomic_set(¤t->thread.bd_emu_frame, fr_idx); /* Change user register context to execute the frame */ - regs->cp0_epc = (unsigned long)&fr->emul | isa16; - - /* Ensure the icache observes our newly written frame */ - flush_cache_sigtramp((unsigned long)&fr->emul); + regs->cp0_epc = fr_uaddr | isa16; return 0; } From 95168354276caa4f254a5d6829b5d2c10edbe677 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 15 Nov 2018 15:53:53 +0800 Subject: [PATCH 1260/2608] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 commit bb53fdf395eed103f85061bfff3b116cee123895 upstream. For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can only flush Node-0's scache. So we add r4k_blast_scache_node() by using (CAC_BASE | (node_id << NODE_ADDRSPACE_SHIFT)) instead of CKSEG0 as the start address. Signed-off-by: Huacai Chen [paul.burton@mips.com: Include asm/mmzone.h from asm/r4kcache.h for nid_to_addrbase(). Add asm/mach-generic/mmzone.h to allow inclusion for all platforms.] Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21129/ Cc: Ralf Baechle Cc: James Hogan Cc: Steven J . Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: # 3.15+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/mach-generic/mmzone.h | 2 + .../mips/include/asm/mach-loongson64/mmzone.h | 1 + arch/mips/include/asm/mmzone.h | 8 ++++ arch/mips/include/asm/r4kcache.h | 22 ++++++++++ arch/mips/mm/c-r4k.c | 44 ++++++++++++++++--- 5 files changed, 70 insertions(+), 7 deletions(-) create mode 100644 arch/mips/include/asm/mach-generic/mmzone.h diff --git a/arch/mips/include/asm/mach-generic/mmzone.h b/arch/mips/include/asm/mach-generic/mmzone.h new file mode 100644 index 000000000000..826392f2ee32 --- /dev/null +++ b/arch/mips/include/asm/mach-generic/mmzone.h @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Intentionally empty */ diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index c9f7e231e66b..59c8b11c090e 100644 --- a/arch/mips/include/asm/mach-loongson64/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h @@ -21,6 +21,7 @@ #define NODE3_ADDRSPACE_OFFSET 0x300000000000UL #define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT) +#define nid_to_addrbase(nid) ((nid) << NODE_ADDRSPACE_SHIFT) #define LEVELS_PER_SLICE 128 diff --git a/arch/mips/include/asm/mmzone.h b/arch/mips/include/asm/mmzone.h index f085fba41da5..2a0fe1d0eac6 100644 --- a/arch/mips/include/asm/mmzone.h +++ b/arch/mips/include/asm/mmzone.h @@ -9,6 +9,14 @@ #include #include +#ifndef pa_to_nid +#define pa_to_nid(addr) 0 +#endif + +#ifndef nid_to_addrbase +#define nid_to_addrbase(nid) 0 +#endif + #ifdef CONFIG_DISCONTIGMEM #define pfn_to_nid(pfn) pa_to_nid((pfn) << PAGE_SHIFT) diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 7f12d7e27c94..e5190126080e 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -20,6 +20,7 @@ #include #include #include +#include #include /* for uaccess_kernel() */ extern void (*r4k_blast_dcache)(void); @@ -747,4 +748,25 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , ) __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , ) __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , ) +/* Currently, this is very specific to Loongson-3 */ +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \ +static inline void blast_##pfx##cache##lsize##_node(long node) \ +{ \ + unsigned long start = CAC_BASE | nid_to_addrbase(node); \ + unsigned long end = start + current_cpu_data.desc.waysize; \ + unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \ + unsigned long ws_end = current_cpu_data.desc.ways << \ + current_cpu_data.desc.waybit; \ + unsigned long ws, addr; \ + \ + for (ws = 0; ws < ws_end; ws += ws_inc) \ + for (addr = start; addr < end; addr += lsize * 32) \ + cache##lsize##_unroll32(addr|ws, indexop); \ +} + +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32) +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64) +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128) + #endif /* _ASM_R4KCACHE_H */ diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index a5893b2cdc0e..bacd67f5d71d 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -459,11 +459,28 @@ static void r4k_blast_scache_setup(void) r4k_blast_scache = blast_scache128; } +static void (*r4k_blast_scache_node)(long node); + +static void r4k_blast_scache_node_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + if (current_cpu_type() != CPU_LOONGSON3) + r4k_blast_scache_node = (void *)cache_noop; + else if (sc_lsize == 16) + r4k_blast_scache_node = blast_scache16_node; + else if (sc_lsize == 32) + r4k_blast_scache_node = blast_scache32_node; + else if (sc_lsize == 64) + r4k_blast_scache_node = blast_scache64_node; + else if (sc_lsize == 128) + r4k_blast_scache_node = blast_scache128_node; +} + static inline void local_r4k___flush_cache_all(void * args) { switch (current_cpu_type()) { case CPU_LOONGSON2: - case CPU_LOONGSON3: case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -480,6 +497,11 @@ static inline void local_r4k___flush_cache_all(void * args) r4k_blast_scache(); break; + case CPU_LOONGSON3: + /* Use get_ebase_cpunum() for both NUMA=y/n */ + r4k_blast_scache_node(get_ebase_cpunum() >> 2); + break; + case CPU_BMIPS5000: r4k_blast_scache(); __sync(); @@ -840,10 +862,14 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) preempt_disable(); if (cpu_has_inclusive_pcaches) { - if (size >= scache_size) - r4k_blast_scache(); - else + if (size >= scache_size) { + if (current_cpu_type() != CPU_LOONGSON3) + r4k_blast_scache(); + else + r4k_blast_scache_node(pa_to_nid(addr)); + } else { blast_scache_range(addr, addr + size); + } preempt_enable(); __sync(); return; @@ -877,9 +903,12 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) preempt_disable(); if (cpu_has_inclusive_pcaches) { - if (size >= scache_size) - r4k_blast_scache(); - else { + if (size >= scache_size) { + if (current_cpu_type() != CPU_LOONGSON3) + r4k_blast_scache(); + else + r4k_blast_scache_node(pa_to_nid(addr)); + } else { /* * There is no clearly documented alignment requirement * for the cache instruction on MIPS processors and @@ -1910,6 +1939,7 @@ void r4k_cache_init(void) r4k_blast_scache_page_setup(); r4k_blast_scache_page_indexed_setup(); r4k_blast_scache_setup(); + r4k_blast_scache_node_setup(); #ifdef CONFIG_EVA r4k_blast_dcache_user_page_setup(); r4k_blast_icache_user_page_setup(); From 5744be55fbb997d44da103c546bcc08972c09265 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 15 Nov 2018 15:53:54 +0800 Subject: [PATCH 1261/2608] MIPS: Ensure pmd_present() returns false after pmd_mknotpresent() commit 92aa0718c9fa5160ad2f0e7b5bffb52f1ea1e51a upstream. This patch is borrowed from ARM64 to ensure pmd_present() returns false after pmd_mknotpresent(). This is needed for THP. References: 5bb1cc0ff9a6 ("arm64: Ensure pmd_present() returns false after pmd_mknotpresent()") Reviewed-by: James Hogan Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21135/ Cc: Ralf Baechle Cc: James Hogan Cc: Steven J . Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: # 3.8+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/pgtable-64.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 67fe6dc5211c..a2252c2a9ded 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -271,6 +271,11 @@ static inline int pmd_bad(pmd_t pmd) static inline int pmd_present(pmd_t pmd) { +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + if (unlikely(pmd_val(pmd) & _PAGE_HUGE)) + return pmd_val(pmd) & _PAGE_PRESENT; +#endif + return pmd_val(pmd) != (unsigned long) invalid_pte_table; } From 5647c1d5eda48cf1a96edba859b609b4b8c62427 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 15 Nov 2018 15:53:56 +0800 Subject: [PATCH 1262/2608] MIPS: Align kernel load address to 64KB commit bec0de4cfad21bd284dbddee016ed1767a5d2823 upstream. KEXEC needs the new kernel's load address to be aligned on a page boundary (see sanity_check_segment_list()), but on MIPS the default vmlinuz load address is only explicitly aligned to 16 bytes. Since the largest PAGE_SIZE supported by MIPS kernels is 64KB, increase the alignment calculated by calc_vmlinuz_load_addr to 64KB. Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21131/ Cc: Ralf Baechle Cc: James Hogan Cc: Steven J . Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: # 2.6.36+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/boot/compressed/calc_vmlinuz_load_addr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 37fe58c19a90..542c3ede9722 100644 --- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -13,6 +13,7 @@ #include #include #include +#include "../../../../include/linux/sizes.h" int main(int argc, char *argv[]) { @@ -45,11 +46,11 @@ int main(int argc, char *argv[]) vmlinuz_load_addr = vmlinux_load_addr + vmlinux_size; /* - * Align with 16 bytes: "greater than that used for any standard data - * types by a MIPS compiler." -- See MIPS Run Linux (Second Edition). + * Align with 64KB: KEXEC needs load sections to be aligned to PAGE_SIZE, + * which may be as large as 64KB depending on the kernel configuration. */ - vmlinuz_load_addr += (16 - vmlinux_size % 16); + vmlinuz_load_addr += (SZ_64K - vmlinux_size % SZ_64K); printf("0x%llx\n", vmlinuz_load_addr); From af8a41b9246c24b0fff385c9639f09dfa427bc10 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 4 Dec 2018 23:44:12 +0000 Subject: [PATCH 1263/2608] MIPS: Expand MIPS32 ASIDs to 64 bits commit ff4dd232ec45a0e45ea69f28f069f2ab22b4908a upstream. ASIDs have always been stored as unsigned longs, ie. 32 bits on MIPS32 kernels. This is problematic because it is feasible for the ASID version to overflow & wrap around to zero. We currently attempt to handle this overflow by simply setting the ASID version to 1, using asid_first_version(), but we make no attempt to account for the fact that there may be mm_structs with stale ASIDs that have versions which we now reuse due to the overflow & wrap around. Encountering this requires that: 1) A struct mm_struct X is active on CPU A using ASID (V,n). 2) That mm is not used on CPU A for the length of time that it takes for CPU A's asid_cache to overflow & wrap around to the same version V that the mm had in step 1. During this time tasks using the mm could either be sleeping or only scheduled on other CPUs. 3) Some other mm Y becomes active on CPU A and is allocated the same ASID (V,n). 4) mm X now becomes active on CPU A again, and now incorrectly has the same ASID as mm Y. Where struct mm_struct ASIDs are represented above in the format (version, EntryHi.ASID), and on a typical MIPS32 system version will be 24 bits wide & EntryHi.ASID will be 8 bits wide. The length of time required in step 2 is highly dependent upon the CPU & workload, but for a hypothetical 2GHz CPU running a workload which generates a new ASID every 10000 cycles this period is around 248 days. Due to this long period of time & the fact that tasks need to be scheduled in just the right (or wrong, depending upon your inclination) way, this is obviously a difficult bug to encounter but it's entirely possible as evidenced by reports. In order to fix this, simply extend ASIDs to 64 bits even on MIPS32 builds. This will extend the period of time required for the hypothetical system above to encounter the problem from 28 days to around 3 trillion years, which feels safely outside of the realms of possibility. The cost of this is slightly more generated code in some commonly executed paths, but this is pretty minimal: | Code Size Gain | Percentage -----------------------|----------------|------------- decstation_defconfig | +270 | +0.00% 32r2el_defconfig | +652 | +0.01% 32r6el_defconfig | +1000 | +0.01% I have been unable to measure any change in performance of the LMbench lat_ctx or lat_proc tests resulting from the 64b ASIDs on either 32r2el_defconfig+interAptiv or 32r6el_defconfig+I6500 systems. Signed-off-by: Paul Burton Suggested-by: James Hogan References: https://lore.kernel.org/linux-mips/80B78A8B8FEE6145A87579E8435D78C30205D5F3@fzex.ruijie.com.cn/ References: https://lore.kernel.org/linux-mips/1488684260-18867-1-git-send-email-jiwei.sun@windriver.com/ Cc: Jiwei Sun Cc: Yu Huabing Cc: stable@vger.kernel.org # 2.6.12+ Cc: linux-mips@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/cpu-info.h | 2 +- arch/mips/include/asm/mmu.h | 2 +- arch/mips/include/asm/mmu_context.h | 10 ++++------ arch/mips/mm/c-r3k.c | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index a41059d47d31..ed7ffe4e63a3 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -50,7 +50,7 @@ struct guest_info { #define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */ struct cpuinfo_mips { - unsigned long asid_cache; + u64 asid_cache; #ifdef CONFIG_MIPS_ASID_BITS_VARIABLE unsigned long asid_mask; #endif diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h index 0740be7d5d4a..24d6b42345fb 100644 --- a/arch/mips/include/asm/mmu.h +++ b/arch/mips/include/asm/mmu.h @@ -7,7 +7,7 @@ #include typedef struct { - unsigned long asid[NR_CPUS]; + u64 asid[NR_CPUS]; void *vdso; atomic_t fp_mode_switching; diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index da2004cef2d5..47cc01d948df 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -75,14 +75,14 @@ extern unsigned long pgd_current[]; * All unused by hardware upper bits will be considered * as a software asid extension. */ -static unsigned long asid_version_mask(unsigned int cpu) +static inline u64 asid_version_mask(unsigned int cpu) { unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]); - return ~(asid_mask | (asid_mask - 1)); + return ~(u64)(asid_mask | (asid_mask - 1)); } -static unsigned long asid_first_version(unsigned int cpu) +static inline u64 asid_first_version(unsigned int cpu) { return ~asid_version_mask(cpu) + 1; } @@ -101,14 +101,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) { - unsigned long asid = asid_cache(cpu); + u64 asid = asid_cache(cpu); if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) { if (cpu_has_vtag_icache) flush_icache_all(); local_flush_tlb_all(); /* start new asid cycle */ - if (!asid) /* fix version if needed */ - asid = asid_first_version(cpu); } cpu_context(cpu, mm) = asid_cache(cpu) = asid; diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c index 3466fcdae0ca..01848cdf2074 100644 --- a/arch/mips/mm/c-r3k.c +++ b/arch/mips/mm/c-r3k.c @@ -245,7 +245,7 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma, pmd_t *pmdp; pte_t *ptep; - pr_debug("cpage[%08lx,%08lx]\n", + pr_debug("cpage[%08llx,%08lx]\n", cpu_context(smp_processor_id(), mm), addr); /* No ASID => no such page in the cache. */ From ac40fe537c3b0c9d518466d5735ea17ea2d639ff Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 2 Jan 2019 20:43:01 +0200 Subject: [PATCH 1264/2608] MIPS: OCTEON: mark RGMII interface disabled on OCTEON III commit edefae94b7b9f10d5efe32dece5a36e9d9ecc29e upstream. Commit 885872b722b7 ("MIPS: Octeon: Add Octeon III CN7xxx interface detection") added RGMII interface detection for OCTEON III, but it results in the following logs: [ 7.165984] ERROR: Unsupported Octeon model in __cvmx_helper_rgmii_probe [ 7.173017] ERROR: Unsupported Octeon model in __cvmx_helper_rgmii_probe The current RGMII routines are valid only for older OCTEONS that use GMX/ASX hardware blocks. On later chips AGL should be used, but support for that is missing in the mainline. Until that is added, mark the interface as disabled. Fixes: 885872b722b7 ("MIPS: Octeon: Add Octeon III CN7xxx interface detection") Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/executive/cvmx-helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c index c683c369bca5..c376f17e142c 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c @@ -286,7 +286,8 @@ static cvmx_helper_interface_mode_t __cvmx_get_mode_cn7xxx(int interface) case 3: return CVMX_HELPER_INTERFACE_MODE_LOOP; case 4: - return CVMX_HELPER_INTERFACE_MODE_RGMII; + /* TODO: Implement support for AGL (RGMII). */ + return CVMX_HELPER_INTERFACE_MODE_DISABLED; default: return CVMX_HELPER_INTERFACE_MODE_DISABLED; } From 8697c15f2d8bad27119e37269bd855e69f149634 Mon Sep 17 00:00:00 2001 From: Georgy A Bystrenin Date: Fri, 21 Dec 2018 00:11:42 -0600 Subject: [PATCH 1265/2608] CIFS: Fix error mapping for SMB2_LOCK command which caused OFD lock problem commit 9a596f5b39593414c0ec80f71b94a226286f084e upstream. While resolving a bug with locks on samba shares found a strange behavior. When a file locked by one node and we trying to lock it from another node it fail with errno 5 (EIO) but in that case errno must be set to (EACCES | EAGAIN). This isn't happening when we try to lock file second time on same node. In this case it returns EACCES as expected. Also this issue not reproduces when we use SMB1 protocol (vers=1.0 in mount options). Further investigation showed that the mapping from status_to_posix_error is different for SMB1 and SMB2+ implementations. For SMB1 mapping is [NT_STATUS_LOCK_NOT_GRANTED to ERRlock] (See fs/cifs/netmisc.c line 66) but for SMB2+ mapping is [STATUS_LOCK_NOT_GRANTED to -EIO] (see fs/cifs/smb2maperror.c line 383) Quick changes in SMB2+ mapping from EIO to EACCES has fixed issue. BUG: https://bugzilla.kernel.org/show_bug.cgi?id=201971 Signed-off-by: Georgy A Bystrenin Reviewed-by: Pavel Shilovsky CC: Stable Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2maperror.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 62c88dfed57b..d7e839cb773f 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -378,8 +378,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_NONEXISTENT_EA_ENTRY, -EIO, "STATUS_NONEXISTENT_EA_ENTRY"}, {STATUS_NO_EAS_ON_FILE, -ENODATA, "STATUS_NO_EAS_ON_FILE"}, {STATUS_EA_CORRUPT_ERROR, -EIO, "STATUS_EA_CORRUPT_ERROR"}, - {STATUS_FILE_LOCK_CONFLICT, -EIO, "STATUS_FILE_LOCK_CONFLICT"}, - {STATUS_LOCK_NOT_GRANTED, -EIO, "STATUS_LOCK_NOT_GRANTED"}, + {STATUS_FILE_LOCK_CONFLICT, -EACCES, "STATUS_FILE_LOCK_CONFLICT"}, + {STATUS_LOCK_NOT_GRANTED, -EACCES, "STATUS_LOCK_NOT_GRANTED"}, {STATUS_DELETE_PENDING, -ENOENT, "STATUS_DELETE_PENDING"}, {STATUS_CTL_FILE_NOT_SUPPORTED, -ENOSYS, "STATUS_CTL_FILE_NOT_SUPPORTED"}, From f6be406e60c57692df4d5c6d082de1f08a3dac73 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 13 Dec 2018 16:06:14 +0000 Subject: [PATCH 1266/2608] arm64: KVM: Avoid setting the upper 32 bits of VTCR_EL2 to 1 commit df655b75c43fba0f2621680ab261083297fd6d16 upstream. Although bit 31 of VTCR_EL2 is RES1, we inadvertently end up setting all of the upper 32 bits to 1 as well because we define VTCR_EL2_RES1 as signed, which is sign-extended when assigning to kvm->arch.vtcr. Lucky for us, the architecture currently treats these upper bits as RES0 so, whilst we've been naughty, we haven't set fire to anything yet. Cc: Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_arm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 555d463c0eaa..73cc4309fe01 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -99,7 +99,7 @@ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) /* VTCR_EL2 Registers bits */ -#define VTCR_EL2_RES1 (1 << 31) +#define VTCR_EL2_RES1 (1U << 31) #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK From 83c2752a5602f68ffbb21ea25a72ec23a0a260ba Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Dec 2018 14:59:09 +0000 Subject: [PATCH 1267/2608] arm/arm64: KVM: vgic: Force VM halt when changing the active state of GICv3 PPIs/SGIs commit 107352a24900fb458152b92a4e72fbdc83fd5510 upstream. We currently only halt the guest when a vCPU messes with the active state of an SPI. This is perfectly fine for GICv2, but isn't enough for GICv3, where all vCPUs can access the state of any other vCPU. Let's broaden the condition to include any GICv3 interrupt that has an active state (i.e. all but LPIs). Cc: stable@vger.kernel.org Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-mmio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index b4c5baf4af45..36194c666814 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -241,14 +241,16 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, */ static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) { - if (intid > VGIC_NR_PRIVATE_IRQS) + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + intid > VGIC_NR_PRIVATE_IRQS) kvm_arm_halt_guest(vcpu->kvm); } /* See vgic_change_active_prepare */ static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) { - if (intid > VGIC_NR_PRIVATE_IRQS) + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + intid > VGIC_NR_PRIVATE_IRQS) kvm_arm_resume_guest(vcpu->kvm); } From 3f23e65d32b7029bf83e3e9a853b7d37c25c2551 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 7 Nov 2018 02:39:13 +0000 Subject: [PATCH 1268/2608] rtc: m41t80: Correct alarm month range with RTC reads commit 3cc9ffbb1f51eb4320575a48e4805a8f52e0e26b upstream. Add the missing adjustment of the month range on alarm reads from the RTC, correcting an issue coming from commit 9c6dfed92c3e ("rtc: m41t80: add alarm functionality"). The range is 1-12 for hardware and 0-11 for `struct rtc_time', and is already correctly handled on alarm writes to the RTC. It was correct up until commit 48e9766726eb ("drivers/rtc/rtc-m41t80.c: remove disabled alarm functionality") too, which removed the previous implementation of alarm support. Signed-off-by: Maciej W. Rozycki Fixes: 9c6dfed92c3e ("rtc: m41t80: add alarm functionality") References: 48e9766726eb ("drivers/rtc/rtc-m41t80.c: remove disabled alarm functionality") Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-m41t80.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 6620016869cf..407aa1ea2421 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -404,7 +404,7 @@ static int m41t80_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) alrm->time.tm_min = bcd2bin(alarmvals[3] & 0x7f); alrm->time.tm_hour = bcd2bin(alarmvals[2] & 0x3f); alrm->time.tm_mday = bcd2bin(alarmvals[1] & 0x3f); - alrm->time.tm_mon = bcd2bin(alarmvals[0] & 0x3f); + alrm->time.tm_mon = bcd2bin(alarmvals[0] & 0x3f) - 1; alrm->enabled = !!(alarmvals[0] & M41T80_ALMON_AFE); alrm->pending = (flags & M41T80_FLAGS_AF) && alrm->enabled; From cb2520535f6de543038a909746bee607e2138164 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 16 Oct 2018 16:37:16 +0300 Subject: [PATCH 1269/2608] tpm: tpm_try_transmit() refactor error flow. commit 01f54664a4db0d612de0ece8e0022f21f9374e9b upstream. First, rename out_no_locality to out_locality for bailing out on both tpm_cmd_ready() and tpm_request_locality() failure. Second, ignore the return value of go_to_idle() as it may override the return value of the actual tpm operation, the go_to_idle() error will be caught on any consequent command. Last, fix the wrong 'goto out', that jumped back instead of forward. Cc: stable@vger.kernel.org Fixes: 627448e85c76 ("tpm: separate cmd_ready/go_idle from runtime_pm") Signed-off-by: Tomas Winkler Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-interface.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 6e93df272c20..038b91bcbd31 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -479,13 +479,15 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip, if (need_locality) { rc = tpm_request_locality(chip, flags); - if (rc < 0) - goto out_no_locality; + if (rc < 0) { + need_locality = false; + goto out_locality; + } } rc = tpm_cmd_ready(chip, flags); if (rc) - goto out; + goto out_locality; rc = tpm2_prepare_space(chip, space, ordinal, buf); if (rc) @@ -549,14 +551,13 @@ out_recv: dev_err(&chip->dev, "tpm2_commit_space: error %d\n", rc); out: - rc = tpm_go_idle(chip, flags); - if (rc) - goto out; + /* may fail but do not override previous error value in rc */ + tpm_go_idle(chip, flags); +out_locality: if (need_locality) tpm_relinquish_locality(chip, flags); -out_no_locality: if (chip->ops->clk_enable != NULL) chip->ops->clk_enable(chip, false); From c85a71fe7684ec4c00a71458bfcf148904256721 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Fri, 19 Oct 2018 21:22:47 +0300 Subject: [PATCH 1270/2608] tpm: tpm_i2c_nuvoton: use correct command duration for TPM 2.x commit 2ba5780ce30549cf57929b01d8cba6fe656e31c5 upstream. tpm_i2c_nuvoton calculated commands duration using TPM 1.x values via tpm_calc_ordinal_duration() also for TPM 2.x chips. Call tpm2_calc_ordinal_duration() for retrieving ordinal duration for TPM 2.X chips. Cc: stable@vger.kernel.org Cc: Nayna Jain Signed-off-by: Tomas Winkler Reviewed-by: Nayna Jain Tested-by: Nayna Jain (For TPM 2.0) Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_i2c_nuvoton.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index caa86b19c76d..f74f451baf6a 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -369,6 +369,7 @@ static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) struct device *dev = chip->dev.parent; struct i2c_client *client = to_i2c_client(dev); u32 ordinal; + unsigned long duration; size_t count = 0; int burst_count, bytes2write, retries, rc = -EIO; @@ -455,10 +456,12 @@ static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) return rc; } ordinal = be32_to_cpu(*((__be32 *) (buf + 6))); - rc = i2c_nuvoton_wait_for_data_avail(chip, - tpm_calc_ordinal_duration(chip, - ordinal), - &priv->read_queue); + if (chip->flags & TPM_CHIP_FLAG_TPM2) + duration = tpm2_calc_ordinal_duration(chip, ordinal); + else + duration = tpm_calc_ordinal_duration(chip, ordinal); + + rc = i2c_nuvoton_wait_for_data_avail(chip, duration, &priv->read_queue); if (rc) { dev_err(dev, "%s() timeout command duration\n", __func__); i2c_nuvoton_ready(chip); From 0dacf0458c02fa9fdd19813c86ee66809d5e0db1 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 29 Nov 2018 15:14:49 +0100 Subject: [PATCH 1271/2608] spi: bcm2835: Unbreak the build of esoteric configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 29bdedfd9cf40e59456110ca417a8cb672ac9b92 upstream. Commit e82b0b382845 ("spi: bcm2835: Fix race on DMA termination") broke the build with COMPILE_TEST=y on arches whose cmpxchg() requires 32-bit operands (xtensa, older arm ISAs). Fix by changing the dma_pending flag's type from bool to unsigned int. Fixes: e82b0b382845 ("spi: bcm2835: Fix race on DMA termination") Signed-off-by: Lukas Wunner Signed-off-by: Mark Brown Cc: Frank Pavlic Cc: Martin Sperl Cc: Noralf Trønnes Cc: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 774161bbcb2e..25abf2d1732a 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -88,7 +88,7 @@ struct bcm2835_spi { u8 *rx_buf; int tx_len; int rx_len; - bool dma_pending; + unsigned int dma_pending; }; static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg) From fc9e3f49a2884d84fbe8633d57213d688670da40 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 21 Nov 2018 19:47:57 -0800 Subject: [PATCH 1272/2608] MIPS: Only include mmzone.h when CONFIG_NEED_MULTIPLE_NODES=y commit 66a4059ba72c23ae74a7c702894ff76c4b7eac1f upstream. MIPS' asm/mmzone.h includes the machine/platform mmzone.h unconditionally, but since commit bb53fdf395ee ("MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3") is included by asm/rk4cache.h for all r4k-style configs regardless of CONFIG_NEED_MULTIPLE_NODES. This is problematic when CONFIG_NEED_MULTIPLE_NODES=n because both the loongson3 & ip27 mmzone.h headers unconditionally define the NODE_DATA preprocessor macro which is aready defined by linux/mmzone.h, resulting in the following build error: In file included from ./arch/mips/include/asm/mmzone.h:10, from ./arch/mips/include/asm/r4kcache.h:23, from arch/mips/mm/c-r4k.c:33: ./arch/mips/include/asm/mach-loongson64/mmzone.h:48: error: "NODE_DATA" redefined [-Werror] #define NODE_DATA(n) (&__node_data[(n)]->pglist) In file included from ./include/linux/topology.h:32, from ./include/linux/irq.h:19, from ./include/asm-generic/hardirq.h:13, from ./arch/mips/include/asm/hardirq.h:16, from ./include/linux/hardirq.h:9, from arch/mips/mm/c-r4k.c:11: ./include/linux/mmzone.h:907: note: this is the location of the previous definition #define NODE_DATA(nid) (&contig_page_data) Resolve this by only including the machine mmzone.h when CONFIG_NEED_MULTIPLE_NODES=y, which also removes the need for the empty mach-generic version of the header which we delete. Signed-off-by: Paul Burton Fixes: bb53fdf395ee ("MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3") Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/mach-generic/mmzone.h | 2 -- arch/mips/include/asm/mmzone.h | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) delete mode 100644 arch/mips/include/asm/mach-generic/mmzone.h diff --git a/arch/mips/include/asm/mach-generic/mmzone.h b/arch/mips/include/asm/mach-generic/mmzone.h deleted file mode 100644 index 826392f2ee32..000000000000 --- a/arch/mips/include/asm/mach-generic/mmzone.h +++ /dev/null @@ -1,2 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Intentionally empty */ diff --git a/arch/mips/include/asm/mmzone.h b/arch/mips/include/asm/mmzone.h index 2a0fe1d0eac6..b826b8473e95 100644 --- a/arch/mips/include/asm/mmzone.h +++ b/arch/mips/include/asm/mmzone.h @@ -7,7 +7,10 @@ #define _ASM_MMZONE_H_ #include -#include + +#ifdef CONFIG_NEED_MULTIPLE_NODES +# include +#endif #ifndef pa_to_nid #define pa_to_nid(addr) 0 From 24737fa6bcf1d7ffb71ceb78d7a7c275cb7e1d13 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Jan 2019 17:14:53 +0100 Subject: [PATCH 1273/2608] Linux 4.14.92 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a6fb3b158a19..be4d1f25cb29 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 91 +SUBLEVEL = 92 EXTRAVERSION = NAME = Petit Gorille From fd78ae770d6f5caa6a97af7757fdfa92be066a6e Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 13 Nov 2018 11:55:36 +0100 Subject: [PATCH 1274/2608] pinctrl: meson: fix pull enable register calculation [ Upstream commit 614b1868a125a0ba24be08f3a7fa832ddcde6bca ] We just changed the code so we apply bias disable on the correct register but forgot to align the register calculation. The result is that we apply the change on the correct register, but possibly at the incorrect offset/bit This went undetected because offsets tends to be the same between REG_PULL and REG_PULLEN for a given pin the EE controller. This is not true for the AO controller. Fixes: e39f9dd8206a ("pinctrl: meson: fix pinconf bias disable") Signed-off-by: Jerome Brunet Acked-by: Neil Armstrong Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/meson/pinctrl-meson.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 6c43322dbb97..2998941fdeca 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -272,7 +272,8 @@ static int meson_pinconf_set(struct pinctrl_dev *pcdev, unsigned int pin, case PIN_CONFIG_BIAS_DISABLE: dev_dbg(pc->dev, "pin %u: disable bias\n", pin); - meson_calc_reg_and_bit(bank, pin, REG_PULL, ®, &bit); + meson_calc_reg_and_bit(bank, pin, REG_PULLEN, ®, + &bit); ret = regmap_update_bits(pc->reg_pullen, reg, BIT(bit), 0); if (ret) From 3f9db10dfc625b294b636d59eb87754940cce30e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 27 Nov 2018 09:01:54 +1100 Subject: [PATCH 1275/2608] powerpc: Fix COFF zImage booting on old powermacs [ Upstream commit 5564597d51c8ff5b88d95c76255e18b13b760879 ] Commit 6975a783d7b4 ("powerpc/boot: Allow building the zImage wrapper as a relocatable ET_DYN", 2011-04-12) changed the procedure descriptor at the start of crt0.S to have a hard-coded start address of 0x500000 rather than a reference to _zimage_start, presumably because having a reference to a symbol introduced a relocation which is awkward to handle in a position-independent executable. Unfortunately, what is at 0x500000 in the COFF image is not the first instruction, but the procedure descriptor itself, that is, a word containing 0x500000, which is not a valid instruction. Hence, booting a COFF zImage results in a "DEFAULT CATCH!, code=FFF00700" message from Open Firmware. This fixes the problem by (a) putting the procedure descriptor in the data section and (b) adding a branch to _zimage_start as the first instruction in the program. Fixes: 6975a783d7b4 ("powerpc/boot: Allow building the zImage wrapper as a relocatable ET_DYN") Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/boot/crt0.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 32dfe6d083f3..9b9d17437373 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -15,7 +15,7 @@ RELA = 7 RELACOUNT = 0x6ffffff9 - .text + .data /* A procedure descriptor used when booting this as a COFF file. * When making COFF, this comes first in the link and we're * linked at 0x500000. @@ -23,6 +23,8 @@ RELACOUNT = 0x6ffffff9 .globl _zimage_start_opd _zimage_start_opd: .long 0x500000, 0, 0, 0 + .text + b _zimage_start #ifdef __powerpc64__ .balign 8 From 5d661328f91c3c429a96dd62b628254057f200e8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 26 Nov 2018 12:59:16 +1100 Subject: [PATCH 1276/2608] powerpc/mm: Fix linux page tables build with some configs [ Upstream commit 462951cd32e1496dc64b00051dfb777efc8ae5d8 ] For some configs the build fails with: arch/powerpc/mm/dump_linuxpagetables.c: In function 'populate_markers': arch/powerpc/mm/dump_linuxpagetables.c:306:39: error: 'PKMAP_BASE' undeclared (first use in this function) arch/powerpc/mm/dump_linuxpagetables.c:314:50: error: 'LAST_PKMAP' undeclared (first use in this function) These come from highmem.h, including that fixes the build. Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/mm/dump_linuxpagetables.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index 31c1c61afaa4..0bbaf7344872 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include From af00b69210ca7e9c97a6b3a77841c4f32139125d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 26 Nov 2018 11:52:18 +0100 Subject: [PATCH 1277/2608] HID: ite: Add USB id match for another ITE based keyboard rfkill key quirk [ Upstream commit 4050207485e47e00353e87f2fe2166083e282688 ] The 258a:6a88 keyboard-dock shipped with the Prowise PT301 tablet is likely another ITE based design. The controller die is directly bonded to the PCB with a blob of black glue on top so there are no markings and the 258a vendor-id used is unknown anywhere. But the keyboard has the exact same hotkeys mapped to Fn+F1 - F10 as the other ITE8595 keyboard I have *and* it has the same quirky behavior wrt the rfkill hotkey. Either way as said this keyboard has the same quirk for its rfkill / airplane mode hotkey as the ITE 8595 chip, it only sends a single release event when pressed and released, it never sends a press event. This commit adds the 258a:6a88 USB id to the hid-ite id-table, fixing the rfkill key not working on this keyboard. Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-ite.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index fcc688df694c..28ae3dc57103 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -17,6 +17,9 @@ #ifndef HID_IDS_H_FILE #define HID_IDS_H_FILE +#define USB_VENDOR_ID_258A 0x258a +#define USB_DEVICE_ID_258A_6A88 0x6a88 + #define USB_VENDOR_ID_3M 0x0596 #define USB_DEVICE_ID_3M1968 0x0500 #define USB_DEVICE_ID_3M2256 0x0502 diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 1882a4ab0f29..98b059d79bc8 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -42,6 +42,7 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field, static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, + { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); From 8cd34030db2ab78d572e3276d7f4e508c392b6e3 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 4 Dec 2018 03:17:45 +0000 Subject: [PATCH 1278/2608] ARM: imx: update the cpu power up timing setting on i.mx6sx [ Upstream commit 1e434b703248580b7aaaf8a115d93e682f57d29f ] The sw2iso count should cover ARM LDO ramp-up time, the MAX ARM LDO ramp-up time may be up to more than 100us on some boards, this patch sets sw2iso to 0xf (~384us) which is the reset value, and it is much more safe to cover different boards, since we have observed that some customer boards failed with current setting of 0x2. Fixes: 05136f0897b5 ("ARM: imx: support arm power off in cpuidle for i.mx6sx") Signed-off-by: Anson Huang Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/mach-imx/cpuidle-imx6sx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c index c5a5c3a70ab1..edb888ac5ad3 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sx.c +++ b/arch/arm/mach-imx/cpuidle-imx6sx.c @@ -108,7 +108,7 @@ int __init imx6sx_cpuidle_init(void) * except for power up sw2iso which need to be * larger than LDO ramp up time. */ - imx_gpc_set_arm_power_up_timing(2, 1); + imx_gpc_set_arm_power_up_timing(0xf, 1); imx_gpc_set_arm_power_down_timing(1, 1); return cpuidle_register(&imx6sx_cpuidle_driver, NULL); From 9c3fe68759f0743897bc9c8ab0ba3c5e7d9f0925 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 5 Dec 2018 09:05:30 -0200 Subject: [PATCH 1279/2608] ARM: dts: imx7d-nitrogen7: Fix the description of the Wifi clock [ Upstream commit f15096f12a4e9340168df5fdd9201aa8ed60d59e ] According to bindings/regulator/fixed-regulator.txt the 'clocks' and 'clock-names' properties are not valid ones. In order to turn on the Wifi clock the correct location for describing the CLKO2 clock is via a mmc-pwrseq handle, so do it accordingly. Fixes: 56354959cfec ("ARM: dts: imx: add Boundary Devices Nitrogen7 board") Signed-off-by: Fabio Estevam Acked-by: Troy Kisky Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx7d-nitrogen7.dts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts index e7998308861f..f8caea16bc2d 100644 --- a/arch/arm/boot/dts/imx7d-nitrogen7.dts +++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts @@ -117,13 +117,17 @@ compatible = "regulator-fixed"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - clocks = <&clks IMX7D_CLKO2_ROOT_DIV>; - clock-names = "slow"; regulator-name = "reg_wlan"; startup-delay-us = <70000>; gpio = <&gpio4 21 GPIO_ACTIVE_HIGH>; enable-active-high; }; + + usdhc2_pwrseq: usdhc2_pwrseq { + compatible = "mmc-pwrseq-simple"; + clocks = <&clks IMX7D_CLKO2_ROOT_DIV>; + clock-names = "ext_clock"; + }; }; &adc1 { @@ -430,6 +434,7 @@ bus-width = <4>; non-removable; vmmc-supply = <®_wlan>; + mmc-pwrseq = <&usdhc2_pwrseq>; cap-power-off-card; keep-power-in-suspend; status = "okay"; From 9940eeaa995557b3ece7d45b567701550863013f Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Thu, 6 Dec 2018 09:03:36 +1000 Subject: [PATCH 1280/2608] Input: restore EV_ABS ABS_RESERVED [ Upstream commit c201e3808e0e4be9b98d192802085a9f491bd80c ] ABS_RESERVED was added in d9ca1c990a7 and accidentally removed as part of ffe0e7cf290f5c9 when the high-resolution scrolling code was removed. Signed-off-by: Peter Hutterer Reviewed-by: Martin Kepplinger Acked-by: Benjamin Tissoires Acked-by: Dmitry Torokhov Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- include/uapi/linux/input-event-codes.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index f4058bd4c373..61769d4b7dba 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -742,6 +742,15 @@ #define ABS_MISC 0x28 +/* + * 0x2e is reserved and should not be used in input drivers. + * It was used by HID as ABS_MISC+6 and userspace needs to detect if + * the next ABS_* event is correct or is just ABS_MISC + n. + * We define here ABS_RESERVED so userspace can rely on it and detect + * the situation described above. + */ +#define ABS_RESERVED 0x2e + #define ABS_MT_SLOT 0x2f /* MT slot being modified */ #define ABS_MT_TOUCH_MAJOR 0x30 /* Major axis of touching ellipse */ #define ABS_MT_TOUCH_MINOR 0x31 /* Minor axis (omit if circular) */ From 7932e14f9fec2dca602b6e213445eb9bc5c6c4c6 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Dec 2018 14:17:20 -0800 Subject: [PATCH 1281/2608] checkstack.pl: fix for aarch64 [ Upstream commit f1733a1d3cd32a9492f4cf866be37bb46e10163d ] There is actually a space after "sp," like this, ffff2000080813c8: a9bb7bfd stp x29, x30, [sp, #-80]! Right now, checkstack.pl isn't able to print anything on aarch64, because it won't be able to match the stating objdump line of a function due to this missing space. Hence, it displays every stack as zero-size. After this patch, checkpatch.pl is able to match the start of a function's objdump, and is then able to calculate each function's stack correctly. Link: http://lkml.kernel.org/r/20181207195843.38528-1-cai@lca.pw Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- scripts/checkstack.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index cb993801e4b2..16dc157f9662 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -46,8 +46,8 @@ my (@stack, $re, $dre, $x, $xs, $funcre); $xs = "[0-9a-f ]"; # hex character or space $funcre = qr/^$x* <(.*)>:$/; if ($arch eq 'aarch64') { - #ffffffc0006325cc: a9bb7bfd stp x29, x30, [sp,#-80]! - $re = qr/^.*stp.*sp,\#-([0-9]{1,8})\]\!/o; + #ffffffc0006325cc: a9bb7bfd stp x29, x30, [sp, #-80]! + $re = qr/^.*stp.*sp, \#-([0-9]{1,8})\]\!/o; } elsif ($arch eq 'arm') { #c0008ffc: e24dd064 sub sp, sp, #100 ; 0x64 $re = qr/.*sub.*sp, sp, #(([0-9]{2}|[3-9])[0-9]{2})/o; From bf2b044bc820cf1d6fa74ff77b09c33886a01568 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 27 Oct 2018 06:12:06 +0000 Subject: [PATCH 1282/2608] xfrm: Fix error return code in xfrm_output_one() [ Upstream commit 533555e5cbb6aa2d77598917871ae5b579fe724b ] xfrm_output_one() does not return a error code when there is no dst_entry attached to the skb, it is still possible crash with a NULL pointer dereference in xfrm_output_resume(). Fix it by return error code -EHOSTUNREACH. Fixes: 9e1437937807 ("xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry.") Signed-off-by: Wei Yongjun Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index c47660fba498..b226b230e8bf 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -103,6 +103,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); if (!skb_dst(skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + err = -EHOSTUNREACH; goto error_nolock; } From ed04bb260d161cab36118537f888329712ad4abe Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 5 Nov 2018 17:00:53 +0900 Subject: [PATCH 1283/2608] xfrm: Fix bucket count reported to userspace [ Upstream commit ca92e173ab34a4f7fc4128bd372bd96f1af6f507 ] sadhcnt is reported by `ip -s xfrm state count` as "buckets count", not the hash mask. Fixes: 28d8909bc790 ("[XFRM]: Export SAD info.") Signed-off-by: Benjamin Poirier Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 6c4ec69e11a0..0cd2bdf3b217 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -789,7 +789,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&net->xfrm.xfrm_state_lock); si->sadcnt = net->xfrm.state_num; - si->sadhcnt = net->xfrm.state_hmask; + si->sadhcnt = net->xfrm.state_hmask + 1; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&net->xfrm.xfrm_state_lock); } From 4ddb80f4193cf344550458087977a500a2c3cd4c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 22 Nov 2018 07:26:24 +0100 Subject: [PATCH 1284/2608] xfrm: Fix NULL pointer dereference in xfrm_input when skb_dst_force clears the dst_entry. [ Upstream commit 0152eee6fc3b84298bb6a79961961734e8afa5b8 ] Since commit 222d7dbd258d ("net: prevent dst uses after free") skb_dst_force() might clear the dst_entry attached to the skb. The xfrm code doesn't expect this to happen, so we crash with a NULL pointer dereference in this case. Fix it by checking skb_dst(skb) for NULL after skb_dst_force() and drop the packet in case the dst_entry was cleared. We also move the skb_dst_force() to a codepath that is not used when the transformation was offloaded, because in this case we don't have a dst_entry attached to the skb. The output and forwarding path was already fixed by commit 9e1437937807 ("xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry.") Fixes: 222d7dbd258d ("net: prevent dst uses after free") Reported-by: Jean-Philippe Menil Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_input.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 8e75319dd9c0..06dec32503bd 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -341,6 +341,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; + skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; + } + lock: spin_lock(&x->lock); @@ -380,7 +386,6 @@ lock: XFRM_SKB_CB(skb)->seq.input.low = seq; XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; - skb_dst_force(skb); dev_hold(skb->dev); if (crypto_done) From 5ac1f046ab4a6f629a524b9bd4f15f72bf6a4798 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 5 Dec 2018 14:12:19 +0100 Subject: [PATCH 1285/2608] netfilter: seqadj: re-load tcp header pointer after possible head reallocation [ Upstream commit 530aad77010b81526586dfc09130ec875cd084e4 ] When adjusting sack block sequence numbers, skb_make_writable() gets called to make sure tcp options are all in the linear area, and buffer is not shared. This can cause tcp header pointer to get reallocated, so we must reaload it to avoid memory corruption. This bug pre-dates git history. Reported-by: Neel Mehta Reported-by: Shane Huntley Reported-by: Heather Adkins Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_seqadj.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index a975efd6b8c3..9da303461069 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -115,12 +115,12 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, /* TCP SACK sequence number adjustment */ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, unsigned int protoff, - struct tcphdr *tcph, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - unsigned int dir, optoff, optend; + struct tcphdr *tcph = (void *)skb->data + protoff; struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + unsigned int dir, optoff, optend; optoff = protoff + sizeof(struct tcphdr); optend = protoff + tcph->doff * 4; @@ -128,6 +128,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, if (!skb_make_writable(skb, optend)) return 0; + tcph = (void *)skb->data + protoff; dir = CTINFO2DIR(ctinfo); while (optoff < optend) { @@ -207,7 +208,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb, ntohl(newack)); tcph->ack_seq = newack; - res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo); + res = nf_ct_sack_adjust(skb, protoff, ct, ctinfo); out: spin_unlock_bh(&ct->lock); From bbed2ced931009ba45ff32e8abdef8523548fd9b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 1 Nov 2018 08:25:30 +0300 Subject: [PATCH 1286/2608] scsi: bnx2fc: Fix NULL dereference in error handling [ Upstream commit 9ae4f8420ed7be4b13c96600e3568c144d101a23 ] If "interface" is NULL then we can't release it and trying to will only lead to an Oops. Fixes: aea71a024914 ("[SCSI] bnx2fc: Introduce interface structure for each vlan interface") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 6844ba361616..89f09b122135 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2372,7 +2372,7 @@ static int _bnx2fc_create(struct net_device *netdev, if (!interface) { printk(KERN_ERR PFX "bnx2fc_interface_create failed\n"); rc = -ENOMEM; - goto ifput_err; + goto netdev_err; } if (is_vlan_dev(netdev)) { From 6989cedd762f2e1b9062f2c20f19528f9116badc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 4 Dec 2018 13:52:49 -0800 Subject: [PATCH 1287/2608] Input: omap-keypad - fix idle configuration to not block SoC idle states [ Upstream commit e2ca26ec4f01486661b55b03597c13e2b9c18b73 ] With PM enabled, I noticed that pressing a key on the droid4 keyboard will block deeper idle states for the SoC. Let's fix this by using IRQF_ONESHOT and stop constantly toggling the device OMAP4_KBD_IRQENABLE register as suggested by Dmitry Torokhov . From the hardware point of view, looks like we need to manage the registers for OMAP4_KBD_IRQENABLE and OMAP4_KBD_WAKEUPENABLE together to avoid blocking deeper SoC idle states. And with toggling of OMAP4_KBD_IRQENABLE register now gone with IRQF_ONESHOT, also the SoC idle state problem is gone during runtime. We still also need to clear OMAP4_KBD_WAKEUPENABLE in omap4_keypad_close() though to pair it with omap4_keypad_open() to prevent blocking deeper SoC idle states after rmmod omap4-keypad. Reported-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/omap4-keypad.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index ce8e2baf31bb..616fdd94b069 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -126,12 +126,8 @@ static irqreturn_t omap4_keypad_irq_handler(int irq, void *dev_id) { struct omap4_keypad *keypad_data = dev_id; - if (kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)) { - /* Disable interrupts */ - kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, - OMAP4_VAL_IRQDISABLE); + if (kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)) return IRQ_WAKE_THREAD; - } return IRQ_NONE; } @@ -173,11 +169,6 @@ static irqreturn_t omap4_keypad_irq_thread_fn(int irq, void *dev_id) kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)); - /* enable interrupts */ - kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, - OMAP4_DEF_IRQENABLE_EVENTEN | - OMAP4_DEF_IRQENABLE_LONGKEY); - return IRQ_HANDLED; } @@ -214,9 +205,10 @@ static void omap4_keypad_close(struct input_dev *input) disable_irq(keypad_data->irq); - /* Disable interrupts */ + /* Disable interrupts and wake-up events */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, OMAP4_VAL_IRQDISABLE); + kbd_writel(keypad_data, OMAP4_KBD_WAKEUPENABLE, 0); /* clear pending interrupts */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, @@ -364,7 +356,7 @@ static int omap4_keypad_probe(struct platform_device *pdev) } error = request_threaded_irq(keypad_data->irq, omap4_keypad_irq_handler, - omap4_keypad_irq_thread_fn, 0, + omap4_keypad_irq_thread_fn, IRQF_ONESHOT, "omap4-keypad", keypad_data); if (error) { dev_err(&pdev->dev, "failed to register interrupt\n"); From 154ed1efeac4be5f8dc6469e08bacf5a5e45e245 Mon Sep 17 00:00:00 2001 From: Yussuf Khalil Date: Sat, 8 Dec 2018 20:13:35 -0800 Subject: [PATCH 1288/2608] Input: synaptics - enable RMI on ThinkPad T560 [ Upstream commit ca5047286c9c93a01e1f471d00a6019536992954 ] Before commit 7fd6d98b89f3 ("i2c: i801: Allow ACPI AML access I/O ports not reserved for SMBus"), enabling RMI on the T560 would cause the touchpad to stop working after resuming from suspend. Now that this issue is fixed, RMI can be enabled safely and works fine. Reviewed-by: Benjamin Tissoires Signed-off-by: Yussuf Khalil Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 54f0d037b5b6..515a0b0d48bf 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -171,6 +171,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0046", /* X250 */ "LEN004a", /* W541 */ "LEN005b", /* P50 */ + "LEN005e", /* T560 */ "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ From ae6f4c3cbad175800c318c4fde54a3699fcdb431 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 10 Dec 2018 15:22:23 -0600 Subject: [PATCH 1289/2608] ibmvnic: Fix non-atomic memory allocation in IRQ context [ Upstream commit 1d1bbc37f89b0559c9e913682f2489d89cfde6b8 ] ibmvnic_reset allocated new reset work item objects in a non-atomic context. This can be called from a tasklet, generating the output below. Allocate work items with the GFP_ATOMIC flag instead. BUG: sleeping function called from invalid context at mm/slab.h:421 in_atomic(): 1, irqs_disabled(): 1, pid: 93, name: kworker/0:2 INFO: lockdep is turned off. irq event stamp: 66049 hardirqs last enabled at (66048): [] tasklet_action_common.isra.12+0x78/0x1c0 hardirqs last disabled at (66049): [] _raw_spin_lock_irqsave+0x48/0xf0 softirqs last enabled at (66044): [] dev_deactivate_queue.constprop.28+0xc8/0x160 softirqs last disabled at (66045): [] call_do_softirq+0x14/0x24 CPU: 0 PID: 93 Comm: kworker/0:2 Kdump: loaded Not tainted 4.20.0-rc6-00001-g1b50a8f03706 #7 Workqueue: events linkwatch_event Call Trace: [c0000003fffe7ae0] [c000000000bc83e4] dump_stack+0xe8/0x164 (unreliable) [c0000003fffe7b30] [c00000000015ba0c] ___might_sleep+0x2dc/0x320 [c0000003fffe7bb0] [c000000000391514] kmem_cache_alloc_trace+0x3e4/0x440 [c0000003fffe7c30] [d000000005b2309c] ibmvnic_reset+0x16c/0x360 [ibmvnic] [c0000003fffe7cc0] [d000000005b29834] ibmvnic_tasklet+0x1054/0x2010 [ibmvnic] [c0000003fffe7e00] [c0000000001224c8] tasklet_action_common.isra.12+0xd8/0x1c0 [c0000003fffe7e60] [c000000000bf1238] __do_softirq+0x1a8/0x64c [c0000003fffe7f90] [c0000000000306e0] call_do_softirq+0x14/0x24 [c0000003f3967980] [c00000000001ba50] do_softirq_own_stack+0x60/0xb0 [c0000003f39679c0] [c0000000001218a8] do_softirq+0xa8/0x100 [c0000003f39679f0] [c000000000121a74] __local_bh_enable_ip+0x174/0x180 [c0000003f3967a60] [c000000000bf003c] _raw_spin_unlock_bh+0x5c/0x80 [c0000003f3967a90] [c000000000a8ac78] dev_deactivate_queue.constprop.28+0xc8/0x160 [c0000003f3967ad0] [c000000000a8c8b0] dev_deactivate_many+0xd0/0x520 [c0000003f3967b70] [c000000000a8cd40] dev_deactivate+0x40/0x60 [c0000003f3967ba0] [c000000000a5e0c4] linkwatch_do_dev+0x74/0xd0 [c0000003f3967bd0] [c000000000a5e694] __linkwatch_run_queue+0x1a4/0x1f0 [c0000003f3967c30] [c000000000a5e728] linkwatch_event+0x48/0x60 [c0000003f3967c50] [c0000000001444e8] process_one_work+0x238/0x710 [c0000003f3967d20] [c000000000144a48] worker_thread+0x88/0x4e0 [c0000003f3967db0] [c00000000014e3a8] kthread+0x178/0x1c0 [c0000003f3967e20] [c00000000000bfd0] ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 14c53ed5cca6..c914b338691b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1596,7 +1596,7 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter, } } - rwi = kzalloc(sizeof(*rwi), GFP_KERNEL); + rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); if (!rwi) { mutex_unlock(&adapter->rwi_lock); ibmvnic_close(netdev); From a8fe14fcfe50e8170eeae31684eeeb42aa15e921 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 11 Dec 2018 11:13:39 +0800 Subject: [PATCH 1290/2608] ieee802154: ca8210: fix possible u8 overflow in ca8210_rx_done [ Upstream commit 8e41cae64b08fe2e86a9ffb88b295c6b4b3a3322 ] gcc warning this: drivers/net/ieee802154/ca8210.c:730:10: warning: comparison is always false due to limited range of data type [-Wtype-limits] 'len' is u8 type, we get it from buf[1] adding 2, which can overflow. This patch change the type of 'len' to unsigned int to avoid this,also fix the gcc warning. Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: YueHaibing Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- drivers/net/ieee802154/ca8210.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 22e466ea919a..dcd10dba08c7 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -722,7 +722,7 @@ static void ca8210_mlme_reset_worker(struct work_struct *work) static void ca8210_rx_done(struct cas_control *cas_ctl) { u8 *buf; - u8 len; + unsigned int len; struct work_priv_container *mlme_reset_wpc; struct ca8210_priv *priv = cas_ctl->priv; @@ -731,7 +731,7 @@ static void ca8210_rx_done(struct cas_control *cas_ctl) if (len > CA8210_SPI_BUF_SIZE) { dev_crit( &priv->spi->dev, - "Received packet len (%d) erroneously long\n", + "Received packet len (%u) erroneously long\n", len ); goto finish; From 0f0b37af1d3c11cf4d17c4b74e2ba3bb23b277fb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 30 Nov 2018 23:23:27 +0300 Subject: [PATCH 1291/2608] x86/mm: Fix guard hole handling [ Upstream commit 16877a5570e0c5f4270d5b17f9bab427bcae9514 ] There is a guard hole at the beginning of the kernel address space, also used by hypervisors. It occupies 16 PGD entries. This reserved range is not defined explicitely, it is calculated relative to other entities: direct mapping and user space ranges. The calculation got broken by recent changes of the kernel memory layout: LDT remap range is now mapped before direct mapping and makes the calculation invalid. The breakage leads to crash on Xen dom0 boot[1]. Define the reserved range explicitely. It's part of kernel ABI (hypervisors expect it to be stable) and must not depend on changes in the rest of kernel memory layout. [1] https://lists.xenproject.org/archives/html/xen-devel/2018-11/msg03313.html Fixes: d52888aa2753 ("x86/mm: Move LDT remap out of KASLR region on 5-level paging") Reported-by: Hans van Kranenburg Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Tested-by: Hans van Kranenburg Reviewed-by: Juergen Gross Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: bhe@redhat.com Cc: linux-mm@kvack.org Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20181130202328.65359-2-kirill.shutemov@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/pgtable_64_types.h | 5 +++++ arch/x86/mm/dump_pagetables.c | 8 ++++---- arch/x86/xen/mmu_pv.c | 11 ++++++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 7764617b8f9c..bf6d2692fc60 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -94,6 +94,11 @@ typedef struct { pteval_t pte; } pte_t; # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #endif +#define GUARD_HOLE_PGD_ENTRY -256UL +#define GUARD_HOLE_SIZE (16UL << PGDIR_SHIFT) +#define GUARD_HOLE_BASE_ADDR (GUARD_HOLE_PGD_ENTRY << PGDIR_SHIFT) +#define GUARD_HOLE_END_ADDR (GUARD_HOLE_BASE_ADDR + GUARD_HOLE_SIZE) + #define LDT_PGD_ENTRY -240UL #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 2a4849e92831..cf403e057f3f 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -465,11 +465,11 @@ static inline bool is_hypervisor_range(int idx) { #ifdef CONFIG_X86_64 /* - * ffff800000000000 - ffff87ffffffffff is reserved for - * the hypervisor. + * A hole in the beginning of kernel address space reserved + * for a hypervisor. */ - return (idx >= pgd_index(__PAGE_OFFSET) - 16) && - (idx < pgd_index(__PAGE_OFFSET)); + return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) && + (idx < pgd_index(GUARD_HOLE_END_ADDR)); #else return false; #endif diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index b33fa127a613..7631e6130d44 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -614,19 +614,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, unsigned long limit) { int i, nr, flush = 0; - unsigned hole_low, hole_high; + unsigned hole_low = 0, hole_high = 0; /* The limit is the last byte to be touched */ limit--; BUG_ON(limit >= FIXADDR_TOP); +#ifdef CONFIG_X86_64 /* * 64-bit has a great big hole in the middle of the address - * space, which contains the Xen mappings. On 32-bit these - * will end up making a zero-sized hole and so is a no-op. + * space, which contains the Xen mappings. */ - hole_low = pgd_index(USER_LIMIT); - hole_high = pgd_index(PAGE_OFFSET); + hole_low = pgd_index(GUARD_HOLE_BASE_ADDR); + hole_high = pgd_index(GUARD_HOLE_END_ADDR); +#endif nr = pgd_index(limit) + 1; for (i = 0; i < nr; i++) { From 10294811172cc7f4ff1d67ef842064f5a4cf5fce Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 30 Nov 2018 23:23:28 +0300 Subject: [PATCH 1292/2608] x86/dump_pagetables: Fix LDT remap address marker [ Upstream commit 254eb5505ca0ca749d3a491fc6668b6c16647a99 ] The LDT remap placement has been changed. It's now placed before the direct mapping in the kernel virtual address space for both paging modes. Change address markers order accordingly. Fixes: d52888aa2753 ("x86/mm: Move LDT remap out of KASLR region on 5-level paging") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: bhe@redhat.com Cc: hans.van.kranenburg@mendix.com Cc: linux-mm@kvack.org Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20181130202328.65359-3-kirill.shutemov@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/mm/dump_pagetables.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index cf403e057f3f..6bca45d06676 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -51,10 +51,10 @@ struct addr_marker { enum address_markers_idx { USER_SPACE_NR = 0, KERNEL_SPACE_NR, - LOW_KERNEL_NR, -#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL) +#ifdef CONFIG_MODIFY_LDT_SYSCALL LDT_NR, #endif + LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, #ifdef CONFIG_KASAN @@ -62,9 +62,6 @@ enum address_markers_idx { KASAN_SHADOW_END_NR, #endif CPU_ENTRY_AREA_NR, -#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL) - LDT_NR, -#endif #ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, #endif From 214b5617cc701471374f7ccfe748399f952646ec Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Tue, 4 Dec 2018 15:18:52 +0100 Subject: [PATCH 1293/2608] i40e: fix mac filter delete when setting mac address [ Upstream commit 158daed16efb1170694e420ae06ba8ba954d82e5 ] A previous commit moved the ether_addr_copy() in i40e_set_mac() before the mac filter del/add to avoid a race. However it wasn't taken into account that this alters the mac address being handed to i40e_del_mac_filter(). Also changed i40e_add_mac_filter() to operate on netdev->dev_addr, hopefully that makes the code easier to read. Fixes: 458867b2ca0c ("i40e: don't remove netdev->dev_addr when syncing uc list") Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Acked-by: Jacob Keller Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 176c99b8251d..904b42becd45 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1554,17 +1554,17 @@ static int i40e_set_mac(struct net_device *netdev, void *p) netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); /* Copy the address first, so that we avoid a possible race with - * .set_rx_mode(). If we copy after changing the address in the filter - * list, we might open ourselves to a narrow race window where - * .set_rx_mode could delete our dev_addr filter and prevent traffic - * from passing. + * .set_rx_mode(). + * - Remove old address from MAC filter + * - Copy new address + * - Add new address to MAC filter */ - ether_addr_copy(netdev->dev_addr, addr->sa_data); - spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); - i40e_add_mac_filter(vsi, addr->sa_data); + ether_addr_copy(netdev->dev_addr, addr->sa_data); + i40e_add_mac_filter(vsi, netdev->dev_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); + if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; From e8267a5ec404913cd2fcfc51ff5e5b3671ed4916 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Mon, 10 Dec 2018 14:39:37 +0100 Subject: [PATCH 1294/2608] netfilter: ipset: do not call ipset_nest_end after nla_nest_cancel [ Upstream commit 708abf74dd87f8640871b814faa195fb5970b0e3 ] In the error handling block, nla_nest_cancel(skb, atd) is called to cancel the nest operation. But then, ipset_nest_end(skb, atd) is unexpected called to end the nest operation. This patch calls the ipset_nest_end only on the branch that nla_nest_cancel is not called. Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") Signed-off-by: Pan Bian Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_list_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 75d52aed6fdb..e563921e6af5 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -542,8 +542,8 @@ nla_put_failure: ret = -EMSGSIZE; } else { cb->args[IPSET_CB_ARG0] = i; + ipset_nest_end(skb, atd); } - ipset_nest_end(skb, atd); out: rcu_read_unlock(); return ret; From b0e56e3c569932e937fa2fa0dc5799c8f023d07e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 11 Dec 2018 07:45:29 +0100 Subject: [PATCH 1295/2608] netfilter: nat: can't use dst_hold on noref dst [ Upstream commit 542fbda0f08f1cbbc250f9e59f7537649651d0c8 ] The dst entry might already have a zero refcount, waiting on rcu list to be free'd. Using dst_hold() transitions its reference count to 1, and next dst release will try to free it again -- resulting in a double free: WARNING: CPU: 1 PID: 0 at include/net/dst.h:239 nf_xfrm_me_harder+0xe7/0x130 [nf_nat] RIP: 0010:nf_xfrm_me_harder+0xe7/0x130 [nf_nat] Code: 48 8b 5c 24 60 65 48 33 1c 25 28 00 00 00 75 53 48 83 c4 68 5b 5d 41 5c c3 85 c0 74 0d 8d 48 01 f0 0f b1 0a 74 86 85 c0 75 f3 <0f> 0b e9 7b ff ff ff 29 c6 31 d2 b9 20 00 48 00 4c 89 e7 e8 31 27 Call Trace: nf_nat_ipv4_out+0x78/0x90 [nf_nat_ipv4] nf_hook_slow+0x36/0xd0 ip_output+0x9f/0xd0 ip_forward+0x328/0x440 ip_rcv+0x8a/0xb0 Use dst_hold_safe instead and bail out if we cannot take a reference. Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag") Reported-by: Martin Zaharinov Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_nat_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af8345fc4fbd..ed0ea64b8d04 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -97,7 +97,8 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) dst = skb_dst(skb); if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; - dst_hold(dst); + if (!dst_hold_safe(dst)) + return -EHOSTUNREACH; dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); if (IS_ERR(dst)) From a00f97b1af6c78f27876091b5efbc347a6262b57 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 12 Dec 2018 08:57:00 -0800 Subject: [PATCH 1296/2608] bnx2x: Clear fip MAC when fcoe offload support is disabled [ Upstream commit bbf666c1af916ed74795493c564df6fad462cc80 ] On some customer setups it was observed that shmem contains a non-zero fip MAC for 57711 which would lead to enabling of SW FCoE. Add a software workaround to clear the bad fip mac address if no FCoE connections are supported. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index bd3e3f080ebf..688d84cd6187 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -11733,8 +11733,10 @@ static void bnx2x_get_fcoe_info(struct bnx2x *bp) * If maximum allowed number of connections is zero - * disable the feature. */ - if (!bp->cnic_eth_dev.max_fcoe_conn) + if (!bp->cnic_eth_dev.max_fcoe_conn) { bp->flags |= NO_FCOE_FLAG; + eth_zero_addr(bp->fip_mac); + } } static void bnx2x_get_cnic_info(struct bnx2x *bp) From d964ef1d09b5526b244bfcb57dec76d69a79a7f5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 12 Dec 2018 08:57:01 -0800 Subject: [PATCH 1297/2608] bnx2x: Remove configured vlans as part of unload sequence. [ Upstream commit 04f05230c5c13b1384f66f5186a68d7499e34622 ] Vlans are not getting removed when drivers are unloaded. The recent storm firmware versions had added safeguards against re-configuring an already configured vlan. As a result, PF inner reload flows (e.g., mtu change) might trigger an assertion. This change is going to remove vlans (same as we do for MACs) when doing a chip cleanup during unload. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 34 +++++++++++++++---- .../net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 4 ++- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 688d84cd6187..433a7a37575d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -8462,6 +8462,7 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan, /* Fill a user request section if needed */ if (!test_bit(RAMROD_CONT, ramrod_flags)) { ramrod_param.user_req.u.vlan.vlan = vlan; + __set_bit(BNX2X_VLAN, &ramrod_param.user_req.vlan_mac_flags); /* Set the command: ADD or DEL */ if (set) ramrod_param.user_req.cmd = BNX2X_VLAN_MAC_ADD; @@ -8482,6 +8483,27 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan, return rc; } +static int bnx2x_del_all_vlans(struct bnx2x *bp) +{ + struct bnx2x_vlan_mac_obj *vlan_obj = &bp->sp_objs[0].vlan_obj; + unsigned long ramrod_flags = 0, vlan_flags = 0; + struct bnx2x_vlan_entry *vlan; + int rc; + + __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); + __set_bit(BNX2X_VLAN, &vlan_flags); + rc = vlan_obj->delete_all(bp, vlan_obj, &vlan_flags, &ramrod_flags); + if (rc) + return rc; + + /* Mark that hw forgot all entries */ + list_for_each_entry(vlan, &bp->vlan_reg, link) + vlan->hw = false; + bp->vlan_cnt = 0; + + return 0; +} + int bnx2x_del_all_macs(struct bnx2x *bp, struct bnx2x_vlan_mac_obj *mac_obj, int mac_type, bool wait_for_comp) @@ -9320,6 +9342,11 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) BNX2X_ERR("Failed to schedule DEL commands for UC MACs list: %d\n", rc); + /* Remove all currently configured VLANs */ + rc = bnx2x_del_all_vlans(bp); + if (rc < 0) + BNX2X_ERR("Failed to delete all VLANs\n"); + /* Disable LLH */ if (!CHIP_IS_E1(bp)) REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 0); @@ -13006,13 +13033,6 @@ static void bnx2x_vlan_configure(struct bnx2x *bp, bool set_rx_mode) int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp) { - struct bnx2x_vlan_entry *vlan; - - /* The hw forgot all entries after reload */ - list_for_each_entry(vlan, &bp->vlan_reg, link) - vlan->hw = false; - bp->vlan_cnt = 0; - /* Don't set rx mode here. Our caller will do it. */ bnx2x_vlan_configure(bp, false); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index 0bf2fd470819..7a6e82db4231 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -265,6 +265,7 @@ enum { BNX2X_ETH_MAC, BNX2X_ISCSI_ETH_MAC, BNX2X_NETQ_ETH_MAC, + BNX2X_VLAN, BNX2X_DONT_CONSUME_CAM_CREDIT, BNX2X_DONT_CONSUME_CAM_CREDIT_DEST, }; @@ -272,7 +273,8 @@ enum { #define BNX2X_VLAN_MAC_CMP_MASK (1 << BNX2X_UC_LIST_MAC | \ 1 << BNX2X_ETH_MAC | \ 1 << BNX2X_ISCSI_ETH_MAC | \ - 1 << BNX2X_NETQ_ETH_MAC) + 1 << BNX2X_NETQ_ETH_MAC | \ + 1 << BNX2X_VLAN) #define BNX2X_VLAN_MAC_CMP_FLAGS(flags) \ ((flags) & BNX2X_VLAN_MAC_CMP_MASK) From 70875bda5af2066dba4bc227f927be4abb603531 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 12 Dec 2018 08:57:03 -0800 Subject: [PATCH 1298/2608] bnx2x: Send update-svid ramrod with retry/poll flags enabled [ Upstream commit 9061193c4ee065d3240fde06767c2e06ec61decc ] Driver sends update-SVID ramrod in the MFW notification path. If there is a pending ramrod, driver doesn't retry the command and storm firmware will never be updated with the SVID value. The patch adds changes to send update-svid ramrod in process context with retry/poll flags set. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 1b7f4342dab9..d17a5c911524 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1278,6 +1278,7 @@ enum sp_rtnl_flag { BNX2X_SP_RTNL_TX_STOP, BNX2X_SP_RTNL_GET_DRV_VERSION, BNX2X_SP_RTNL_CHANGE_UDP_PORT, + BNX2X_SP_RTNL_UPDATE_SVID, }; enum bnx2x_iov_flag { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 433a7a37575d..85a320e5c0a8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -2925,6 +2925,10 @@ static void bnx2x_handle_update_svid_cmd(struct bnx2x *bp) func_params.f_obj = &bp->func_obj; func_params.cmd = BNX2X_F_CMD_SWITCH_UPDATE; + /* Prepare parameters for function state transitions */ + __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); + __set_bit(RAMROD_RETRY, &func_params.ramrod_flags); + if (IS_MF_UFP(bp) || IS_MF_BD(bp)) { int func = BP_ABS_FUNC(bp); u32 val; @@ -4301,7 +4305,8 @@ static void bnx2x_attn_int_deasserted3(struct bnx2x *bp, u32 attn) bnx2x_handle_eee_event(bp); if (val & DRV_STATUS_OEM_UPDATE_SVID) - bnx2x_handle_update_svid_cmd(bp); + bnx2x_schedule_sp_rtnl(bp, + BNX2X_SP_RTNL_UPDATE_SVID, 0); if (bp->link_vars.periodic_flags & PERIODIC_FLAGS_LINK_EVENT) { @@ -10369,6 +10374,9 @@ sp_rtnl_not_reset: &bp->sp_rtnl_state)) bnx2x_update_mng_version(bp); + if (test_and_clear_bit(BNX2X_SP_RTNL_UPDATE_SVID, &bp->sp_rtnl_state)) + bnx2x_handle_update_svid_cmd(bp); + if (test_and_clear_bit(BNX2X_SP_RTNL_CHANGE_UDP_PORT, &bp->sp_rtnl_state)) { if (bnx2x_udp_port_update(bp)) { From b831528038e3cad0d745c53bcaeedb642f5cbc1f Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 9 Nov 2018 20:59:01 +0530 Subject: [PATCH 1299/2608] scsi: target: iscsi: cxgbit: fix csk leak [ Upstream commit 801df68d617e3cb831f531c99fa6003620e6b343 ] csk leak can happen if a new TCP connection gets established after cxgbit_accept_np() returns, to fix this leak free remaining csk in cxgbit_free_np(). Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index d4fa41be80f9..0c00bb27c9c5 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -631,8 +631,11 @@ static void cxgbit_send_halfclose(struct cxgbit_sock *csk) static void cxgbit_arp_failure_discard(void *handle, struct sk_buff *skb) { + struct cxgbit_sock *csk = handle; + pr_debug("%s cxgbit_device %p\n", __func__, handle); kfree_skb(skb); + cxgbit_put_csk(csk); } static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb) @@ -1147,7 +1150,7 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) rpl5->opt0 = cpu_to_be64(opt0); rpl5->opt2 = cpu_to_be32(opt2); set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->ctrlq_idx); - t4_set_arp_err_handler(skb, NULL, cxgbit_arp_failure_discard); + t4_set_arp_err_handler(skb, csk, cxgbit_arp_failure_discard); cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); } From 5e6aefd625f30006fe443ac714d240a6362c6b9a Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 9 Nov 2018 20:59:46 +0530 Subject: [PATCH 1300/2608] scsi: target: iscsi: cxgbit: add missing spin_lock_init() [ Upstream commit 9e6371d3c6913ff1707fb2c0274c9925f7aaef80 ] Add missing spin_lock_init() for cdev->np_lock. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/iscsi/cxgbit/cxgbit_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 4fd775ace541..6340e2e7ffbe 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -58,6 +58,7 @@ static void *cxgbit_uld_add(const struct cxgb4_lld_info *lldi) return ERR_PTR(-ENOMEM); kref_init(&cdev->kref); + spin_lock_init(&cdev->np_lock); cdev->lldi = *lldi; From bcf86ba5e2b487df5e2a891f9131b6368791510e Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sat, 1 Dec 2018 21:40:38 +0000 Subject: [PATCH 1301/2608] x86, hyperv: remove PCI dependency [ Upstream commit c629421a990033ba539eb8585e73a2e6fa9ea631 ] Need to be able to boot without PCI devices present. Signed-off-by: Sinan Kaya Signed-off-by: Sasha Levin --- drivers/hv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 50b89ea0e60f..247a62604d1f 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -2,7 +2,7 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" - depends on X86 && ACPI && PCI && X86_LOCAL_APIC && HYPERVISOR_GUEST + depends on X86 && ACPI && X86_LOCAL_APIC && HYPERVISOR_GUEST select PARAVIRT help Select this option to run Linux as a Hyper-V client operating From 0758a796eb07a6eb65af0bb7161c0f74f431eea6 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 10 Dec 2018 21:20:30 -0700 Subject: [PATCH 1302/2608] drivers: net: xgene: Remove unnecessary forward declarations [ Upstream commit 2ab4c3426c0cf711d7147e3f559638e4ab88960e ] Clang warns: drivers/net/ethernet/apm/xgene/xgene_enet_main.c:33:36: warning: tentative array definition assumed to have one element static const struct acpi_device_id xgene_enet_acpi_match[]; ^ 1 warning generated. Both xgene_enet_acpi_match and xgene_enet_of_match are defined before their uses at the bottom of the file so this is unnecessary. When CONFIG_ACPI is disabled, ACPI_PTR becomes NULL so xgene_enet_acpi_match doesn't need to be defined. Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 3b889efddf78..50dd6bf176d0 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -29,9 +29,6 @@ #define RES_RING_CSR 1 #define RES_RING_CMD 2 -static const struct of_device_id xgene_enet_of_match[]; -static const struct acpi_device_id xgene_enet_acpi_match[]; - static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool) { struct xgene_enet_raw_desc16 *raw_desc; From 491fe3ac1e8a3b8dfbb280a6a0e3bd3b0faf3c67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Dec 2018 21:45:07 +0100 Subject: [PATCH 1303/2608] w90p910_ether: remove incorrect __init annotation [ Upstream commit 51367e423c6501a26e67d91a655d2bc892303462 ] The get_mac_address() function is normally inline, but when it is not, we get a warning that this configuration is broken: WARNING: vmlinux.o(.text+0x4aff00): Section mismatch in reference from the function w90p910_ether_setup() to the function .init.text:get_mac_address() The function w90p910_ether_setup() references the function __init get_mac_address(). This is often because w90p910_ether_setup lacks a __init Remove the __init to make it always do the right thing. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/nuvoton/w90p910_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 4a67c55aa9f1..11a9add81849 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -912,7 +912,7 @@ static const struct net_device_ops w90p910_ether_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static void __init get_mac_address(struct net_device *dev) +static void get_mac_address(struct net_device *dev) { struct w90p910_ether *ether = netdev_priv(dev); struct platform_device *pdev; From 9c97aac00f9d6b01d45f3731b7f14f9d34a167c1 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:20 +0800 Subject: [PATCH 1304/2608] net: hns: Incorrect offset address used for some registers. [ Upstream commit 4e1d4be681b2c26fd874adbf584bf034573ac45d ] According to the hip06 Datasheet: 1. The offset of INGRESS_SW_VLAN_TAG_DISC should be 0x1A00+4*all_chn_num 2. The offset of INGRESS_IN_DATA_STP_DISC should be 0x1A50+4*all_chn_num Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 1f056a6b167e..bc020dddd253 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2159,9 +2159,9 @@ void hns_dsaf_update_stats(struct dsaf_device *dsaf_dev, u32 node_num) DSAF_INODE_LOCAL_ADDR_FALSE_NUM_0_REG + 0x80 * (u64)node_num); hw_stats->vlan_drop += dsaf_read_dev(dsaf_dev, - DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + 0x80 * (u64)node_num); + DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + 4 * (u64)node_num); hw_stats->stp_drop += dsaf_read_dev(dsaf_dev, - DSAF_INODE_IN_DATA_STP_DISC_0_REG + 0x80 * (u64)node_num); + DSAF_INODE_IN_DATA_STP_DISC_0_REG + 4 * (u64)node_num); /* pfc pause frame statistics stored in dsaf inode*/ if ((node_num < DSAF_SERVICE_NW_NUM) && !is_ver1) { From 649171175b35f9f36f58bbe886718ebc2e788e0b Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:21 +0800 Subject: [PATCH 1305/2608] net: hns: All ports can not work when insmod hns ko after rmmod. [ Upstream commit 308c6cafde0147616da45e3a928adae55c428deb ] There are two test cases: 1. Remove the 4 modules:hns_enet_drv/hns_dsaf/hnae/hns_mdio, and install them again, must use "ifconfig down/ifconfig up" command pair to bring port to work. This patch calls phy_stop function when init phy to fix this bug. 2. Remove the 2 modules:hns_enet_drv/hns_dsaf, and install them again, all ports can not use anymore, because of the phy devices register failed(phy devices already exists). Phy devices are registered when hns_dsaf installed, this patch removes them when hns_dsaf removed. The two cases are sometimes related, fixing the second case also requires fixing the first case, so fix them together. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 15 +++++++++++++++ drivers/net/ethernet/hisilicon/hns/hns_enet.c | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 5a8dbd72fe45..07e117deeb0f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -783,6 +783,17 @@ static int hns_mac_register_phy(struct hns_mac_cb *mac_cb) return rc; } +static void hns_mac_remove_phydev(struct hns_mac_cb *mac_cb) +{ + if (!to_acpi_device_node(mac_cb->fw_port) || !mac_cb->phy_dev) + return; + + phy_device_remove(mac_cb->phy_dev); + phy_device_free(mac_cb->phy_dev); + + mac_cb->phy_dev = NULL; +} + #define MAC_MEDIA_TYPE_MAX_LEN 16 static const struct { @@ -1120,7 +1131,11 @@ void hns_mac_uninit(struct dsaf_device *dsaf_dev) int max_port_num = hns_mac_get_max_port_num(dsaf_dev); for (i = 0; i < max_port_num; i++) { + if (!dsaf_dev->mac_cb[i]) + continue; + dsaf_dev->misc_op->cpld_reset_led(dsaf_dev->mac_cb[i]); + hns_mac_remove_phydev(dsaf_dev->mac_cb[i]); dsaf_dev->mac_cb[i] = NULL; } } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 4faadc3ffe8c..2758c4bb9208 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1286,6 +1286,9 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (h->phy_if == PHY_INTERFACE_MODE_XGMII) phy_dev->autoneg = false; + if (h->phy_if == PHY_INTERFACE_MODE_SGMII) + phy_stop(phy_dev); + return 0; } From 5fed62f0692508804684a0e79719569e146c3300 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:22 +0800 Subject: [PATCH 1306/2608] net: hns: Some registers use wrong address according to the datasheet. [ Upstream commit 4ad26f117b6ea0f5d5f1592127bafb5ec65904d3 ] According to the hip06 datasheet: 1.Six registers use wrong address: RCB_COM_SF_CFG_INTMASK_RING RCB_COM_SF_CFG_RING_STS RCB_COM_SF_CFG_RING RCB_COM_SF_CFG_INTMASK_BD RCB_COM_SF_CFG_BD_RINT_STS DSAF_INODE_VC1_IN_PKT_NUM_0_REG 2.The offset of DSAF_INODE_VC1_IN_PKT_NUM_0_REG should be 0x103C + 0x80 * all_chn_num 3.The offset to show the value of DSAF_INODE_IN_DATA_STP_DISC_0_REG is wrong, so the value of DSAF_INODE_SW_VLAN_TAG_DISC_0_REG will be overwrite These registers are only used in "ethtool -d", so that did not cause ndev to misfunction. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns/hns_dsaf_main.c | 248 +++++++++--------- .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 12 +- 2 files changed, 130 insertions(+), 130 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index bc020dddd253..9086d566a624 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2278,237 +2278,237 @@ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data) DSAF_INODE_BD_ORDER_STATUS_0_REG + j * 4); p[223 + i] = dsaf_read_dev(ddev, DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + j * 4); - p[224 + i] = dsaf_read_dev(ddev, + p[226 + i] = dsaf_read_dev(ddev, DSAF_INODE_IN_DATA_STP_DISC_0_REG + j * 4); } - p[227] = dsaf_read_dev(ddev, DSAF_INODE_GE_FC_EN_0_REG + port * 4); + p[229] = dsaf_read_dev(ddev, DSAF_INODE_GE_FC_EN_0_REG + port * 4); for (i = 0; i < DSAF_INODE_NUM / DSAF_COMM_CHN; i++) { j = i * DSAF_COMM_CHN + port; - p[228 + i] = dsaf_read_dev(ddev, + p[230 + i] = dsaf_read_dev(ddev, DSAF_INODE_VC0_IN_PKT_NUM_0_REG + j * 4); } - p[231] = dsaf_read_dev(ddev, - DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 4); + p[233] = dsaf_read_dev(ddev, + DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 0x80); /* dsaf inode registers */ for (i = 0; i < HNS_DSAF_SBM_NUM(ddev) / DSAF_COMM_CHN; i++) { j = i * DSAF_COMM_CHN + port; - p[232 + i] = dsaf_read_dev(ddev, + p[234 + i] = dsaf_read_dev(ddev, DSAF_SBM_CFG_REG_0_REG + j * 0x80); - p[235 + i] = dsaf_read_dev(ddev, + p[237 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + j * 0x80); - p[238 + i] = dsaf_read_dev(ddev, + p[240 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_1_REG_0_REG + j * 0x80); - p[241 + i] = dsaf_read_dev(ddev, + p[243 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + j * 0x80); - p[244 + i] = dsaf_read_dev(ddev, + p[246 + i] = dsaf_read_dev(ddev, DSAF_SBM_FREE_CNT_0_0_REG + j * 0x80); - p[245 + i] = dsaf_read_dev(ddev, + p[249 + i] = dsaf_read_dev(ddev, DSAF_SBM_FREE_CNT_1_0_REG + j * 0x80); - p[248 + i] = dsaf_read_dev(ddev, + p[252 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CNT_0_0_REG + j * 0x80); - p[251 + i] = dsaf_read_dev(ddev, + p[255 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CNT_1_0_REG + j * 0x80); - p[254 + i] = dsaf_read_dev(ddev, + p[258 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CNT_2_0_REG + j * 0x80); - p[257 + i] = dsaf_read_dev(ddev, + p[261 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CNT_3_0_REG + j * 0x80); - p[260 + i] = dsaf_read_dev(ddev, + p[264 + i] = dsaf_read_dev(ddev, DSAF_SBM_INER_ST_0_REG + j * 0x80); - p[263 + i] = dsaf_read_dev(ddev, + p[267 + i] = dsaf_read_dev(ddev, DSAF_SBM_MIB_REQ_FAILED_TC_0_REG + j * 0x80); - p[266 + i] = dsaf_read_dev(ddev, + p[270 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_CNT_0_REG + j * 0x80); - p[269 + i] = dsaf_read_dev(ddev, + p[273 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_DROP_CNT_0_REG + j * 0x80); - p[272 + i] = dsaf_read_dev(ddev, + p[276 + i] = dsaf_read_dev(ddev, DSAF_SBM_INF_OUTPORT_CNT_0_REG + j * 0x80); - p[275 + i] = dsaf_read_dev(ddev, + p[279 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC0_CNT_0_REG + j * 0x80); - p[278 + i] = dsaf_read_dev(ddev, + p[282 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC1_CNT_0_REG + j * 0x80); - p[281 + i] = dsaf_read_dev(ddev, + p[285 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC2_CNT_0_REG + j * 0x80); - p[284 + i] = dsaf_read_dev(ddev, + p[288 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC3_CNT_0_REG + j * 0x80); - p[287 + i] = dsaf_read_dev(ddev, + p[291 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC4_CNT_0_REG + j * 0x80); - p[290 + i] = dsaf_read_dev(ddev, + p[294 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC5_CNT_0_REG + j * 0x80); - p[293 + i] = dsaf_read_dev(ddev, + p[297 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC6_CNT_0_REG + j * 0x80); - p[296 + i] = dsaf_read_dev(ddev, + p[300 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC7_CNT_0_REG + j * 0x80); - p[299 + i] = dsaf_read_dev(ddev, + p[303 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_REQ_CNT_0_REG + j * 0x80); - p[302 + i] = dsaf_read_dev(ddev, + p[306 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_RELS_CNT_0_REG + j * 0x80); - p[305 + i] = dsaf_read_dev(ddev, + p[309 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_3_REG_0_REG + j * 0x80); - p[308 + i] = dsaf_read_dev(ddev, + p[312 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_4_REG_0_REG + j * 0x80); } /* dsaf onode registers */ for (i = 0; i < DSAF_XOD_NUM; i++) { - p[311 + i] = dsaf_read_dev(ddev, + p[315 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_TSA_TC0_TC3_CFG_0_REG + i * 0x90); - p[319 + i] = dsaf_read_dev(ddev, + p[323 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_TSA_TC4_TC7_CFG_0_REG + i * 0x90); - p[327 + i] = dsaf_read_dev(ddev, + p[331 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_BW_TC0_TC3_CFG_0_REG + i * 0x90); - p[335 + i] = dsaf_read_dev(ddev, + p[339 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_BW_TC4_TC7_CFG_0_REG + i * 0x90); - p[343 + i] = dsaf_read_dev(ddev, + p[347 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_BW_OFFSET_CFG_0_REG + i * 0x90); - p[351 + i] = dsaf_read_dev(ddev, + p[355 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_TOKEN_CFG_0_REG + i * 0x90); } - p[359] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_0_0_REG + port * 0x90); - p[360] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_1_0_REG + port * 0x90); - p[361] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_2_0_REG + port * 0x90); + p[363] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_0_0_REG + port * 0x90); + p[364] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_1_0_REG + port * 0x90); + p[365] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_2_0_REG + port * 0x90); for (i = 0; i < DSAF_XOD_BIG_NUM / DSAF_COMM_CHN; i++) { j = i * DSAF_COMM_CHN + port; - p[362 + i] = dsaf_read_dev(ddev, + p[366 + i] = dsaf_read_dev(ddev, DSAF_XOD_GNT_L_0_REG + j * 0x90); - p[365 + i] = dsaf_read_dev(ddev, + p[369 + i] = dsaf_read_dev(ddev, DSAF_XOD_GNT_H_0_REG + j * 0x90); - p[368 + i] = dsaf_read_dev(ddev, + p[372 + i] = dsaf_read_dev(ddev, DSAF_XOD_CONNECT_STATE_0_REG + j * 0x90); - p[371 + i] = dsaf_read_dev(ddev, + p[375 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVPKT_CNT_0_REG + j * 0x90); - p[374 + i] = dsaf_read_dev(ddev, + p[378 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVTC0_CNT_0_REG + j * 0x90); - p[377 + i] = dsaf_read_dev(ddev, + p[381 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVTC1_CNT_0_REG + j * 0x90); - p[380 + i] = dsaf_read_dev(ddev, + p[384 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVTC2_CNT_0_REG + j * 0x90); - p[383 + i] = dsaf_read_dev(ddev, + p[387 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVTC3_CNT_0_REG + j * 0x90); - p[386 + i] = dsaf_read_dev(ddev, + p[390 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVVC0_CNT_0_REG + j * 0x90); - p[389 + i] = dsaf_read_dev(ddev, + p[393 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVVC1_CNT_0_REG + j * 0x90); } - p[392] = dsaf_read_dev(ddev, - DSAF_XOD_XGE_RCVIN0_CNT_0_REG + port * 0x90); - p[393] = dsaf_read_dev(ddev, - DSAF_XOD_XGE_RCVIN1_CNT_0_REG + port * 0x90); - p[394] = dsaf_read_dev(ddev, - DSAF_XOD_XGE_RCVIN2_CNT_0_REG + port * 0x90); - p[395] = dsaf_read_dev(ddev, - DSAF_XOD_XGE_RCVIN3_CNT_0_REG + port * 0x90); p[396] = dsaf_read_dev(ddev, - DSAF_XOD_XGE_RCVIN4_CNT_0_REG + port * 0x90); + DSAF_XOD_XGE_RCVIN0_CNT_0_REG + port * 0x90); p[397] = dsaf_read_dev(ddev, - DSAF_XOD_XGE_RCVIN5_CNT_0_REG + port * 0x90); + DSAF_XOD_XGE_RCVIN1_CNT_0_REG + port * 0x90); p[398] = dsaf_read_dev(ddev, - DSAF_XOD_XGE_RCVIN6_CNT_0_REG + port * 0x90); + DSAF_XOD_XGE_RCVIN2_CNT_0_REG + port * 0x90); p[399] = dsaf_read_dev(ddev, - DSAF_XOD_XGE_RCVIN7_CNT_0_REG + port * 0x90); + DSAF_XOD_XGE_RCVIN3_CNT_0_REG + port * 0x90); p[400] = dsaf_read_dev(ddev, - DSAF_XOD_PPE_RCVIN0_CNT_0_REG + port * 0x90); + DSAF_XOD_XGE_RCVIN4_CNT_0_REG + port * 0x90); p[401] = dsaf_read_dev(ddev, - DSAF_XOD_PPE_RCVIN1_CNT_0_REG + port * 0x90); + DSAF_XOD_XGE_RCVIN5_CNT_0_REG + port * 0x90); p[402] = dsaf_read_dev(ddev, - DSAF_XOD_ROCEE_RCVIN0_CNT_0_REG + port * 0x90); + DSAF_XOD_XGE_RCVIN6_CNT_0_REG + port * 0x90); p[403] = dsaf_read_dev(ddev, - DSAF_XOD_ROCEE_RCVIN1_CNT_0_REG + port * 0x90); + DSAF_XOD_XGE_RCVIN7_CNT_0_REG + port * 0x90); p[404] = dsaf_read_dev(ddev, + DSAF_XOD_PPE_RCVIN0_CNT_0_REG + port * 0x90); + p[405] = dsaf_read_dev(ddev, + DSAF_XOD_PPE_RCVIN1_CNT_0_REG + port * 0x90); + p[406] = dsaf_read_dev(ddev, + DSAF_XOD_ROCEE_RCVIN0_CNT_0_REG + port * 0x90); + p[407] = dsaf_read_dev(ddev, + DSAF_XOD_ROCEE_RCVIN1_CNT_0_REG + port * 0x90); + p[408] = dsaf_read_dev(ddev, DSAF_XOD_FIFO_STATUS_0_REG + port * 0x90); /* dsaf voq registers */ for (i = 0; i < DSAF_VOQ_NUM / DSAF_COMM_CHN; i++) { j = (i * DSAF_COMM_CHN + port) * 0x90; - p[405 + i] = dsaf_read_dev(ddev, + p[409 + i] = dsaf_read_dev(ddev, DSAF_VOQ_ECC_INVERT_EN_0_REG + j); - p[408 + i] = dsaf_read_dev(ddev, + p[412 + i] = dsaf_read_dev(ddev, DSAF_VOQ_SRAM_PKT_NUM_0_REG + j); - p[411 + i] = dsaf_read_dev(ddev, DSAF_VOQ_IN_PKT_NUM_0_REG + j); - p[414 + i] = dsaf_read_dev(ddev, + p[415 + i] = dsaf_read_dev(ddev, DSAF_VOQ_IN_PKT_NUM_0_REG + j); + p[418 + i] = dsaf_read_dev(ddev, DSAF_VOQ_OUT_PKT_NUM_0_REG + j); - p[417 + i] = dsaf_read_dev(ddev, + p[421 + i] = dsaf_read_dev(ddev, DSAF_VOQ_ECC_ERR_ADDR_0_REG + j); - p[420 + i] = dsaf_read_dev(ddev, DSAF_VOQ_BP_STATUS_0_REG + j); - p[423 + i] = dsaf_read_dev(ddev, DSAF_VOQ_SPUP_IDLE_0_REG + j); - p[426 + i] = dsaf_read_dev(ddev, + p[424 + i] = dsaf_read_dev(ddev, DSAF_VOQ_BP_STATUS_0_REG + j); + p[427 + i] = dsaf_read_dev(ddev, DSAF_VOQ_SPUP_IDLE_0_REG + j); + p[430 + i] = dsaf_read_dev(ddev, DSAF_VOQ_XGE_XOD_REQ_0_0_REG + j); - p[429 + i] = dsaf_read_dev(ddev, + p[433 + i] = dsaf_read_dev(ddev, DSAF_VOQ_XGE_XOD_REQ_1_0_REG + j); - p[432 + i] = dsaf_read_dev(ddev, + p[436 + i] = dsaf_read_dev(ddev, DSAF_VOQ_PPE_XOD_REQ_0_REG + j); - p[435 + i] = dsaf_read_dev(ddev, + p[439 + i] = dsaf_read_dev(ddev, DSAF_VOQ_ROCEE_XOD_REQ_0_REG + j); - p[438 + i] = dsaf_read_dev(ddev, + p[442 + i] = dsaf_read_dev(ddev, DSAF_VOQ_BP_ALL_THRD_0_REG + j); } /* dsaf tbl registers */ - p[441] = dsaf_read_dev(ddev, DSAF_TBL_CTRL_0_REG); - p[442] = dsaf_read_dev(ddev, DSAF_TBL_INT_MSK_0_REG); - p[443] = dsaf_read_dev(ddev, DSAF_TBL_INT_SRC_0_REG); - p[444] = dsaf_read_dev(ddev, DSAF_TBL_INT_STS_0_REG); - p[445] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_ADDR_0_REG); - p[446] = dsaf_read_dev(ddev, DSAF_TBL_LINE_ADDR_0_REG); - p[447] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_HIGH_0_REG); - p[448] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_LOW_0_REG); - p[449] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_4_0_REG); - p[450] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_3_0_REG); - p[451] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_2_0_REG); - p[452] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_1_0_REG); - p[453] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_0_0_REG); - p[454] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_UCAST_CFG_0_REG); - p[455] = dsaf_read_dev(ddev, DSAF_TBL_LIN_CFG_0_REG); - p[456] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_HIGH_0_REG); - p[457] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_LOW_0_REG); - p[458] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA4_0_REG); - p[459] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA3_0_REG); - p[460] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA2_0_REG); - p[461] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA1_0_REG); - p[462] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA0_0_REG); - p[463] = dsaf_read_dev(ddev, DSAF_TBL_LIN_RDATA_0_REG); + p[445] = dsaf_read_dev(ddev, DSAF_TBL_CTRL_0_REG); + p[446] = dsaf_read_dev(ddev, DSAF_TBL_INT_MSK_0_REG); + p[447] = dsaf_read_dev(ddev, DSAF_TBL_INT_SRC_0_REG); + p[448] = dsaf_read_dev(ddev, DSAF_TBL_INT_STS_0_REG); + p[449] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_ADDR_0_REG); + p[450] = dsaf_read_dev(ddev, DSAF_TBL_LINE_ADDR_0_REG); + p[451] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_HIGH_0_REG); + p[452] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_LOW_0_REG); + p[453] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_4_0_REG); + p[454] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_3_0_REG); + p[455] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_2_0_REG); + p[456] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_1_0_REG); + p[457] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_0_0_REG); + p[458] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_UCAST_CFG_0_REG); + p[459] = dsaf_read_dev(ddev, DSAF_TBL_LIN_CFG_0_REG); + p[460] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_HIGH_0_REG); + p[461] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_LOW_0_REG); + p[462] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA4_0_REG); + p[463] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA3_0_REG); + p[464] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA2_0_REG); + p[465] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA1_0_REG); + p[466] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA0_0_REG); + p[467] = dsaf_read_dev(ddev, DSAF_TBL_LIN_RDATA_0_REG); for (i = 0; i < DSAF_SW_PORT_NUM; i++) { j = i * 0x8; - p[464 + 2 * i] = dsaf_read_dev(ddev, + p[468 + 2 * i] = dsaf_read_dev(ddev, DSAF_TBL_DA0_MIS_INFO1_0_REG + j); - p[465 + 2 * i] = dsaf_read_dev(ddev, + p[469 + 2 * i] = dsaf_read_dev(ddev, DSAF_TBL_DA0_MIS_INFO0_0_REG + j); } - p[480] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO2_0_REG); - p[481] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO1_0_REG); - p[482] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO0_0_REG); - p[483] = dsaf_read_dev(ddev, DSAF_TBL_PUL_0_REG); - p[484] = dsaf_read_dev(ddev, DSAF_TBL_OLD_RSLT_0_REG); - p[485] = dsaf_read_dev(ddev, DSAF_TBL_OLD_SCAN_VAL_0_REG); - p[486] = dsaf_read_dev(ddev, DSAF_TBL_DFX_CTRL_0_REG); - p[487] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_0_REG); - p[488] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_2_0_REG); - p[489] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_I_0_REG); - p[490] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_O_0_REG); - p[491] = dsaf_read_dev(ddev, DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG); + p[484] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO2_0_REG); + p[485] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO1_0_REG); + p[486] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO0_0_REG); + p[487] = dsaf_read_dev(ddev, DSAF_TBL_PUL_0_REG); + p[488] = dsaf_read_dev(ddev, DSAF_TBL_OLD_RSLT_0_REG); + p[489] = dsaf_read_dev(ddev, DSAF_TBL_OLD_SCAN_VAL_0_REG); + p[490] = dsaf_read_dev(ddev, DSAF_TBL_DFX_CTRL_0_REG); + p[491] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_0_REG); + p[492] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_2_0_REG); + p[493] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_I_0_REG); + p[494] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_O_0_REG); + p[495] = dsaf_read_dev(ddev, DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG); /* dsaf other registers */ - p[492] = dsaf_read_dev(ddev, DSAF_INODE_FIFO_WL_0_REG + port * 0x4); - p[493] = dsaf_read_dev(ddev, DSAF_ONODE_FIFO_WL_0_REG + port * 0x4); - p[494] = dsaf_read_dev(ddev, DSAF_XGE_GE_WORK_MODE_0_REG + port * 0x4); - p[495] = dsaf_read_dev(ddev, + p[496] = dsaf_read_dev(ddev, DSAF_INODE_FIFO_WL_0_REG + port * 0x4); + p[497] = dsaf_read_dev(ddev, DSAF_ONODE_FIFO_WL_0_REG + port * 0x4); + p[498] = dsaf_read_dev(ddev, DSAF_XGE_GE_WORK_MODE_0_REG + port * 0x4); + p[499] = dsaf_read_dev(ddev, DSAF_XGE_APP_RX_LINK_UP_0_REG + port * 0x4); - p[496] = dsaf_read_dev(ddev, DSAF_NETPORT_CTRL_SIG_0_REG + port * 0x4); - p[497] = dsaf_read_dev(ddev, DSAF_XGE_CTRL_SIG_CFG_0_REG + port * 0x4); + p[500] = dsaf_read_dev(ddev, DSAF_NETPORT_CTRL_SIG_0_REG + port * 0x4); + p[501] = dsaf_read_dev(ddev, DSAF_XGE_CTRL_SIG_CFG_0_REG + port * 0x4); if (!is_ver1) - p[498] = dsaf_read_dev(ddev, DSAF_PAUSE_CFG_REG + port * 0x4); + p[502] = dsaf_read_dev(ddev, DSAF_PAUSE_CFG_REG + port * 0x4); /* mark end of dsaf regs */ - for (i = 499; i < 504; i++) + for (i = 503; i < 504; i++) p[i] = 0xdddddddd; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 6d20e4eb7402..5ae3d017ddbc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -176,7 +176,7 @@ #define DSAF_INODE_IN_DATA_STP_DISC_0_REG 0x1A50 #define DSAF_INODE_GE_FC_EN_0_REG 0x1B00 #define DSAF_INODE_VC0_IN_PKT_NUM_0_REG 0x1B50 -#define DSAF_INODE_VC1_IN_PKT_NUM_0_REG 0x1C00 +#define DSAF_INODE_VC1_IN_PKT_NUM_0_REG 0x103C #define DSAF_INODE_IN_PRIO_PAUSE_BASE_REG 0x1C00 #define DSAF_INODE_IN_PRIO_PAUSE_BASE_OFFSET 0x100 #define DSAF_INODE_IN_PRIO_PAUSE_OFFSET 0x50 @@ -404,11 +404,11 @@ #define RCB_ECC_ERR_ADDR4_REG 0x460 #define RCB_ECC_ERR_ADDR5_REG 0x464 -#define RCB_COM_SF_CFG_INTMASK_RING 0x480 -#define RCB_COM_SF_CFG_RING_STS 0x484 -#define RCB_COM_SF_CFG_RING 0x488 -#define RCB_COM_SF_CFG_INTMASK_BD 0x48C -#define RCB_COM_SF_CFG_BD_RINT_STS 0x470 +#define RCB_COM_SF_CFG_INTMASK_RING 0x470 +#define RCB_COM_SF_CFG_RING_STS 0x474 +#define RCB_COM_SF_CFG_RING 0x478 +#define RCB_COM_SF_CFG_INTMASK_BD 0x47C +#define RCB_COM_SF_CFG_BD_RINT_STS 0x480 #define RCB_COM_RCB_RD_BD_BUSY 0x490 #define RCB_COM_RCB_FBD_CRT_EN 0x494 #define RCB_COM_AXI_WR_ERR_INTMASK 0x498 From 1ef17642df395627055661c3c20dfd183e78b07a Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:23 +0800 Subject: [PATCH 1307/2608] net: hns: Fixed bug that netdev was opened twice [ Upstream commit 5778b13b64eca5549d242686f2f91a2c80c8fa40 ] After resetting dsaf to try to repair chip error such as ecc error, the net device will be open if net interface is up. But at this time if there is the users set the net device up with the command ifconfig, the net device will be opened twice consecutively. Function napi_enable was called when open device. And Kernel panic will be occurred if it was called twice consecutively. Such as follow: static inline void napi_enable(struct napi_struct *n) { BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); smp_mb__before_clear_bit(); clear_bit(NAPI_STATE_SCHED, &n->state); } [37255.571996] Kernel panic - not syncing: BUG! [37255.595234] Call trace: [37255.597694] [] dump_backtrace+0x0/0x1a0 [37255.603114] [] show_stack+0x20/0x28 [37255.608187] [] dump_stack+0x98/0xb8 [37255.613258] [] panic+0x10c/0x26c [37255.618070] [] hns_nic_net_up+0x30c/0x4e0 [37255.623664] [] hns_nic_net_open+0x94/0x12c [37255.629346] [] __dev_open+0xf4/0x168 [37255.634504] [] __dev_change_flags+0x98/0x15c [37255.640359] [] dev_change_flags+0x2c/0x68 [37255.769580] [] devinet_ioctl+0x650/0x704 [37255.775086] [] inet_ioctl+0x98/0xb4 [37255.780159] [] sock_do_ioctl+0x44/0x84 [37255.785490] [] sock_ioctl+0x248/0x30c [37255.790737] [] do_vfs_ioctl+0x480/0x618 [37255.796156] [] SyS_ioctl+0x90/0xa4 [37255.801139] SMP: stopping secondary CPUs [37255.805079] kbox: catch panic event. [37255.809586] collected_len = 128928, LOG_BUF_LEN_LOCAL = 131072 [37255.816103] flush cache 0xffff80003f000000 size 0x800000 [37255.822192] flush cache 0xffff80003f000000 size 0x800000 [37255.828289] flush cache 0xffff80003f000000 size 0x800000 [37255.834378] kbox: no notify die func register. no need to notify [37255.840413] ---[ end Kernel panic - not syncing: BUG! This patchset fix this bug according to the flag NIC_STATE_DOWN. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 2758c4bb9208..801fbcebfbaa 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1433,6 +1433,9 @@ static int hns_nic_net_up(struct net_device *ndev) int i, j; int ret; + if (!test_bit(NIC_STATE_DOWN, &priv->state)) + return 0; + ret = hns_nic_init_irq(priv); if (ret != 0) { netdev_err(ndev, "hns init irq failed! ret=%d\n", ret); From 5ddca1a9915f48f583186c627865d20f1e435d23 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:24 +0800 Subject: [PATCH 1308/2608] net: hns: Clean rx fbd when ae stopped. [ Upstream commit 31f6b61d810654fb3ef43f4d8afda0f44b142fad ] If there are packets in hardware when changing the speed or duplex, it may cause hardware hang up. This patch adds the code to wait rx fbd clean up when ae stopped. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index bf930ab3c2bd..a185a8be7999 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -379,6 +379,9 @@ void hns_ae_stop(struct hnae_handle *handle) hns_ae_ring_enable_all(handle, 0); + /* clean rx fbd. */ + hns_rcb_wait_fbd_clean(handle->qs, handle->q_num, RCB_INT_FLAG_RX); + (void)hns_mac_vm_config_bc_en(mac_cb, 0, false); } From 02ad6ced13d71d8af63c6195ac1a8205d8d40e2f Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:25 +0800 Subject: [PATCH 1309/2608] net: hns: Free irq when exit from abnormal branch [ Upstream commit c82bd077e1ba3dd586569c733dc6d3dd4b0e43cd ] 1.In "hns_nic_init_irq", if request irq fail at index i, the function return directly without releasing irq resources that already requested. 2.In "hns_nic_net_up" after "hns_nic_init_irq", if exceptional branch occurs, irqs that already requested are not release. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 801fbcebfbaa..bcd9aaba09b3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1384,6 +1384,22 @@ static int hns_nic_init_affinity_mask(int q_num, int ring_idx, return cpu; } +static void hns_nic_free_irq(int q_num, struct hns_nic_priv *priv) +{ + int i; + + for (i = 0; i < q_num * 2; i++) { + if (priv->ring_data[i].ring->irq_init_flag == RCB_IRQ_INITED) { + irq_set_affinity_hint(priv->ring_data[i].ring->irq, + NULL); + free_irq(priv->ring_data[i].ring->irq, + &priv->ring_data[i]); + priv->ring_data[i].ring->irq_init_flag = + RCB_IRQ_NOT_INITED; + } + } +} + static int hns_nic_init_irq(struct hns_nic_priv *priv) { struct hnae_handle *h = priv->ae_handle; @@ -1409,7 +1425,7 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv) if (ret) { netdev_err(priv->netdev, "request irq(%d) fail\n", rd->ring->irq); - return ret; + goto out_free_irq; } disable_irq(rd->ring->irq); @@ -1424,6 +1440,10 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv) } return 0; + +out_free_irq: + hns_nic_free_irq(h->q_num, priv); + return ret; } static int hns_nic_net_up(struct net_device *ndev) @@ -1471,6 +1491,7 @@ out_has_some_queues: for (j = i - 1; j >= 0; j--) hns_nic_ring_close(ndev, j); + hns_nic_free_irq(h->q_num, priv); set_bit(NIC_STATE_DOWN, &priv->state); return ret; From b984806de33f1bade282aac1725610698ceff4f2 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:26 +0800 Subject: [PATCH 1310/2608] net: hns: Avoid net reset caused by pause frames storm [ Upstream commit a57275d35576fdd89d8c771eedf1e7cf97e0dfa6 ] There will be a large number of MAC pause frames on the net, which caused tx timeout of net device. And then the net device was reset to try to recover it. So that is not useful, and will cause some other problems. So need doubled ndev->watchdog_timeo if device watchdog occurred until watchdog_timeo up to 40s and then try resetting to recover it. When collecting dfx information such as hardware registers when tx timeout. Some registers for count were cleared when read. So need move this task before update net state which also read the count registers. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index bcd9aaba09b3..f28a639e8393 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1609,11 +1609,19 @@ static int hns_nic_net_stop(struct net_device *ndev) } static void hns_tx_timeout_reset(struct hns_nic_priv *priv); +#define HNS_TX_TIMEO_LIMIT (40 * HZ) static void hns_nic_net_timeout(struct net_device *ndev) { struct hns_nic_priv *priv = netdev_priv(ndev); - hns_tx_timeout_reset(priv); + if (ndev->watchdog_timeo < HNS_TX_TIMEO_LIMIT) { + ndev->watchdog_timeo *= 2; + netdev_info(ndev, "watchdog_timo changed to %d.\n", + ndev->watchdog_timeo); + } else { + ndev->watchdog_timeo = HNS_NIC_TX_TIMEOUT; + hns_tx_timeout_reset(priv); + } } static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, @@ -2193,11 +2201,11 @@ static void hns_nic_service_task(struct work_struct *work) = container_of(work, struct hns_nic_priv, service_task); struct hnae_handle *h = priv->ae_handle; + hns_nic_reset_subtask(priv); hns_nic_update_link_status(priv->netdev); h->dev->ops->update_led_status(h); hns_nic_update_stats(priv->netdev); - hns_nic_reset_subtask(priv); hns_nic_service_event_complete(priv); } From 4f64bc8f6626b39a4a65d2450b980de00c742db2 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:27 +0800 Subject: [PATCH 1311/2608] net: hns: Fix ntuple-filters status error. [ Upstream commit 7e74a19ca522aec7c2be201a7ae1d1d57ded409b ] The ntuple-filters features is forced on by chip. But it shows "ntuple-filters: off [fixed]" when use ethtool. This patch make it correct with "ntuple-filters: on [fixed]". Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index f28a639e8393..86662a14208e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2486,7 +2486,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) ndev->min_mtu = MAC_MIN_MTU; switch (priv->enet_ver) { case AE_VERSION_2: - ndev->features |= NETIF_F_TSO | NETIF_F_TSO6; + ndev->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_NTUPLE; ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6; From 31212d944a0f3873e6bc6c28d352db7d6493fcac Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:28 +0800 Subject: [PATCH 1312/2608] net: hns: Add mac pcs config when enable|disable mac [ Upstream commit 726ae5c9e5f0c18eca8ea5296b526242c3e89822 ] In some case, when mac enable|disable and adjust link, may cause hard to link(or abnormal) between mac and phy. This patch adds the code for rx PCS to avoid this bug. Disable the rx PCS when driver disable the gmac, and enable the rx PCS when driver enable the mac. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 14 ++++++++++---- drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 8c7bc5cf193c..5e8930d02f50 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -67,11 +67,14 @@ static void hns_gmac_enable(void *mac_drv, enum mac_commom_mode mode) struct mac_driver *drv = (struct mac_driver *)mac_drv; /*enable GE rX/tX */ - if ((mode == MAC_COMM_MODE_TX) || (mode == MAC_COMM_MODE_RX_AND_TX)) + if (mode == MAC_COMM_MODE_TX || mode == MAC_COMM_MODE_RX_AND_TX) dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_TX_EN_B, 1); - if ((mode == MAC_COMM_MODE_RX) || (mode == MAC_COMM_MODE_RX_AND_TX)) + if (mode == MAC_COMM_MODE_RX || mode == MAC_COMM_MODE_RX_AND_TX) { + /* enable rx pcs */ + dsaf_set_dev_bit(drv, GMAC_PCS_RX_EN_REG, 0, 0); dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 1); + } } static void hns_gmac_disable(void *mac_drv, enum mac_commom_mode mode) @@ -79,11 +82,14 @@ static void hns_gmac_disable(void *mac_drv, enum mac_commom_mode mode) struct mac_driver *drv = (struct mac_driver *)mac_drv; /*disable GE rX/tX */ - if ((mode == MAC_COMM_MODE_TX) || (mode == MAC_COMM_MODE_RX_AND_TX)) + if (mode == MAC_COMM_MODE_TX || mode == MAC_COMM_MODE_RX_AND_TX) dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_TX_EN_B, 0); - if ((mode == MAC_COMM_MODE_RX) || (mode == MAC_COMM_MODE_RX_AND_TX)) + if (mode == MAC_COMM_MODE_RX || mode == MAC_COMM_MODE_RX_AND_TX) { + /* disable rx pcs */ + dsaf_set_dev_bit(drv, GMAC_PCS_RX_EN_REG, 0, 1); dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 0); + } } /* hns_gmac_get_en - get port enable diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 5ae3d017ddbc..ae97b203f73b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -534,6 +534,7 @@ #define GMAC_LD_LINK_COUNTER_REG 0x01D0UL #define GMAC_LOOP_REG 0x01DCUL #define GMAC_RECV_CONTROL_REG 0x01E0UL +#define GMAC_PCS_RX_EN_REG 0x01E4UL #define GMAC_VLAN_CODE_REG 0x01E8UL #define GMAC_RX_OVERRUN_CNT_REG 0x01ECUL #define GMAC_RX_LENGTHFIELD_ERR_CNT_REG 0x01F4UL From 1eac41bac7c9a578c6bd93d4660fa4c67b4238d9 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:29 +0800 Subject: [PATCH 1313/2608] net: hns: Fix ping failed when use net bridge and send multicast [ Upstream commit 6adafc356e20189193b38ee6b9af7743078bf6b4 ] Create a net bridge, add eth and vnet to the bridge. The vnet is used by a virtual machine. When ping the virtual machine from the outside host and the virtual machine send multicast at the same time, the ping package will lost. The multicast package send to the eth, eth will send it to the bridge too, and the bridge learn the mac of eth. When outside host ping the virtual mechine, it will match the promisc entry of the eth which is not expected, and the bridge send it to eth not to vnet, cause ping lost. So this patch change promisc tcam entry position to the END of 512 tcam entries, which indicate lower priority. And separate one promisc entry to two: mc & uc, to avoid package match the wrong tcam entry. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns/hns_dsaf_main.c | 271 ++++++++++++++---- 1 file changed, 223 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 9086d566a624..51d42d7f6074 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -934,6 +934,62 @@ static void hns_dsaf_tcam_mc_cfg( spin_unlock_bh(&dsaf_dev->tcam_lock); } +/** + * hns_dsaf_tcam_uc_cfg_vague - INT + * @dsaf_dev: dsa fabric device struct pointer + * @address, + * @ptbl_tcam_data, + */ +static void hns_dsaf_tcam_uc_cfg_vague(struct dsaf_device *dsaf_dev, + u32 address, + struct dsaf_tbl_tcam_data *tcam_data, + struct dsaf_tbl_tcam_data *tcam_mask, + struct dsaf_tbl_tcam_ucast_cfg *tcam_uc) +{ + spin_lock_bh(&dsaf_dev->tcam_lock); + hns_dsaf_tbl_tcam_addr_cfg(dsaf_dev, address); + hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, tcam_data); + hns_dsaf_tbl_tcam_ucast_cfg(dsaf_dev, tcam_uc); + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask); + hns_dsaf_tbl_tcam_data_ucast_pul(dsaf_dev); + + /*Restore Match Data*/ + tcam_mask->tbl_tcam_data_high = 0xffffffff; + tcam_mask->tbl_tcam_data_low = 0xffffffff; + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask); + + spin_unlock_bh(&dsaf_dev->tcam_lock); +} + +/** + * hns_dsaf_tcam_mc_cfg_vague - INT + * @dsaf_dev: dsa fabric device struct pointer + * @address, + * @ptbl_tcam_data, + * @ptbl_tcam_mask + * @ptbl_tcam_mcast + */ +static void hns_dsaf_tcam_mc_cfg_vague(struct dsaf_device *dsaf_dev, + u32 address, + struct dsaf_tbl_tcam_data *tcam_data, + struct dsaf_tbl_tcam_data *tcam_mask, + struct dsaf_tbl_tcam_mcast_cfg *tcam_mc) +{ + spin_lock_bh(&dsaf_dev->tcam_lock); + hns_dsaf_tbl_tcam_addr_cfg(dsaf_dev, address); + hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, tcam_data); + hns_dsaf_tbl_tcam_mcast_cfg(dsaf_dev, tcam_mc); + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask); + hns_dsaf_tbl_tcam_data_mcast_pul(dsaf_dev); + + /*Restore Match Data*/ + tcam_mask->tbl_tcam_data_high = 0xffffffff; + tcam_mask->tbl_tcam_data_low = 0xffffffff; + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask); + + spin_unlock_bh(&dsaf_dev->tcam_lock); +} + /** * hns_dsaf_tcam_mc_invld - INT * @dsaf_id: dsa fabric id @@ -1491,6 +1547,27 @@ static u16 hns_dsaf_find_empty_mac_entry(struct dsaf_device *dsaf_dev) return DSAF_INVALID_ENTRY_IDX; } +/** + * hns_dsaf_find_empty_mac_entry_reverse + * search dsa fabric soft empty-entry from the end + * @dsaf_dev: dsa fabric device struct pointer + */ +static u16 hns_dsaf_find_empty_mac_entry_reverse(struct dsaf_device *dsaf_dev) +{ + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_soft_mac_tbl *soft_mac_entry; + int i; + + soft_mac_entry = priv->soft_mac_tbl + (DSAF_TCAM_SUM - 1); + for (i = (DSAF_TCAM_SUM - 1); i > 0; i--) { + /* search all entry from end to start.*/ + if (soft_mac_entry->index == DSAF_INVALID_ENTRY_IDX) + return i; + soft_mac_entry--; + } + return DSAF_INVALID_ENTRY_IDX; +} + /** * hns_dsaf_set_mac_key - set mac key * @dsaf_dev: dsa fabric device struct pointer @@ -2666,58 +2743,156 @@ int hns_dsaf_get_regs_count(void) return DSAF_DUMP_REGS_NUM; } +static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) +{ + struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80}; + struct dsaf_tbl_tcam_data tbl_tcam_data_mc = {0x01000000, port}; + struct dsaf_tbl_tcam_data tbl_tcam_mask_uc = {0x01000000, 0xf}; + struct dsaf_tbl_tcam_mcast_cfg tbl_tcam_mcast = {0, 0, {0} }; + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, port}; + struct dsaf_drv_mac_single_dest_entry mask_entry; + struct dsaf_drv_tbl_tcam_key temp_key, mask_key; + struct dsaf_drv_soft_mac_tbl *soft_mac_entry; + u16 entry_index = DSAF_INVALID_ENTRY_IDX; + struct dsaf_drv_tbl_tcam_key mac_key; + struct hns_mac_cb *mac_cb; + u8 addr[ETH_ALEN] = {0}; + u8 port_num; + u16 mskid; + + /* promisc use vague table match with vlanid = 0 & macaddr = 0 */ + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr); + entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); + if (entry_index != DSAF_INVALID_ENTRY_IDX) + return; + + /* put promisc tcam entry in the end. */ + /* 1. set promisc unicast vague tcam entry. */ + entry_index = hns_dsaf_find_empty_mac_entry_reverse(dsaf_dev); + if (entry_index == DSAF_INVALID_ENTRY_IDX) { + dev_err(dsaf_dev->dev, + "enable uc promisc failed (port:%#x)\n", + port); + return; + } + + mac_cb = dsaf_dev->mac_cb[port]; + (void)hns_mac_get_inner_port_num(mac_cb, 0, &port_num); + tbl_tcam_ucast.tbl_ucast_out_port = port_num; + + /* config uc vague table */ + hns_dsaf_tcam_uc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_uc, + &tbl_tcam_mask_uc, &tbl_tcam_ucast); + + /* update software entry */ + soft_mac_entry = priv->soft_mac_tbl; + soft_mac_entry += entry_index; + soft_mac_entry->index = entry_index; + soft_mac_entry->tcam_key.high.val = mac_key.high.val; + soft_mac_entry->tcam_key.low.val = mac_key.low.val; + /* step back to the START for mc. */ + soft_mac_entry = priv->soft_mac_tbl; + + /* 2. set promisc multicast vague tcam entry. */ + entry_index = hns_dsaf_find_empty_mac_entry_reverse(dsaf_dev); + if (entry_index == DSAF_INVALID_ENTRY_IDX) { + dev_err(dsaf_dev->dev, + "enable mc promisc failed (port:%#x)\n", + port); + return; + } + + memset(&mask_entry, 0x0, sizeof(mask_entry)); + memset(&mask_key, 0x0, sizeof(mask_key)); + memset(&temp_key, 0x0, sizeof(temp_key)); + mask_entry.addr[0] = 0x01; + hns_dsaf_set_mac_key(dsaf_dev, &mask_key, mask_entry.in_vlan_id, + port, mask_entry.addr); + tbl_tcam_mcast.tbl_mcast_item_vld = 1; + tbl_tcam_mcast.tbl_mcast_old_en = 0; + + if (port < DSAF_SERVICE_NW_NUM) { + mskid = port; + } else if (port >= DSAF_BASE_INNER_PORT_NUM) { + mskid = port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; + } else { + dev_err(dsaf_dev->dev, "%s,pnum(%d)error,key(%#x:%#x)\n", + dsaf_dev->ae_dev.name, port, + mask_key.high.val, mask_key.low.val); + return; + } + + dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], + mskid % 32, 1); + memcpy(&temp_key, &mask_key, sizeof(mask_key)); + hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc, + (struct dsaf_tbl_tcam_data *)(&mask_key), + &tbl_tcam_mcast); + + /* update software entry */ + soft_mac_entry += entry_index; + soft_mac_entry->index = entry_index; + soft_mac_entry->tcam_key.high.val = temp_key.high.val; + soft_mac_entry->tcam_key.low.val = temp_key.low.val; +} + +static void set_promisc_tcam_disable(struct dsaf_device *dsaf_dev, u32 port) +{ + struct dsaf_tbl_tcam_data tbl_tcam_data_mc = {0x01000000, port}; + struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 0, 0, 0, 0}; + struct dsaf_tbl_tcam_mcast_cfg tbl_tcam_mcast = {0, 0, {0} }; + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, 0}; + struct dsaf_tbl_tcam_data tbl_tcam_mask = {0, 0}; + struct dsaf_drv_soft_mac_tbl *soft_mac_entry; + u16 entry_index = DSAF_INVALID_ENTRY_IDX; + struct dsaf_drv_tbl_tcam_key mac_key; + u8 addr[ETH_ALEN] = {0}; + + /* 1. delete uc vague tcam entry. */ + /* promisc use vague table match with vlanid = 0 & macaddr = 0 */ + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr); + entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); + + if (entry_index == DSAF_INVALID_ENTRY_IDX) + return; + + /* config uc vague table */ + hns_dsaf_tcam_uc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_uc, + &tbl_tcam_mask, &tbl_tcam_ucast); + /* update soft management table. */ + soft_mac_entry = priv->soft_mac_tbl; + soft_mac_entry += entry_index; + soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; + /* step back to the START for mc. */ + soft_mac_entry = priv->soft_mac_tbl; + + /* 2. delete mc vague tcam entry. */ + addr[0] = 0x01; + memset(&mac_key, 0x0, sizeof(mac_key)); + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr); + entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); + + if (entry_index == DSAF_INVALID_ENTRY_IDX) + return; + + /* config mc vague table */ + hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc, + &tbl_tcam_mask, &tbl_tcam_mcast); + /* update soft management table. */ + soft_mac_entry += entry_index; + soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; +} + /* Reserve the last TCAM entry for promisc support */ -#define dsaf_promisc_tcam_entry(port) \ - (DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + (port)) void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, u32 port, bool enable) { - struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); - struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; - u16 entry_index; - struct dsaf_drv_tbl_tcam_key tbl_tcam_data, tbl_tcam_mask; - struct dsaf_tbl_tcam_mcast_cfg mac_data = {0}; - - if ((AE_IS_VER1(dsaf_dev->dsaf_ver)) || HNS_DSAF_IS_DEBUG(dsaf_dev)) - return; - - /* find the tcam entry index for promisc */ - entry_index = dsaf_promisc_tcam_entry(port); - - memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); - memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); - - /* config key mask */ - if (enable) { - dsaf_set_field(tbl_tcam_data.low.bits.port_vlan, - DSAF_TBL_TCAM_KEY_PORT_M, - DSAF_TBL_TCAM_KEY_PORT_S, port); - dsaf_set_field(tbl_tcam_mask.low.bits.port_vlan, - DSAF_TBL_TCAM_KEY_PORT_M, - DSAF_TBL_TCAM_KEY_PORT_S, 0xf); - - /* SUB_QID */ - dsaf_set_bit(mac_data.tbl_mcast_port_msk[0], - DSAF_SERVICE_NW_NUM, true); - mac_data.tbl_mcast_item_vld = true; /* item_vld bit */ - } else { - mac_data.tbl_mcast_item_vld = false; /* item_vld bit */ - } - - dev_dbg(dsaf_dev->dev, - "set_promisc_entry, %s Mac key(%#x:%#x) entry_index%d\n", - dsaf_dev->ae_dev.name, tbl_tcam_data.high.val, - tbl_tcam_data.low.val, entry_index); - - /* config promisc entry with mask */ - hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&tbl_tcam_data, - (struct dsaf_tbl_tcam_data *)&tbl_tcam_mask, - &mac_data); - - /* config software entry */ - soft_mac_entry += entry_index; - soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX; + if (enable) + set_promisc_tcam_enable(dsaf_dev, port); + else + set_promisc_tcam_disable(dsaf_dev, port); } int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port) From 8265e34e5705c21978f1e3359c430832110057f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Dec 2018 17:38:51 -0500 Subject: [PATCH 1314/2608] SUNRPC: Fix a race with XPRT_CONNECTING [ Upstream commit cf76785d30712d90185455e752337acdb53d2a5d ] Ensure that we clear XPRT_CONNECTING before releasing the XPRT_LOCK so that we don't have races between the (asynchronous) socket setup code and tasks in xprt_connect(). Signed-off-by: Trond Myklebust Tested-by: Chuck Lever Signed-off-by: Sasha Levin --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 30192abfdc3b..05a58cc1b0cd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2223,8 +2223,8 @@ static void xs_udp_setup_socket(struct work_struct *work) trace_rpc_socket_connect(xprt, sock, 0); status = 0; out: - xprt_unlock_connect(xprt, transport); xprt_clear_connecting(xprt); + xprt_unlock_connect(xprt, transport); xprt_wake_pending_tasks(xprt, status); } @@ -2451,8 +2451,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) } status = -EAGAIN; out: - xprt_unlock_connect(xprt, transport); xprt_clear_connecting(xprt); + xprt_unlock_connect(xprt, transport); xprt_wake_pending_tasks(xprt, status); } From 6de29b8cd448490a6ba514d9f6b334341cf99eff Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Dec 2018 10:05:13 +0300 Subject: [PATCH 1315/2608] qed: Fix an error code qed_ll2_start_xmit() [ Upstream commit f07d4276892d97671e880190ff195a288b2d8d92 ] We accidentally deleted the code to set "rc = -ENOMEM;" and this patch adds it back. Fixes: d2201a21598a ("qed: No need for LL2 frags indication") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 5f52f14761a3..b73bcbeb5f27 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -2351,6 +2351,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) if (unlikely(dma_mapping_error(&cdev->pdev->dev, mapping))) { DP_NOTICE(cdev, "Unable to map frag - dropping packet\n"); + rc = -ENOMEM; goto err; } From 358e56b282ecfd3ca9ff6521ee6e35ef34945bcd Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 17 Dec 2018 15:05:39 +0200 Subject: [PATCH 1316/2608] net: macb: fix random memory corruption on RX with 64-bit DMA [ Upstream commit e100a897bf9b19089e57f236f2398c9e0538900e ] 64-bit DMA addresses are split in upper and lower halves that are written in separate fields on GEM. For RX, bit 0 of the address is used as the ownership bit (RX_USED). When the RX_USED bit is unset the controller is allowed to write data to the buffer. The driver does not guarantee that the controller already sees the upper half when the RX_USED bit is cleared, possibly resulting in the controller writing an incoming frame to an address with an incorrect upper half and therefore possibly corrupting unrelated system memory. Fix that by adding the necessary DMA memory barrier between the writes. This corruption was observed on a ZynqMP based system. Fixes: fff8019a08b6 ("net: macb: Add 64 bit addressing support for GEM") Signed-off-by: Anssi Hannula Acked-by: Harini Katakam Tested-by: Claudiu Beznea Cc: Nicolas Ferre Cc: Michal Simek Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 0b2f9ddfb1c4..3aabc36eec0b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -675,6 +675,11 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_ if (bp->hw_dma_cap & HW_DMA_CAP_64B) { desc_64 = macb_64b_desc(bp, desc); desc_64->addrh = upper_32_bits(addr); + /* The low bits of RX address contain the RX_USED bit, clearing + * of which allows packet RX. Make sure the high bits are also + * visible to HW at that point. + */ + dma_wmb(); } #endif desc->addr = lower_32_bits(addr); From 4d45ed2d0d28326b9fb7401d96f7c772ee0ad078 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 17 Dec 2018 15:05:40 +0200 Subject: [PATCH 1317/2608] net: macb: fix dropped RX frames due to a race [ Upstream commit 8159ecab0db9095902d4c73605fb8787f5c7d653 ] Bit RX_USED set to 0 in the address field allows the controller to write data to the receive buffer descriptor. The driver does not ensure the ctrl field is ready (cleared) when the controller sees the RX_USED=0 written by the driver. The ctrl field might only be cleared after the controller has already updated it according to a newly received frame, causing the frame to be discarded in gem_rx() due to unexpected ctrl field contents. A message is logged when the above scenario occurs: macb ff0b0000.ethernet eth0: not whole frame pointed by descriptor Fix the issue by ensuring that when the controller sees RX_USED=0 the ctrl field is already cleared. This issue was observed on a ZynqMP based system. Fixes: 4df95131ea80 ("net/macb: change RX path for GEM") Signed-off-by: Anssi Hannula Tested-by: Claudiu Beznea Cc: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 3aabc36eec0b..9046993947cc 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -923,14 +923,19 @@ static void gem_rx_refill(struct macb *bp) if (entry == bp->rx_ring_size - 1) paddr |= MACB_BIT(RX_WRAP); - macb_set_addr(bp, desc, paddr); desc->ctrl = 0; + /* Setting addr clears RX_USED and allows reception, + * make sure ctrl is cleared first to avoid a race. + */ + dma_wmb(); + macb_set_addr(bp, desc, paddr); /* properly align Ethernet header */ skb_reserve(skb, NET_IP_ALIGN); } else { - desc->addr &= ~MACB_BIT(RX_USED); desc->ctrl = 0; + dma_wmb(); + desc->addr &= ~MACB_BIT(RX_USED); } } From 9c5239ee1c3f59e1ece9072cae57af8b601e0b99 Mon Sep 17 00:00:00 2001 From: Jason Martinsen Date: Tue, 18 Dec 2018 05:38:22 +0000 Subject: [PATCH 1318/2608] lan78xx: Resolve issue with changing MAC address [ Upstream commit 15515aaaa69659c502003926a2067ee76176148a ] Current state for the lan78xx driver does not allow for changing the MAC address of the interface, without either removing the module (if you compiled it that way) or rebooting the machine. If you attempt to change the MAC address, ifconfig will show the new address, however, the system/interface will not respond to any traffic using that configuration. A few short-term options to work around this are to unload the module and reload it with the new MAC address, change the interface to "promisc", or reboot with the correct configuration to change the MAC. This patch enables the ability to change the MAC address via fairly normal means... ifdown modify entry in /etc/network/interfaces OR a similar method ifup Then test via any network communication, such as ICMP requests to gateway. My only test platform for this patch has been a raspberry pi model 3b+. Signed-off-by: Jason Martinsen ----- Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index e069b310d6a6..b62c41114e34 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2212,6 +2212,10 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + /* Added to support MAC address changes */ + ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); + ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + return 0; } From fb34793c9f380b99c9ce925ff2d4e6b7f5af32c8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 18 Dec 2018 15:19:47 +0000 Subject: [PATCH 1319/2608] vxge: ensure data0 is initialized in when fetching firmware version information [ Upstream commit f7db2beb4c2c6cc8111f5ab90fc7363ca91107b6 ] Currently variable data0 is not being initialized so a garbage value is being passed to vxge_hw_vpath_fw_api and this value is being written to the rts_access_steer_data0 register. There are other occurrances where data0 is being initialized to zero (e.g. in function vxge_hw_upgrade_read_version) so I think it makes sense to ensure data0 is initialized likewise to 0. Detected by CoverityScan, CID#140696 ("Uninitialized scalar variable") Fixes: 8424e00dfd52 ("vxge: serialize access to steering control register") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/neterion/vxge/vxge-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 6223930a8155..6f57b0b7d57a 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -808,7 +808,7 @@ __vxge_hw_vpath_fw_ver_get(struct __vxge_hw_virtualpath *vpath, struct vxge_hw_device_date *fw_date = &hw_info->fw_date; struct vxge_hw_device_version *flash_version = &hw_info->flash_version; struct vxge_hw_device_date *flash_date = &hw_info->flash_date; - u64 data0, data1 = 0, steer_ctrl = 0; + u64 data0 = 0, data1 = 0, steer_ctrl = 0; enum vxge_hw_status status; status = vxge_hw_vpath_fw_api(vpath, From bb2509c12bea9601f0637f815936e9562d3c3cbc Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sat, 15 Dec 2018 11:03:06 +0200 Subject: [PATCH 1320/2608] mac80211: free skb fraglist before freeing the skb [ Upstream commit 34b1e0e9efe101822e83cc62d22443ed3867ae7a ] mac80211 uses the frag list to build AMSDU. When freeing the skb, it may not be really freed, since someone is still holding a reference to it. In that case, when TCP skb is being retransmitted, the pointer to the frag list is being reused, while the data in there is no longer valid. Since we will never get frag list from the network stack, as mac80211 doesn't advertise the capability, we can safely free and nullify it before releasing the SKB. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/status.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 35912270087c..b18466cf466c 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -545,6 +545,11 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, } ieee80211_led_tx(local); + + if (skb_has_frag_list(skb)) { + kfree_skb_list(skb_shinfo(skb)->frag_list); + skb_shinfo(skb)->frag_list = NULL; + } } /* From b08f331e84f75a2981350d3ee7452ea67412776d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 18 Dec 2018 14:25:41 +0900 Subject: [PATCH 1321/2608] kbuild: fix false positive warning/error about missing libelf [ Upstream commit ef7cfd00b2caf6edeb7f169682b64be2d0a798cf ] For the same reason as commit 25896d073d8a ("x86/build: Fix compiler support check for CONFIG_RETPOLINE"), you cannot put this $(error ...) into the parse stage of the top Makefile. Perhaps I'd propose a more sophisticated solution later, but this is the best I can do for now. Link: https://lkml.org/lkml/2017/12/25/211 Reported-by: Paul Gortmaker Reported-by: Bernd Edlinger Reported-by: Qian Cai Cc: Josh Poimboeuf Signed-off-by: Masahiro Yamada Tested-by: Qian Cai Signed-off-by: Sasha Levin --- Makefile | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index be4d1f25cb29..2de4072bcc90 100644 --- a/Makefile +++ b/Makefile @@ -954,11 +954,6 @@ ifdef CONFIG_STACK_VALIDATION ifeq ($(has_libelf),1) objtool_target := tools/objtool FORCE else - ifdef CONFIG_UNWINDER_ORC - $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") - else - $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") - endif SKIP_STACK_VALIDATION := 1 export SKIP_STACK_VALIDATION endif @@ -1102,6 +1097,14 @@ uapi-asm-generic: PHONY += prepare-objtool prepare-objtool: $(objtool_target) +ifeq ($(SKIP_STACK_VALIDATION),1) +ifdef CONFIG_UNWINDER_ORC + @echo "error: Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2 + @false +else + @echo "warning: Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2 +endif +endif # Check for CONFIG flags that require compiler support. Abort the build # after .config has been processed, but before the kernel build starts. From 9adf9d714b3dd94fddf583eb8e8f12efdfa797e4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 19 Dec 2018 18:21:51 -0500 Subject: [PATCH 1322/2608] virtio: fix test build after uio.h change [ Upstream commit c5c08bed843c2b2c048c16d1296d7631d7c1620e ] Fixes: d38499530e5 ("fs: decouple READ and WRITE from the block layer ops") Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- tools/virtio/linux/kernel.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 395521a7a8d8..268ce239de65 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -23,6 +23,10 @@ #define PAGE_MASK (~(PAGE_SIZE-1)) #define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK) +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + typedef unsigned long long phys_addr_t; typedef unsigned long long dma_addr_t; typedef size_t __kernel_size_t; From 7bee9f9a131059a16538bb8e86ba567fc39ef733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 17 Dec 2018 09:43:13 +0100 Subject: [PATCH 1323/2608] gpio: mvebu: only fail on missing clk if pwm is actually to be used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c8da642d41a6811c21177c9994aa7dc35be67d46 ] The gpio IP on Armada 370 at offset 0x18180 has neither a clk nor pwm registers. So there is no need for a clk as the pwm isn't used anyhow. So only check for the clk in the presence of the pwm registers. This fixes a failure to probe the gpio driver for the above mentioned gpio device. Fixes: 757642f9a584 ("gpio: mvebu: Add limited PWM support") Signed-off-by: Uwe Kleine-König Reviewed-by: Gregory CLEMENT Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mvebu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 45c65f805fd6..be85d4b39e99 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -777,9 +777,6 @@ static int mvebu_pwm_probe(struct platform_device *pdev, "marvell,armada-370-gpio")) return 0; - if (IS_ERR(mvchip->clk)) - return PTR_ERR(mvchip->clk); - /* * There are only two sets of PWM configuration registers for * all the GPIO lines on those SoCs which this driver reserves @@ -790,6 +787,9 @@ static int mvebu_pwm_probe(struct platform_device *pdev, if (!res) return 0; + if (IS_ERR(mvchip->clk)) + return PTR_ERR(mvchip->clk); + /* * Use set A for lines of GPIO chip with id 0, B for GPIO chip * with id 1. Don't allow further GPIO chips to be used for PWM. From dd707bdce260c0b9adf99e9a3a77a21ca3ead2eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mantas=20Mikul=C4=97nas?= Date: Fri, 21 Dec 2018 01:04:26 -0800 Subject: [PATCH 1324/2608] Input: synaptics - enable SMBus for HP EliteBook 840 G4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7a71712293ba303aad928f580b89addb0be2892e ] dmesg reports that "Your touchpad (PNP: SYN3052 SYN0100 SYN0002 PNP0f13) says it can support a different bus." I've tested the offered psmouse.synaptics_intertouch=1 with 4.18.x and 4.19.x and it seems to work well. No problems seen with suspend/resume. Also, it appears that RMI/SMBus mode is actually required for 3-4 finger multitouch gestures to work -- otherwise they are not reported at all. Information from dmesg in both modes: psmouse serio3: synaptics: Touchpad model: 1, fw: 8.2, id: 0x1e2b1, caps: 0xf00123/0x840300/0x2e800/0x0, board id: 3139, fw id: 2000742 psmouse serio3: synaptics: Trying to set up SMBus access rmi4_smbus 6-002c: registering SMbus-connected sensor rmi4_f01 rmi4-00.fn01: found RMI device, manufacturer: Synaptics, product: TM3139-001, fw id: 2000742 Signed-off-by: Mantas Mikulėnas Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 515a0b0d48bf..e9ec5d10e0a9 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -179,6 +179,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0096", /* X280 */ "LEN0097", /* X280 -> ALPS trackpoint */ "LEN200f", /* T450s */ + "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ NULL }; From 29b7def0ffb1c9acc043a5ca4b9fb212c2d0687d Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 21 Dec 2018 00:22:32 -0600 Subject: [PATCH 1325/2608] net: netxen: fix a missing check and an uninitialized use [ Upstream commit d134e486e831defd26130770181f01dfc6195f7d ] When netxen_rom_fast_read() fails, "bios" is left uninitialized and may contain random value, thus should not be used. The fix ensures that if netxen_rom_fast_read() fails, we return "-EIO". Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index 3dd973475125..4b444351ab7d 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -1125,7 +1125,8 @@ netxen_validate_firmware(struct netxen_adapter *adapter) return -EINVAL; } val = nx_get_bios_version(adapter); - netxen_rom_fast_read(adapter, NX_BIOS_VERSION_OFFSET, (int *)&bios); + if (netxen_rom_fast_read(adapter, NX_BIOS_VERSION_OFFSET, (int *)&bios)) + return -EIO; if ((__force u32)val != bios) { dev_err(&pdev->dev, "%s: firmware bios is incompatible\n", fw_name[fw_type]); From be899fe44fe522bc3f573cee3b381c7c733d5961 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 21 Dec 2018 13:07:23 +0100 Subject: [PATCH 1326/2608] qmi_wwan: Fix qmap header retrieval in qmimux_rx_fixup [ Upstream commit d667044f49513d55fcfefe4fa8f8d96091782901 ] This patch fixes qmap header retrieval when modem is configured for dl data aggregation. Signed-off-by: Daniele Palmas Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 969474c9d297..891f8f975b43 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -151,17 +151,18 @@ static bool qmimux_has_slaves(struct usbnet *dev) static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { - unsigned int len, offset = sizeof(struct qmimux_hdr); + unsigned int len, offset = 0; struct qmimux_hdr *hdr; struct net_device *net; struct sk_buff *skbn; + u8 qmimux_hdr_sz = sizeof(*hdr); - while (offset < skb->len) { - hdr = (struct qmimux_hdr *)skb->data; + while (offset + qmimux_hdr_sz < skb->len) { + hdr = (struct qmimux_hdr *)(skb->data + offset); len = be16_to_cpu(hdr->pkt_len); /* drop the packet, bogus length */ - if (offset + len > skb->len) + if (offset + len + qmimux_hdr_sz > skb->len) return 0; /* control packet, we do not know what to do */ @@ -176,7 +177,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return 0; skbn->dev = net; - switch (skb->data[offset] & 0xf0) { + switch (skb->data[offset + qmimux_hdr_sz] & 0xf0) { case 0x40: skbn->protocol = htons(ETH_P_IP); break; @@ -188,12 +189,12 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) goto skip; } - skb_put_data(skbn, skb->data + offset, len); + skb_put_data(skbn, skb->data + offset + qmimux_hdr_sz, len); if (netif_rx(skbn) != NET_RX_SUCCESS) return 0; skip: - offset += len + sizeof(struct qmimux_hdr); + offset += len + qmimux_hdr_sz; } return 1; } From d87abc2ae5474bd4b42e892ee7369086d2d6af6b Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Dec 2018 11:01:45 -0500 Subject: [PATCH 1327/2608] serial/sunsu: fix refcount leak [ Upstream commit d430aff8cd0c57502d873909c184e3b5753f8b88 ] The function of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. su_get_type() doesn't do that. The match node are used as an identifier to compare against the current node, so we can directly drop the refcount after getting the node from the path as it is not used as pointer. Fix this by use a single variable and drop the refcount right after of_find_node_by_path(). Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/tty/serial/sunsu.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index 95d34d7565c9..1cf78cec7461 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -1393,22 +1393,43 @@ static inline struct console *SUNSU_CONSOLE(void) static enum su_type su_get_type(struct device_node *dp) { struct device_node *ap = of_find_node_by_path("/aliases"); + enum su_type rc = SU_PORT_PORT; if (ap) { const char *keyb = of_get_property(ap, "keyboard", NULL); const char *ms = of_get_property(ap, "mouse", NULL); + struct device_node *match; if (keyb) { - if (dp == of_find_node_by_path(keyb)) - return SU_PORT_KBD; + match = of_find_node_by_path(keyb); + + /* + * The pointer is used as an identifier not + * as a pointer, we can drop the refcount on + * the of__node immediately after getting it. + */ + of_node_put(match); + + if (dp == match) { + rc = SU_PORT_KBD; + goto out; + } } if (ms) { - if (dp == of_find_node_by_path(ms)) - return SU_PORT_MS; + match = of_find_node_by_path(ms); + + of_node_put(match); + + if (dp == match) { + rc = SU_PORT_MS; + goto out; + } } } - return SU_PORT_PORT; +out: + of_node_put(ap); + return rc; } static int su_probe(struct platform_device *op) From 8eb9f245dc977335fbff8ad5f428447a55f24484 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Thu, 6 Dec 2018 17:31:20 +0100 Subject: [PATCH 1328/2608] scsi: zfcp: fix posting too many status read buffers leading to adapter shutdown commit 60a161b7e5b2a252ff0d4c622266a7d8da1120ce upstream. Suppose adapter (open) recovery is between opened QDIO queues and before (the end of) initial posting of status read buffers (SRBs). This time window can be seconds long due to FSF_PROT_HOST_CONNECTION_INITIALIZING causing by design looping with exponential increase sleeps in the function performing exchange config data during recovery [zfcp_erp_adapter_strat_fsf_xconf()]. Recovery triggered by local link up. Suppose an event occurs for which the FCP channel would send an unsolicited notification to zfcp by means of a previously posted SRB. We saw it with local cable pull (link down) in multi-initiator zoning with multiple NPIV-enabled subchannels of the same shared FCP channel. As soon as zfcp_erp_adapter_strategy_open_fsf() starts posting the initial status read buffers from within the adapter's ERP thread, the channel does send an unsolicited notification. Since v2.6.27 commit d26ab06ede83 ("[SCSI] zfcp: receiving an unsolicted status can lead to I/O stall"), zfcp_fsf_status_read_handler() schedules adapter->stat_work to re-fill the just consumed SRB from a work item. Now the ERP thread and the work item post SRBs in parallel. Both contexts call the helper function zfcp_status_read_refill(). The tracking of missing (to be posted / re-filled) SRBs is not thread-safe due to separate atomic_read() and atomic_dec(), in order to depend on posting success. Hence, both contexts can see atomic_read(&adapter->stat_miss) == 1. One of the two contexts posts one too many SRB. Zfcp gets QDIO_ERROR_SLSB_STATE on the output queue (trace tag "qdireq1") leading to zfcp_erp_adapter_shutdown() in zfcp_qdio_handler_error(). An obvious and seemingly clean fix would be to schedule stat_work from the ERP thread and wait for it to finish. This would serialize all SRB re-fills. However, we already have another work item wait on the ERP thread: adapter->scan_work runs zfcp_fc_scan_ports() which calls zfcp_fc_eval_gpn_ft(). The latter calls zfcp_erp_wait() to wait for all the open port recoveries during zfcp auto port scan, but in fact it waits for any pending recovery including an adapter recovery. This approach leads to a deadlock. [see also v3.19 commit 18f87a67e6d6 ("zfcp: auto port scan resiliency"); v2.6.37 commit d3e1088d6873 ("[SCSI] zfcp: No ERP escalation on gpn_ft eval"); v2.6.28 commit fca55b6fb587 ("[SCSI] zfcp: fix deadlock between wq triggered port scan and ERP") fixing v2.6.27 commit c57a39a45a76 ("[SCSI] zfcp: wait until adapter is finished with ERP during auto-port"); v2.6.27 commit cc8c282963bd ("[SCSI] zfcp: Automatically attach remote ports")] Instead make the accounting of missing SRBs atomic for parallel execution in both the ERP thread and adapter->stat_work. Signed-off-by: Steffen Maier Fixes: d26ab06ede83 ("[SCSI] zfcp: receiving an unsolicted status can lead to I/O stall") Cc: #2.6.27+ Reviewed-by: Jens Remus Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_aux.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 84752152d41f..fab02bd73d85 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -274,16 +274,16 @@ static void zfcp_free_low_mem_buffers(struct zfcp_adapter *adapter) */ int zfcp_status_read_refill(struct zfcp_adapter *adapter) { - while (atomic_read(&adapter->stat_miss) > 0) + while (atomic_add_unless(&adapter->stat_miss, -1, 0)) if (zfcp_fsf_status_read(adapter->qdio)) { + atomic_inc(&adapter->stat_miss); /* undo add -1 */ if (atomic_read(&adapter->stat_miss) >= adapter->stat_read_buf_num) { zfcp_erp_adapter_reopen(adapter, 0, "axsref1"); return 1; } break; - } else - atomic_dec(&adapter->stat_miss); + } return 0; } From 0840feb396aa78cdcac7f9b30f7678b68e22fddb Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Thu, 13 Dec 2018 15:25:16 -0500 Subject: [PATCH 1329/2608] scsi: lpfc: do not set queue->page_count to 0 if pc_sli4_params.wqpcnt is invalid commit 4e87eb2f46ea547d12a276b2e696ab934d16cfb6 upstream. Certain older adapters such as the OneConnect OCe10100 may not have a valid wqpcnt value. In this case, do not set queue->page_count to 0 in lpfc_sli4_queue_alloc() as this will prevent the driver from initializing. Fixes: 895427bd01 ("scsi: lpfc: NVME Initiator: Base modifications") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Ewan D. Milne Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_sli.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 24b6e56f6e97..6c2b098b7609 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13941,7 +13941,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size, hw_page_size))/hw_page_size; /* If needed, Adjust page count to match the max the adapter supports */ - if (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt) + if (phba->sli4_hba.pc_sli4_params.wqpcnt && + (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt)) queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt; INIT_LIST_HEAD(&queue->list); From 76369ed5bce3b31de6c21ebf216d01fd9c9d3476 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Apr 2018 12:40:07 +0200 Subject: [PATCH 1330/2608] genirq/affinity: Don't return with empty affinity masks on error commit 0211e12dd0a5385ecffd3557bc570dbad7fcf245 upstream. When the allocation of node_to_possible_cpumask fails, then irq_create_affinity_masks() returns with a pointer to the empty affinity masks array, which will cause malfunction. Reorder the allocations so the masks array allocation comes last and every failure path returns NULL. Fixes: 9a0ef98e186d ("genirq/affinity: Assign vectors to all present CPUs") Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: Ming Lei Cc: Mihai Carabas Signed-off-by: Greg Kroah-Hartman --- kernel/irq/affinity.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index a37a3b4b6342..e0665549af59 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -108,7 +108,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) int affv = nvecs - affd->pre_vectors - affd->post_vectors; int last_affv = affv + affd->pre_vectors; nodemask_t nodemsk = NODE_MASK_NONE; - struct cpumask *masks; + struct cpumask *masks = NULL; cpumask_var_t nmsk, *node_to_possible_cpumask; /* @@ -121,13 +121,13 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; - masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); - if (!masks) - goto out; - node_to_possible_cpumask = alloc_node_to_possible_cpumask(); if (!node_to_possible_cpumask) - goto out; + goto outcpumsk; + + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); + if (!masks) + goto outnodemsk; /* Fill out vectors at the beginning that don't need affinity */ for (curvec = 0; curvec < affd->pre_vectors; curvec++) @@ -192,8 +192,9 @@ done: /* Fill out vectors at the end that don't need affinity */ for (; curvec < nvecs; curvec++) cpumask_copy(masks + curvec, irq_default_affinity); +outnodemsk: free_node_to_possible_cpumask(node_to_possible_cpumask); -out: +outcpumsk: free_cpumask_var(nmsk); return masks; } From 5ee254ef76e8c5f052487a5c3e0cd5fcfa805ec7 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Wed, 21 Feb 2018 14:45:12 -0800 Subject: [PATCH 1331/2608] tools: fix cross-compile var clobbering commit 7ed1c1901fe52e6c5828deb155920b44b0adabb1 upstream. Currently a number of Makefiles break when used with toolchains that pass extra flags in CC and other cross-compile related variables (such as --sysroot). Thus we get this error when we use a toolchain that puts --sysroot in the CC var: ~/src/linux/tools$ make iio [snip] iio_event_monitor.c:18:10: fatal error: unistd.h: No such file or directory #include ^~~~~~~~~~ This occurs because we clobber several env vars related to cross-compiling with lines like this: CC = $(CROSS_COMPILE)gcc Although this will point to a valid cross-compiler, we lose any extra flags that might exist in the CC variable, which can break toolchains that rely on them (for example, those that use --sysroot). This easily shows up using a Yocto SDK: $ . [snip]/sdk/environment-setup-cortexa8hf-neon-poky-linux-gnueabi $ echo $CC arm-poky-linux-gnueabi-gcc -march=armv7-a -mfpu=neon -mfloat-abi=hard -mcpu=cortex-a8 --sysroot=[snip]/sdk/sysroots/cortexa8hf-neon-poky-linux-gnueabi $ echo $CROSS_COMPILE arm-poky-linux-gnueabi- $ echo ${CROSS_COMPILE}gcc krm-poky-linux-gnueabi-gcc Although arm-poky-linux-gnueabi-gcc is a cross-compiler, we've lost the --sysroot and other flags that enable us to find the right libraries to link against, so we can't find unistd.h and other libraries and headers. Normally with the --sysroot flag we would find unistd.h in the sdk directory in the sysroot: $ find [snip]/sdk/sysroots -path '*/usr/include/unistd.h' [snip]/sdk/sysroots/cortexa8hf-neon-poky-linux-gnueabi/usr/include/unistd.h The perf Makefile adds CC = $(CROSS_COMPILE)gcc if and only if CC is not already set, and it compiles correctly with the above toolchain. So, generalize the logic that perf uses in the common Makefile and remove the manual CC = $(CROSS_COMPILE)gcc lines from each Makefile. Note that this patch does not fix cross-compile for all the tools (some have other bugs), but it does fix it for all except usb and acpi, which still have other unrelated issues. I tested both with and without the patch on native and cross-build and there appear to be no regressions. Link: http://lkml.kernel.org/r/20180107214028.23771-1-martin@martingkelly.com Signed-off-by: Martin Kelly Acked-by: Mark Brown Cc: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Linus Walleij Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Jonathan Cameron Cc: Pali Rohar Cc: Richard Purdie Cc: Jacek Anaszewski Cc: Pavel Machek Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Robert Moore Cc: Lv Zheng Cc: "Rafael J. Wysocki" Cc: Greg Kroah-Hartman Cc: Valentina Manea Cc: Shuah Khan Cc: Mario Limonciello Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Ignat Korchagin Signed-off-by: Greg Kroah-Hartman --- tools/cgroup/Makefile | 1 - tools/gpio/Makefile | 2 -- tools/hv/Makefile | 1 - tools/iio/Makefile | 2 -- tools/laptop/freefall/Makefile | 1 - tools/leds/Makefile | 1 - tools/perf/Makefile.perf | 6 ------ tools/power/acpi/Makefile.config | 3 --- tools/scripts/Makefile.include | 18 ++++++++++++++++++ tools/spi/Makefile | 2 -- tools/usb/Makefile | 1 - tools/vm/Makefile | 1 - 12 files changed, 18 insertions(+), 21 deletions(-) diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile index 860fa151640a..ffca068e4a76 100644 --- a/tools/cgroup/Makefile +++ b/tools/cgroup/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for cgroup tools -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra all: cgroup_event_listener diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 805a2c0cf4cd..240eda014b37 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -12,8 +12,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 31503819454d..68c2d7b059b3 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for Hyper-V tools -CC = $(CROSS_COMPILE)gcc WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS) diff --git a/tools/iio/Makefile b/tools/iio/Makefile index a08e7a47d6a3..332ed2f6c2c2 100644 --- a/tools/iio/Makefile +++ b/tools/iio/Makefile @@ -12,8 +12,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := iio_event_monitor lsiio iio_generic_buffer diff --git a/tools/laptop/freefall/Makefile b/tools/laptop/freefall/Makefile index 5f758c489a20..b572d94255f6 100644 --- a/tools/laptop/freefall/Makefile +++ b/tools/laptop/freefall/Makefile @@ -2,7 +2,6 @@ PREFIX ?= /usr SBINDIR ?= sbin INSTALL ?= install -CC = $(CROSS_COMPILE)gcc TARGET = freefall diff --git a/tools/leds/Makefile b/tools/leds/Makefile index c379af003807..7b6bed13daaa 100644 --- a/tools/leds/Makefile +++ b/tools/leds/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for LEDs tools -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -g -I../../include/uapi all: uledmon led_hw_brightness_mon diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 2a858ea56a81..349ea5133d83 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -144,12 +144,6 @@ define allow-override $(eval $(1) = $(2))) endef -# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) -$(call allow-override,LD,$(CROSS_COMPILE)ld) -$(call allow-override,CXX,$(CROSS_COMPILE)g++) - LD += $(EXTRA_LDFLAGS) HOSTCC ?= gcc diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index a1883bbb0144..2cccbba64418 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -56,9 +56,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} # to compile vs uClibc, that can be done here as well. CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- CROSS_COMPILE ?= $(CROSS) -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)gcc -STRIP = $(CROSS_COMPILE)strip HOSTCC = gcc # check if compiler option is supported diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 5f3f1f44ed0a..71dc7efc7efa 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -42,6 +42,24 @@ EXTRA_WARNINGS += -Wformat CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,LD,$(CROSS_COMPILE)ld) +$(call allow-override,CXX,$(CROSS_COMPILE)g++) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) + ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif diff --git a/tools/spi/Makefile b/tools/spi/Makefile index 90615e10c79a..815d15589177 100644 --- a/tools/spi/Makefile +++ b/tools/spi/Makefile @@ -11,8 +11,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := spidev_test spidev_fdx diff --git a/tools/usb/Makefile b/tools/usb/Makefile index 4e6506078494..01d758d73b6d 100644 --- a/tools/usb/Makefile +++ b/tools/usb/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for USB tools -CC = $(CROSS_COMPILE)gcc PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g -I../include diff --git a/tools/vm/Makefile b/tools/vm/Makefile index be320b905ea7..20f6cf04377f 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -6,7 +6,6 @@ TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api LIBS = $(LIB_DIR)/libapi.a -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -I../lib/ LDFLAGS = $(LIBS) From 3f2e4e1d9a6cffa95d31b7a491243d5e92a82507 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Tue, 8 Jan 2019 13:58:52 +0100 Subject: [PATCH 1332/2608] fork: record start_time late commit 7b55851367136b1efd84d98fea81ba57a98304cf upstream. This changes the fork(2) syscall to record the process start_time after initializing the basic task structure but still before making the new process visible to user-space. Technically, we could record the start_time anytime during fork(2). But this might lead to scenarios where a start_time is recorded long before a process becomes visible to user-space. For instance, with userfaultfd(2) and TLS, user-space can delay the execution of fork(2) for an indefinite amount of time (and will, if this causes network access, or similar). By recording the start_time late, it much closer reflects the point in time where the process becomes live and can be observed by other processes. Lastly, this makes it much harder for user-space to predict and control the start_time they get assigned. Previously, user-space could fork a process and stall it in copy_thread_tls() before its pid is allocated, but after its start_time is recorded. This can be misused to later-on cycle through PIDs and resume the stalled fork(2) yielding a process that has the same pid and start_time as a process that existed before. This can be used to circumvent security systems that identify processes by their pid+start_time combination. Even though user-space was always aware that start_time recording is flaky (but several projects are known to still rely on start_time-based identification), changing the start_time to be recorded late will help mitigate existing attacks and make it much harder for user-space to control the start_time a process gets assigned. Reported-by: Jann Horn Signed-off-by: Tom Gundersen Signed-off-by: David Herrmann Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 6a219fea4926..6d6ce2c3a364 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1672,8 +1672,6 @@ static __latent_entropy struct task_struct *copy_process( posix_cpu_timers_init(p); - p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1837,6 +1835,17 @@ static __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_free_pid; + /* + * From this point on we must avoid any synchronous user-space + * communication until we take the tasklist-lock. In particular, we do + * not want user-space to be able to predict the process start-time by + * stalling fork(2) after we recorded the start_time but before it is + * visible to the system. + */ + + p->start_time = ktime_get_ns(); + p->real_start_time = ktime_get_boot_ns(); + /* * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! From ce8daa28a1d53dcf4258cff2b559d7ad37325dfa Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 28 Dec 2018 00:36:37 -0800 Subject: [PATCH 1333/2608] zram: fix double free backing device commit 5547932dc67a48713eece4fa4703bfdf0cfcb818 upstream. If blkdev_get fails, we shouldn't do blkdev_put. Otherwise, kernel emits below log. This patch fixes it. WARNING: CPU: 0 PID: 1893 at fs/block_dev.c:1828 blkdev_put+0x105/0x120 Modules linked in: CPU: 0 PID: 1893 Comm: swapoff Not tainted 4.19.0+ #453 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 RIP: 0010:blkdev_put+0x105/0x120 Call Trace: __x64_sys_swapoff+0x46d/0x490 do_syscall_64+0x5a/0x190 entry_SYSCALL_64_after_hwframe+0x49/0xbe irq event stamp: 4466 hardirqs last enabled at (4465): __free_pages_ok+0x1e3/0x490 hardirqs last disabled at (4466): trace_hardirqs_off_thunk+0x1a/0x1c softirqs last enabled at (3420): __do_softirq+0x333/0x446 softirqs last disabled at (3407): irq_exit+0xd1/0xe0 Link: http://lkml.kernel.org/r/20181127055429.251614-3-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Reviewed-by: Joey Pabalinas Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 27b202c64c84..a46776a84480 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -366,8 +366,10 @@ static ssize_t backing_dev_store(struct device *dev, bdev = bdgrab(I_BDEV(inode)); err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); - if (err < 0) + if (err < 0) { + bdev = NULL; goto out; + } nr_pages = i_size_read(inode) >> PAGE_SHIFT; bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); From 2c25071bed4b1f9c4cfb10a7914847d7069794bf Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 28 Dec 2018 00:38:01 -0800 Subject: [PATCH 1334/2608] hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined commit b15c87263a69272423771118c653e9a1d0672caa upstream. We have received a bug report that an injected MCE about faulty memory prevents memory offline to succeed on 4.4 base kernel. The underlying reason was that the HWPoison page has an elevated reference count and the migration keeps failing. There are two problems with that. First of all it is dubious to migrate the poisoned page because we know that accessing that memory is possible to fail. Secondly it doesn't make any sense to migrate a potentially broken content and preserve the memory corruption over to a new location. Oscar has found out that 4.4 and the current upstream kernels behave slightly differently with his simply testcase === int main(void) { int ret; int i; int fd; char *array = malloc(4096); char *array_locked = malloc(4096); fd = open("/tmp/data", O_RDONLY); read(fd, array, 4095); for (i = 0; i < 4096; i++) array_locked[i] = 'd'; ret = mlock((void *)PAGE_ALIGN((unsigned long)array_locked), sizeof(array_locked)); if (ret) perror("mlock"); sleep (20); ret = madvise((void *)PAGE_ALIGN((unsigned long)array_locked), 4096, MADV_HWPOISON); if (ret) perror("madvise"); for (i = 0; i < 4096; i++) array_locked[i] = 'd'; return 0; } === + offline this memory. In 4.4 kernels he saw the hwpoisoned page to be returned back to the LRU list kernel: [] dump_trace+0x59/0x340 kernel: [] show_stack_log_lvl+0xea/0x170 kernel: [] show_stack+0x21/0x40 kernel: [] dump_stack+0x5c/0x7c kernel: [] warn_slowpath_common+0x81/0xb0 kernel: [] __pagevec_lru_add_fn+0x14c/0x160 kernel: [] pagevec_lru_move_fn+0xad/0x100 kernel: [] __lru_cache_add+0x6c/0xb0 kernel: [] add_to_page_cache_lru+0x46/0x70 kernel: [] extent_readpages+0xc3/0x1a0 [btrfs] kernel: [] __do_page_cache_readahead+0x177/0x200 kernel: [] ondemand_readahead+0x168/0x2a0 kernel: [] generic_file_read_iter+0x41f/0x660 kernel: [] __vfs_read+0xcd/0x140 kernel: [] vfs_read+0x7a/0x120 kernel: [] kernel_read+0x3b/0x50 kernel: [] do_execveat_common.isra.29+0x490/0x6f0 kernel: [] do_execve+0x28/0x30 kernel: [] call_usermodehelper_exec_async+0xfb/0x130 kernel: [] ret_from_fork+0x55/0x80 And that latter confuses the hotremove path because an LRU page is attempted to be migrated and that fails due to an elevated reference count. It is quite possible that the reuse of the HWPoisoned page is some kind of fixed race condition but I am not really sure about that. With the upstream kernel the failure is slightly different. The page doesn't seem to have LRU bit set but isolate_movable_page simply fails and do_migrate_range simply puts all the isolated pages back to LRU and therefore no progress is made and scan_movable_pages finds same set of pages over and over again. Fix both cases by explicitly checking HWPoisoned pages before we even try to get reference on the page, try to unmap it if it is still mapped. As explained by Naoya: : Hwpoison code never unmapped those for no big reason because : Ksm pages never dominate memory, so we simply didn't have strong : motivation to save the pages. Also put WARN_ON(PageLRU) in case there is a race and we can hit LRU HWPoison pages which shouldn't happen but I couldn't convince myself about that. Naoya has noted the following: : Theoretically no such gurantee, because try_to_unmap() doesn't have a : guarantee of success and then memory_failure() returns immediately : when hwpoison_user_mappings fails. : Or the following code (comes after hwpoison_user_mappings block) also impli= : es : that the target page can still have PageLRU flag. : : /* : * Torn down by someone else? : */ : if (PageLRU(p) && !PageSwapCache(p) && p->mapping =3D=3D NULL) { : action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED); : res =3D -EBUSY; : goto out; : } : : So I think it's OK to keep "if (WARN_ON(PageLRU(page)))" block in : current version of your patch. Link: http://lkml.kernel.org/r/20181206120135.14079-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Oscar Salvador Debugged-by: Oscar Salvador Tested-by: Oscar Salvador Acked-by: David Hildenbrand Acked-by: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory_hotplug.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index d4b5f29906b9..c7c74a927d6f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -1391,6 +1392,21 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) pfn = page_to_pfn(compound_head(page)) + hpage_nr_pages(page) - 1; + /* + * HWPoison pages have elevated reference counts so the migration would + * fail on them. It also doesn't make any sense to migrate them in the + * first place. Still try to unmap such a page in case it is still mapped + * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep + * the unmap as the catch all safety net). + */ + if (PageHWPoison(page)) { + if (WARN_ON(PageLRU(page))) + isolate_lru_page(page); + if (page_mapped(page)) + try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS); + continue; + } + if (!get_page_unless_zero(page)) continue; /* From 47d24f8c8feee17d9f3e58c4227e3d00d7210120 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Dec 2018 00:34:50 -0800 Subject: [PATCH 1335/2608] mm, devm_memremap_pages: mark devm_memremap_pages() EXPORT_SYMBOL_GPL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 808153e1187fa77ac7d7dad261ff476888dcf398 upstream. devm_memremap_pages() is a facility that can create struct page entries for any arbitrary range and give drivers the ability to subvert core aspects of page management. Specifically the facility is tightly integrated with the kernel's memory hotplug functionality. It injects an altmap argument deep into the architecture specific vmemmap implementation to allow allocating from specific reserved pages, and it has Linux specific assumptions about page structure reference counting relative to get_user_pages() and get_user_pages_fast(). It was an oversight and a mistake that this was not marked EXPORT_SYMBOL_GPL from the outset. Again, devm_memremap_pagex() exposes and relies upon core kernel internal assumptions and will continue to evolve along with 'struct page', memory hotplug, and support for new memory types / topologies. Only an in-kernel GPL-only driver is expected to keep up with this ongoing evolution. This interface, and functionality derived from this interface, is not suitable for kernel-external drivers. Link: http://lkml.kernel.org/r/154275557457.76910.16923571232582744134.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: Christoph Hellwig Acked-by: Michal Hocko Cc: "Jérôme Glisse" Cc: Balbir Singh Cc: Logan Gunthorpe Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 2 +- tools/testing/nvdimm/test/iomap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 790ddf3bce19..9d080c7b5990 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -482,7 +482,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, devres_free(page_map); return ERR_PTR(error); } -EXPORT_SYMBOL(devm_memremap_pages); +EXPORT_SYMBOL_GPL(devm_memremap_pages); unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index e1f75a1914a1..f2a00b0698a3 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -114,7 +114,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, return nfit_res->buf + offset - nfit_res->res.start; return devm_memremap_pages(dev, res, ref, altmap); } -EXPORT_SYMBOL(__wrap_devm_memremap_pages); +EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages); pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags) { From 1432754217f7eec4cfa9794012d2e5d34b9ebef7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Dec 2018 00:34:54 -0800 Subject: [PATCH 1336/2608] mm, devm_memremap_pages: kill mapping "System RAM" support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 06489cfbd915ff36c8e36df27f1c2dc60f97ca56 upstream. Given the fact that devm_memremap_pages() requires a percpu_ref that is torn down by devm_memremap_pages_release() the current support for mapping RAM is broken. Support for remapping "System RAM" has been broken since the beginning and there is no existing user of this this code path, so just kill the support and make it an explicit error. This cleanup also simplifies a follow-on patch to fix the error path when setting a devm release action for devm_memremap_pages_release() fails. Link: http://lkml.kernel.org/r/154275557997.76910.14689813630968180480.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: "Jérôme Glisse" Reviewed-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Cc: Balbir Singh Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 9d080c7b5990..0d676d6d2f62 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -379,15 +379,12 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, is_ram = region_intersects(align_start, align_size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); - if (is_ram == REGION_MIXED) { - WARN_ONCE(1, "%s attempted on mixed region %pr\n", - __func__, res); + if (is_ram != REGION_DISJOINT) { + WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__, + is_ram == REGION_MIXED ? "mixed" : "ram", res); return ERR_PTR(-ENXIO); } - if (is_ram == REGION_INTERSECTS) - return __va(res->start); - if (!ref) return ERR_PTR(-EINVAL); From 465c5cf0bfdb7836dcb2012a52e79eb79f5e16ac Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Dec 2018 00:35:07 -0800 Subject: [PATCH 1337/2608] mm, hmm: use devm semantics for hmm_devmem_{add, remove} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 58ef15b765af0d2cbe6799ec564f1dc485010ab8 upstream. devm semantics arrange for resources to be torn down when device-driver-probe fails or when device-driver-release completes. Similar to devm_memremap_pages() there is no need to support an explicit remove operation when the users properly adhere to devm semantics. Note that devm_kzalloc() automatically handles allocating node-local memory. Link: http://lkml.kernel.org/r/154275559545.76910.9186690723515469051.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: Christoph Hellwig Reviewed-by: Jérôme Glisse Cc: "Jérôme Glisse" Cc: Logan Gunthorpe Cc: Balbir Singh Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/hmm.h | 4 +- mm/hmm.c | 133 +++++++++----------------------------------- 2 files changed, 28 insertions(+), 109 deletions(-) diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 96e69979f84d..7d799c4d2669 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -437,8 +437,7 @@ struct hmm_devmem { * enough and allocate struct page for it. * * The device driver can wrap the hmm_devmem struct inside a private device - * driver struct. The device driver must call hmm_devmem_remove() before the - * device goes away and before freeing the hmm_devmem struct memory. + * driver struct. */ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, struct device *device, @@ -446,7 +445,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, struct device *device, struct resource *res); -void hmm_devmem_remove(struct hmm_devmem *devmem); /* * hmm_devmem_page_set_drvdata - set per-page driver data field diff --git a/mm/hmm.c b/mm/hmm.c index 81ff1dbbf8a8..92acce59d42a 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -767,7 +767,6 @@ static void hmm_devmem_ref_exit(void *data) devmem = container_of(ref, struct hmm_devmem, ref); percpu_ref_exit(ref); - devm_remove_action(devmem->device, &hmm_devmem_ref_exit, data); } static void hmm_devmem_ref_kill(void *data) @@ -778,7 +777,6 @@ static void hmm_devmem_ref_kill(void *data) devmem = container_of(ref, struct hmm_devmem, ref); percpu_ref_kill(ref); wait_for_completion(&devmem->completion); - devm_remove_action(devmem->device, &hmm_devmem_ref_kill, data); } static int hmm_devmem_fault(struct vm_area_struct *vma, @@ -818,7 +816,7 @@ static void hmm_devmem_radix_release(struct resource *resource) mutex_unlock(&hmm_devmem_lock); } -static void hmm_devmem_release(struct device *dev, void *data) +static void hmm_devmem_release(void *data) { struct hmm_devmem *devmem = data; struct resource *resource = devmem->resource; @@ -826,11 +824,6 @@ static void hmm_devmem_release(struct device *dev, void *data) struct zone *zone; struct page *page; - if (percpu_ref_tryget_live(&devmem->ref)) { - dev_WARN(dev, "%s: page mapping is still live!\n", __func__); - percpu_ref_put(&devmem->ref); - } - /* pages are dead and unused, undo the arch mapping */ start_pfn = (resource->start & ~(PA_SECTION_SIZE - 1)) >> PAGE_SHIFT; npages = ALIGN(resource_size(resource), PA_SECTION_SIZE) >> PAGE_SHIFT; @@ -961,19 +954,6 @@ error: return ret; } -static int hmm_devmem_match(struct device *dev, void *data, void *match_data) -{ - struct hmm_devmem *devmem = data; - - return devmem->resource == match_data; -} - -static void hmm_devmem_pages_remove(struct hmm_devmem *devmem) -{ - devres_release(devmem->device, &hmm_devmem_release, - &hmm_devmem_match, devmem->resource); -} - /* * hmm_devmem_add() - hotplug ZONE_DEVICE memory for device memory * @@ -1001,8 +981,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, static_branch_enable(&device_private_key); - devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem), - GFP_KERNEL, dev_to_node(device)); + devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL); if (!devmem) return ERR_PTR(-ENOMEM); @@ -1016,11 +995,11 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release, 0, GFP_KERNEL); if (ret) - goto error_percpu_ref; + return ERR_PTR(ret); - ret = devm_add_action(device, hmm_devmem_ref_exit, &devmem->ref); + ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit, &devmem->ref); if (ret) - goto error_devm_add_action; + return ERR_PTR(ret); size = ALIGN(size, PA_SECTION_SIZE); addr = min((unsigned long)iomem_resource.end, @@ -1040,16 +1019,12 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, devmem->resource = devm_request_mem_region(device, addr, size, dev_name(device)); - if (!devmem->resource) { - ret = -ENOMEM; - goto error_no_resource; - } + if (!devmem->resource) + return ERR_PTR(-ENOMEM); break; } - if (!devmem->resource) { - ret = -ERANGE; - goto error_no_resource; - } + if (!devmem->resource) + return ERR_PTR(-ERANGE); devmem->resource->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY; devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT; @@ -1058,28 +1033,13 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, ret = hmm_devmem_pages_create(devmem); if (ret) - goto error_pages; - - devres_add(device, devmem); - - ret = devm_add_action(device, hmm_devmem_ref_kill, &devmem->ref); - if (ret) { - hmm_devmem_remove(devmem); return ERR_PTR(ret); - } + + ret = devm_add_action_or_reset(device, hmm_devmem_release, devmem); + if (ret) + return ERR_PTR(ret); return devmem; - -error_pages: - devm_release_mem_region(device, devmem->resource->start, - resource_size(devmem->resource)); -error_no_resource: -error_devm_add_action: - hmm_devmem_ref_kill(&devmem->ref); - hmm_devmem_ref_exit(&devmem->ref); -error_percpu_ref: - devres_free(devmem); - return ERR_PTR(ret); } EXPORT_SYMBOL(hmm_devmem_add); @@ -1095,8 +1055,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, static_branch_enable(&device_private_key); - devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem), - GFP_KERNEL, dev_to_node(device)); + devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL); if (!devmem) return ERR_PTR(-ENOMEM); @@ -1110,12 +1069,12 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release, 0, GFP_KERNEL); if (ret) - goto error_percpu_ref; + return ERR_PTR(ret); - ret = devm_add_action(device, hmm_devmem_ref_exit, &devmem->ref); + ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit, + &devmem->ref); if (ret) - goto error_devm_add_action; - + return ERR_PTR(ret); devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT; devmem->pfn_last = devmem->pfn_first + @@ -1123,59 +1082,21 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, ret = hmm_devmem_pages_create(devmem); if (ret) - goto error_devm_add_action; - - devres_add(device, devmem); - - ret = devm_add_action(device, hmm_devmem_ref_kill, &devmem->ref); - if (ret) { - hmm_devmem_remove(devmem); return ERR_PTR(ret); - } + + ret = devm_add_action_or_reset(device, hmm_devmem_release, devmem); + if (ret) + return ERR_PTR(ret); + + ret = devm_add_action_or_reset(device, hmm_devmem_ref_kill, + &devmem->ref); + if (ret) + return ERR_PTR(ret); return devmem; - -error_devm_add_action: - hmm_devmem_ref_kill(&devmem->ref); - hmm_devmem_ref_exit(&devmem->ref); -error_percpu_ref: - devres_free(devmem); - return ERR_PTR(ret); } EXPORT_SYMBOL(hmm_devmem_add_resource); -/* - * hmm_devmem_remove() - remove device memory (kill and free ZONE_DEVICE) - * - * @devmem: hmm_devmem struct use to track and manage the ZONE_DEVICE memory - * - * This will hot-unplug memory that was hotplugged by hmm_devmem_add on behalf - * of the device driver. It will free struct page and remove the resource that - * reserved the physical address range for this device memory. - */ -void hmm_devmem_remove(struct hmm_devmem *devmem) -{ - resource_size_t start, size; - struct device *device; - bool cdm = false; - - if (!devmem) - return; - - device = devmem->device; - start = devmem->resource->start; - size = resource_size(devmem->resource); - - cdm = devmem->resource->desc == IORES_DESC_DEVICE_PUBLIC_MEMORY; - hmm_devmem_ref_kill(&devmem->ref); - hmm_devmem_ref_exit(&devmem->ref); - hmm_devmem_pages_remove(devmem); - - if (!cdm) - devm_release_mem_region(device, start, size); -} -EXPORT_SYMBOL(hmm_devmem_remove); - /* * A device driver that wants to handle multiple devices memory through a * single fake device can use hmm_device to do so. This is purely a helper From c5a2c79da3f8685de31791358a98a352f241fb83 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Dec 2018 00:35:15 -0800 Subject: [PATCH 1338/2608] mm, hmm: mark hmm_devmem_{add, add_resource} EXPORT_SYMBOL_GPL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 02917e9f8676207a4c577d4d94eae12bf348e9d7 upstream. At Maintainer Summit, Greg brought up a topic I proposed around EXPORT_SYMBOL_GPL usage. The motivation was considerations for when EXPORT_SYMBOL_GPL is warranted and the criteria for taking the exceptional step of reclassifying an existing export. Specifically, I wanted to make the case that although the line is fuzzy and hard to specify in abstract terms, it is nonetheless clear that devm_memremap_pages() and HMM (Heterogeneous Memory Management) have crossed it. The devm_memremap_pages() facility should have been EXPORT_SYMBOL_GPL from the beginning, and HMM as a derivative of that functionality should have naturally picked up that designation as well. Contrary to typical rules, the HMM infrastructure was merged upstream with zero in-tree consumers. There was a promise at the time that those users would be merged "soon", but it has been over a year with no drivers arriving. While the Nouveau driver is about to belatedly make good on that promise it is clear that HMM was targeted first and foremost at an out-of-tree consumer. HMM is derived from devm_memremap_pages(), a facility Christoph and I spearheaded to support persistent memory. It combines a device lifetime model with a dynamically created 'struct page' / memmap array for any physical address range. It enables coordination and control of the many code paths in the kernel built to interact with memory via 'struct page' objects. With HMM the integration goes even deeper by allowing device drivers to hook and manipulate page fault and page free events. One interpretation of when EXPORT_SYMBOL is suitable is when it is exporting stable and generic leaf functionality. The devm_memremap_pages() facility continues to see expanding use cases, peer-to-peer DMA being the most recent, with no clear end date when it will stop attracting reworks and semantic changes. It is not suitable to export devm_memremap_pages() as a stable 3rd party driver API due to the fact that it is still changing and manipulates core behavior. Moreover, it is not in the best interest of the long term development of the core memory management subsystem to permit any external driver to effectively define its own system-wide memory management policies with no encouragement to engage with upstream. I am also concerned that HMM was designed in a way to minimize further engagement with the core-MM. That, with these hooks in place, device-drivers are free to implement their own policies without much consideration for whether and how the core-MM could grow to meet that need. Going forward not only should HMM be EXPORT_SYMBOL_GPL, but the core-MM should be allowed the opportunity and stimulus to change and address these new use cases as first class functionality. Original changelog: hmm_devmem_add(), and hmm_devmem_add_resource() duplicated devm_memremap_pages() and are now simple now wrappers around the core facility to inject a dev_pagemap instance into the global pgmap_radix and hook page-idle events. The devm_memremap_pages() interface is base infrastructure for HMM. HMM has more and deeper ties into the kernel memory management implementation than base ZONE_DEVICE which is itself a EXPORT_SYMBOL_GPL facility. Originally, the HMM page structure creation routines copied the devm_memremap_pages() code and reused ZONE_DEVICE. A cleanup to unify the implementations was discussed during the initial review: http://lkml.iu.edu/hypermail/linux/kernel/1701.2/00812.html Recent work to extend devm_memremap_pages() for the peer-to-peer-DMA facility enabled this cleanup to move forward. In addition to the integration with devm_memremap_pages() HMM depends on other GPL-only symbols: mmu_notifier_unregister_no_release percpu_ref region_intersects __class_create It goes further to consume / indirectly expose functionality that is not exported to any other driver: alloc_pages_vma walk_page_range HMM is derived from devm_memremap_pages(), and extends deep core-kernel fundamentals. Similar to devm_memremap_pages(), mark its entry points EXPORT_SYMBOL_GPL(). [logang@deltatee.com: PCI/P2PDMA: match interface changes to devm_memremap_pages()] Link: http://lkml.kernel.org/r/20181130225911.2900-1-logang@deltatee.com Link: http://lkml.kernel.org/r/154275560565.76910.15919297436557795278.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Signed-off-by: Logan Gunthorpe Reviewed-by: Christoph Hellwig Cc: Logan Gunthorpe Cc: "Jérôme Glisse" Cc: Balbir Singh , Cc: Michal Hocko Cc: Benjamin Herrenschmidt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hmm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 92acce59d42a..a5def9f34385 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -1041,7 +1041,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, return devmem; } -EXPORT_SYMBOL(hmm_devmem_add); +EXPORT_SYMBOL_GPL(hmm_devmem_add); struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, struct device *device, @@ -1095,7 +1095,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, return devmem; } -EXPORT_SYMBOL(hmm_devmem_add_resource); +EXPORT_SYMBOL_GPL(hmm_devmem_add_resource); /* * A device driver that wants to handle multiple devices memory through a From 89b03877088751fc44dd93d40fdcd74a04927c45 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 28 Dec 2018 00:39:53 -0800 Subject: [PATCH 1339/2608] mm, swap: fix swapoff with KSM pages commit 7af7a8e19f0c5425ff639b0f0d2d244c2a647724 upstream. KSM pages may be mapped to the multiple VMAs that cannot be reached from one anon_vma. So during swapin, a new copy of the page need to be generated if a different anon_vma is needed, please refer to comments of ksm_might_need_to_copy() for details. During swapoff, unuse_vma() uses anon_vma (if available) to locate VMA and virtual address mapped to the page, so not all mappings to a swapped out KSM page could be found. So in try_to_unuse(), even if the swap count of a swap entry isn't zero, the page needs to be deleted from swap cache, so that, in the next round a new page could be allocated and swapin for the other mappings of the swapped out KSM page. But this contradicts with the THP swap support. Where the THP could be deleted from swap cache only after the swap count of every swap entry in the huge swap cluster backing the THP has reach 0. So try_to_unuse() is changed in commit e07098294adf ("mm, THP, swap: support to reclaim swap space for THP swapped out") to check that before delete a page from swap cache, but this has broken KSM swapoff too. Fortunately, KSM is for the normal pages only, so the original behavior for KSM pages could be restored easily via checking PageTransCompound(). That is how this patch works. The bug is introduced by e07098294adf ("mm, THP, swap: support to reclaim swap space for THP swapped out"), which is merged by v4.14-rc1. So I think we should backport the fix to from 4.14 on. But Hugh thinks it may be rare for the KSM pages being in the swap device when swapoff, so nobody reports the bug so far. Link: http://lkml.kernel.org/r/20181226051522.28442-1-ying.huang@intel.com Fixes: e07098294adf ("mm, THP, swap: support to reclaim swap space for THP swapped out") Signed-off-by: "Huang, Ying" Reported-by: Hugh Dickins Tested-by: Hugh Dickins Acked-by: Hugh Dickins Cc: Rik van Riel Cc: Johannes Weiner Cc: Minchan Kim Cc: Shaohua Li Cc: Daniel Jordan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/swapfile.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 08e8cd21770c..af3c4c5a0b4e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2218,7 +2218,8 @@ int try_to_unuse(unsigned int type, bool frontswap, */ if (PageSwapCache(page) && likely(page_private(page) == entry.val) && - !page_swapped(page)) + (!PageTransCompound(page) || + !swap_page_trans_huge_swapped(si, entry))) delete_from_swap_cache(compound_head(page)); /* From 76da01793fbdcac42d9828b778cfa0d63d27bfac Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 28 Nov 2018 11:45:57 +0300 Subject: [PATCH 1340/2608] sunrpc: fix cache_head leak due to queued request commit 4ecd55ea074217473f94cfee21bb72864d39f8d7 upstream. After commit d202cce8963d, an expired cache_head can be removed from the cache_detail's hash. However, the expired cache_head may be waiting for a reply from a previously submitted request. Such a cache_head has an increased refcounter and therefore it won't be freed after cache_put(freeme). Because the cache_head was removed from the hash it cannot be found during cache_clean() and can be leaked forever, together with stalled cache_request and other taken resources. In our case we noticed it because an entry in the export cache was holding a reference on a filesystem. Fixes d202cce8963d ("sunrpc: never return expired entries in sunrpc_cache_lookup") Cc: Pavel Tikhomirov Cc: stable@kernel.org # 2.6.35 Signed-off-by: Vasily Averin Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/cache.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 79d55d949d9a..f2cf4edf219b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,11 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static void cache_fresh_locked(struct cache_head *head, time_t expiry, + struct cache_detail *detail); +static void cache_fresh_unlocked(struct cache_head *head, + struct cache_detail *detail); + struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -95,6 +100,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init(&tmp->cache_list); detail->entries --; + cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; } @@ -110,8 +116,10 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, cache_get(new); write_unlock(&detail->hash_lock); - if (freeme) + if (freeme) { + cache_fresh_unlocked(freeme, detail); cache_put(freeme, detail); + } return new; } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); From aa71dcfe9c4210f471d3d3c6153ec34ba2606597 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 24 Dec 2018 14:44:42 +0300 Subject: [PATCH 1341/2608] sunrpc: use SVC_NET() in svcauth_gss_* functions commit b8be5674fa9a6f3677865ea93f7803c4212f3e10 upstream. Signed-off-by: Vasily Averin Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/svcauth_gss.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index f41ffb22652c..cc08cb1292a9 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1120,7 +1120,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp, struct kvec *resv = &rqstp->rq_res.head[0]; struct rsi *rsip, rsikey; int ret; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); memset(&rsikey, 0, sizeof(rsikey)); ret = gss_read_verf(gc, argv, authp, @@ -1231,7 +1231,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, uint64_t handle; int status; int ret; - struct net *net = rqstp->rq_xprt->xpt_net; + struct net *net = SVC_NET(rqstp); struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); memset(&ud, 0, sizeof(ud)); @@ -1422,7 +1422,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) __be32 *rpcstart; __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n", argv->iov_len); @@ -1710,7 +1710,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *resbuf = &rqstp->rq_res; int stat = -EINVAL; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; From f716d5e5fa6b6b498435ae67817122e73a8774ee Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 14 Sep 2018 15:08:54 +1000 Subject: [PATCH 1342/2608] powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer commit 6977f95e63b9b3fb4a5973481a800dd9f48a1338 upstream. Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman [nc: Adjust context due to lack of f2910f0e6835 and 2a056f58fd33] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1381693a4a51..7452e50f4d1f 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -236,7 +236,12 @@ endif # Work around a gcc code-gen bug with -fno-omit-frame-pointer. ifeq ($(CONFIG_FUNCTION_TRACER),y) -KBUILD_CFLAGS += -mno-sched-epilog +# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828 +ifneq ($(cc-name),clang) +KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog) +endif endif cpu-as-$(CONFIG_4xx) += -Wa,-m405 From 4df445a77526593d6236af778fda1627bd114000 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Mon, 17 Sep 2018 17:16:21 +0930 Subject: [PATCH 1343/2608] powerpc: Disable -Wbuiltin-requires-header when setjmp is used commit aea447141c7e7824b81b49acd1bc785506fba46e upstream. The powerpc kernel uses setjmp which causes a warning when building with clang: In file included from arch/powerpc/xmon/xmon.c:51: ./arch/powerpc/include/asm/setjmp.h:15:13: error: declaration of built-in function 'setjmp' requires inclusion of the header [-Werror,-Wbuiltin-requires-header] extern long setjmp(long *); ^ ./arch/powerpc/include/asm/setjmp.h:16:13: error: declaration of built-in function 'longjmp' requires inclusion of the header [-Werror,-Wbuiltin-requires-header] extern void longjmp(long *, long); ^ This *is* the header and we're not using the built-in setjump but rather the one in arch/powerpc/kernel/misc.S. As the compiler warning does not make sense, it for the files where setjmp is used. Signed-off-by: Joel Stanley Reviewed-by: Nick Desaulniers [mpe: Move subdir-ccflags in xmon/Makefile to not clobber -Werror] Signed-off-by: Michael Ellerman Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/Makefile | 3 +++ arch/powerpc/xmon/Makefile | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1479c61e29c5..a1089c9a9aa5 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,6 +5,9 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' +# Disable clang warning for using setjmp without setjmp.h header +CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) + subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ifeq ($(CONFIG_PPC64),y) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 1bc3abb237cd..549e99e71112 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,7 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror +# Disable clang warning for using setjmp without setjmp.h header +subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header) + +subdir-ccflags-$(CONFIG_PPC_WERROR) += -Werror GCOV_PROFILE := n UBSAN_SANITIZE := n From 487467e7d031e73907c820ef05ce0d4d18c24767 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Mon, 17 Sep 2018 17:07:54 +0930 Subject: [PATCH 1344/2608] ftrace: Build with CPPFLAGS to get -Qunused-arguments When building to record the mcount locations the kernel uses KBUILD_CFLAGS but not KBUILD_CPPFLAGS. This means it lacks -Qunused-arguments when building with clang, resulting in a lot of noisy warnings. Signed-off-by: Joel Stanley Reviewed-by: Nick Desaulniers Signed-off-by: Masahiro Yamada [nc: Fix conflicts due to lack of 87a32e624037 and d503ac531a52] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- scripts/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index be9e5deb58ba..3edc9c04cb46 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -242,7 +242,7 @@ else sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ "$(if $(CONFIG_64BIT),64,32)" \ - "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ + "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)" \ "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl From 03df2c59562a2d8851fea32f833e48193f6cd8ad Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 5 Oct 2017 11:28:47 -0700 Subject: [PATCH 1345/2608] md: raid10: remove VLAIS commit 584ed9fa9532f8b9d5955628ff87ee3b2ab9f5a9 upstream. The raid10 driver can't be built with clang since it uses a variable length array in a structure (VLAIS): drivers/md/raid10.c:4583:17: error: fields must have a constant size: 'variable length array in structure' extension will never be supported Allocate the r10bio struct with kmalloc instead of using the VLAIS construct. Shaohua: set the MD_RECOVERY_INTR bit Neil Brown: use GFP_NOIO Signed-off-by: Matthias Kaehlcke Reviewed-by: Guenter Roeck Signed-off-by: Shaohua Li Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e786546bf3b8..52ddfa0fca94 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4591,15 +4591,18 @@ static int handle_reshape_read_error(struct mddev *mddev, /* Use sync reads to get the blocks from somewhere else */ int sectors = r10_bio->sectors; struct r10conf *conf = mddev->private; - struct { - struct r10bio r10_bio; - struct r10dev devs[conf->copies]; - } on_stack; - struct r10bio *r10b = &on_stack.r10_bio; + struct r10bio *r10b; int slot = 0; int idx = 0; struct page **pages; + r10b = kmalloc(sizeof(*r10b) + + sizeof(struct r10dev) * conf->copies, GFP_NOIO); + if (!r10b) { + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + return -ENOMEM; + } + /* reshape IOs share pages from .devs[0].bio */ pages = get_resync_pages(r10_bio->devs[0].bio)->pages; @@ -4648,11 +4651,13 @@ static int handle_reshape_read_error(struct mddev *mddev, /* couldn't read this block, must give up */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); + kfree(r10b); return -EIO; } sectors -= s; idx++; } + kfree(r10b); return 0; } From 23d9f5e4a3ad66061e96dc467e65b580fafe3648 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Nov 2018 12:04:54 +0900 Subject: [PATCH 1346/2608] kbuild: add -no-integrated-as Clang option unconditionally commit dbe27a002ef8573168cb64e181458ea23a74e2b6 upstream. We are still a way off the Clang's integrated assembler support for the kernel. Hence, -no-integrated-as is mandatory to build the kernel with Clang. If you had an ancient version of Clang that does not recognize this option, you would not be able to compile the kernel anyway. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2de4072bcc90..d5e3f762e022 100644 --- a/Makefile +++ b/Makefile @@ -489,8 +489,8 @@ CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_CFLAGS += -no-integrated-as +KBUILD_AFLAGS += -no-integrated-as endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register From ff858d82207470c8612bb66d7129235f81567524 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Nov 2018 12:04:55 +0900 Subject: [PATCH 1347/2608] kbuild: consolidate Clang compiler flags commit 238bcbc4e07fad2fff99c5b157d0c37ccd4d093c upstream. Collect basic Clang options such as --target, --prefix, --gcc-toolchain, -no-integrated-as into a single variable CLANG_FLAGS so that it can be easily reused in other parts of Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Acked-by: Greg Hackmann Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index d5e3f762e022..0d53ecf5983f 100644 --- a/Makefile +++ b/Makefile @@ -479,18 +479,17 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) +CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) -CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) +CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) -CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) +CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN) endif -KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) -KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) -KBUILD_CFLAGS += -no-integrated-as -KBUILD_AFLAGS += -no-integrated-as +CLANG_FLAGS += -no-integrated-as +KBUILD_CFLAGS += $(CLANG_FLAGS) +KBUILD_AFLAGS += $(CLANG_FLAGS) endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register From 72d217d5df485027769cfc1a69e4d9bffaa30bfe Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Mon, 12 Nov 2018 14:51:15 +1030 Subject: [PATCH 1348/2608] Makefile: Export clang toolchain variables commit 3bd9805090af843b25f97ffe5049f20ade1d86d6 upstream. The powerpc makefile will use these in it's boot wrapper. Signed-off-by: Joel Stanley Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 0d53ecf5983f..6f7cca2abbd0 100644 --- a/Makefile +++ b/Makefile @@ -490,6 +490,7 @@ endif CLANG_FLAGS += -no-integrated-as KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_AFLAGS += $(CLANG_FLAGS) +export CLANG_FLAGS endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register From d75c8c0afd357e66b55b4548919bb6d5a5a8e8fb Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Mon, 12 Nov 2018 14:51:16 +1030 Subject: [PATCH 1349/2608] powerpc/boot: Set target when cross-compiling for clang commit 813af51f5d30a2da6a2523c08465f9726e51772e upstream. Clang needs to be told which target it is building for when cross compiling. Link: https://github.com/ClangBuiltLinux/linux/issues/259 Signed-off-by: Joel Stanley Tested-by: Daniel Axtens # powerpc 64-bit BE Acked-by: Michael Ellerman Reviewed-by: Nick Desaulniers Signed-off-by: Masahiro Yamada [nc: Use 'ifeq ($(cc-name),clang)' instead of 'ifdef CONFIG_CC_IS_CLANG' because that config does not exist in 4.14; the Kconfig rewrite that added that config happened in 4.18] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index b479926f0167..e2a5a932c24a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -49,6 +49,11 @@ endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc +ifeq ($(cc-name),clang) +BOOTCFLAGS += $(CLANG_FLAGS) +BOOTAFLAGS += $(CLANG_FLAGS) +endif + ifdef CONFIG_DEBUG_INFO BOOTCFLAGS += -g endif From 1f33700bdf421490aef65bb794dbd9d9837f7ea0 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 2 Nov 2018 11:14:55 +1030 Subject: [PATCH 1350/2608] raid6/ppc: Fix build for clang commit e213574a449f7a57d4202c1869bbc7680b6b5521 upstream. We cannot build these files with clang as it does not allow altivec instructions in assembly when -msoft-float is passed. Jinsong Ji wrote: > We currently disable Altivec/VSX support when enabling soft-float. So > any usage of vector builtins will break. > > Enable Altivec/VSX with soft-float may need quite some clean up work, so > I guess this is currently a limitation. > > Removing -msoft-float will make it work (and we are lucky that no > floating point instructions will be generated as well). This is a workaround until the issue is resolved in clang. Link: https://bugs.llvm.org/show_bug.cgi?id=31177 Link: https://github.com/ClangBuiltLinux/linux/issues/239 Signed-off-by: Joel Stanley Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman [nc: Use 'ifeq ($(cc-name),clang)' instead of 'ifdef CONFIG_CC_IS_CLANG' because that config does not exist in 4.14; the Kconfig rewrite that added that config happened in 4.18] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- lib/raid6/Makefile | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 4add700ddfe3..ad523be0313b 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -18,6 +18,21 @@ quiet_cmd_unroll = UNROLL $@ ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec $(call cc-option,-mabi=altivec) + +ifeq ($(cc-name),clang) +# clang ppc port does not yet support -maltivec when -msoft-float is +# enabled. A future release of clang will resolve this +# https://bugs.llvm.org/show_bug.cgi?id=31177 +CFLAGS_REMOVE_altivec1.o += -msoft-float +CFLAGS_REMOVE_altivec2.o += -msoft-float +CFLAGS_REMOVE_altivec4.o += -msoft-float +CFLAGS_REMOVE_altivec8.o += -msoft-float +CFLAGS_REMOVE_altivec8.o += -msoft-float +CFLAGS_REMOVE_vpermxor1.o += -msoft-float +CFLAGS_REMOVE_vpermxor2.o += -msoft-float +CFLAGS_REMOVE_vpermxor4.o += -msoft-float +CFLAGS_REMOVE_vpermxor8.o += -msoft-float +endif endif # The GCC option -ffreestanding is required in order to compile code containing From 14c2cd93e276832e92c1a811a1552bb4036108af Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Nov 2017 13:29:10 +0000 Subject: [PATCH 1351/2608] vhost/vsock: fix uninitialized vhost_vsock->guest_cid commit a72b69dc083a931422cc8a5e33841aff7d5312f2 upstream. The vhost_vsock->guest_cid field is uninitialized when /dev/vhost-vsock is opened until the VHOST_VSOCK_SET_GUEST_CID ioctl is called. kvmalloc(..., GFP_KERNEL | __GFP_RETRY_MAYFAIL) does not zero memory. All other vhost_vsock fields are initialized explicitly so just initialize this field too. Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Cc: Daniel Verkamp Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 248533c0f9ac..831758335e2c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -522,6 +522,8 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) goto out; } + vsock->guest_cid = 0; /* no CID assigned yet */ + atomic_set(&vsock->queued_replies, 0); vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; From 0be812a800391c6d9447ed15b4bedb63b941b5aa Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 22 Aug 2018 12:45:51 -0400 Subject: [PATCH 1352/2608] dm verity: fix crash on bufio buffer that was allocated with vmalloc commit e4b069e0945fa14c71cf8b5b89f8b1b2aa68dbc2 upstream. Since commit d1ac3ff008fb ("dm verity: switch to using asynchronous hash crypto API") dm-verity uses asynchronous crypto calls for verification, so that it can use hardware with asynchronous processing of crypto operations. These asynchronous calls don't support vmalloc memory, but the buffer data can be allocated with vmalloc if dm-bufio is short of memory and uses a reserved buffer that was preallocated in dm_bufio_client_create(). Fix verity_hash_update() so that it deals with vmalloc'd memory correctly. Reported-by: "Xiao, Jin" Signed-off-by: Mikulas Patocka Fixes: d1ac3ff008fb ("dm verity: switch to using asynchronous hash crypto API") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Mike Snitzer Signed-off-by: Sudip Mukherjee Acked-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index bda3caca23ca..8573c70a1880 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -139,10 +139,26 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req, { struct scatterlist sg; - sg_init_one(&sg, data, len); - ahash_request_set_crypt(req, &sg, NULL, len); - - return verity_complete_op(res, crypto_ahash_update(req)); + if (likely(!is_vmalloc_addr(data))) { + sg_init_one(&sg, data, len); + ahash_request_set_crypt(req, &sg, NULL, len); + return verity_complete_op(res, crypto_ahash_update(req)); + } else { + do { + int r; + size_t this_step = min_t(size_t, len, PAGE_SIZE - offset_in_page(data)); + flush_kernel_vmap_range((void *)data, this_step); + sg_init_table(&sg, 1); + sg_set_page(&sg, vmalloc_to_page(data), this_step, offset_in_page(data)); + ahash_request_set_crypt(req, &sg, NULL, this_step); + r = verity_complete_op(res, crypto_ahash_update(req)); + if (unlikely(r)) + return r; + data += this_step; + len -= this_step; + } while (len); + return 0; + } } /* From a3c16e5f99c7805e3c76d94c4e1575fce11ae9d5 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 30 Nov 2018 15:31:48 +0900 Subject: [PATCH 1353/2608] dm zoned: Fix target BIO completion handling commit d57f9da890696af1484f4a47f7f123560197865a upstream. struct bioctx includes the ref refcount_t to track the number of I/O fragments used to process a target BIO as well as ensure that the zone of the BIO is kept in the active state throughout the lifetime of the BIO. However, since decrementing of this reference count is done in the target .end_io method, the function bio_endio() must be called multiple times for read and write target BIOs, which causes problems with the value of the __bi_remaining struct bio field for chained BIOs (e.g. the clone BIO passed by dm core is large and splits into fragments by the block layer), resulting in incorrect values and inconsistencies with the BIO_CHAIN flag setting. This is turn triggers the BUG_ON() call: BUG_ON(atomic_read(&bio->__bi_remaining) <= 0); in bio_remaining_done() called from bio_endio(). Fix this ensuring that bio_endio() is called only once for any target BIO by always using internal clone BIOs for processing any read or write target BIO. This allows reference counting using the target BIO context counter to trigger the target BIO completion bio_endio() call once all data, metadata and other zone work triggered by the BIO complete. Overall, this simplifies the code too as the target .end_io becomes unnecessary and differences between read and write BIO issuing and completion processing disappear. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-target.c | 122 +++++++++++------------------------ 1 file changed, 38 insertions(+), 84 deletions(-) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index ba6b0a90ecfb..532bfce7f072 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -20,7 +20,6 @@ struct dmz_bioctx { struct dm_zone *zone; struct bio *bio; atomic_t ref; - blk_status_t status; }; /* @@ -78,65 +77,66 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status) { struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); - if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK) - bioctx->status = status; - bio_endio(bio); + if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK) + bio->bi_status = status; + + if (atomic_dec_and_test(&bioctx->ref)) { + struct dm_zone *zone = bioctx->zone; + + if (zone) { + if (bio->bi_status != BLK_STS_OK && + bio_op(bio) == REQ_OP_WRITE && + dmz_is_seq(zone)) + set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags); + dmz_deactivate_zone(zone); + } + bio_endio(bio); + } } /* - * Partial clone read BIO completion callback. This terminates the + * Completion callback for an internally cloned target BIO. This terminates the * target BIO when there are no more references to its context. */ -static void dmz_read_bio_end_io(struct bio *bio) +static void dmz_clone_endio(struct bio *clone) { - struct dmz_bioctx *bioctx = bio->bi_private; - blk_status_t status = bio->bi_status; + struct dmz_bioctx *bioctx = clone->bi_private; + blk_status_t status = clone->bi_status; - bio_put(bio); + bio_put(clone); dmz_bio_endio(bioctx->bio, status); } /* - * Issue a BIO to a zone. The BIO may only partially process the + * Issue a clone of a target BIO. The clone may only partially process the * original target BIO. */ -static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone, - struct bio *bio, sector_t chunk_block, - unsigned int nr_blocks) +static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone, + struct bio *bio, sector_t chunk_block, + unsigned int nr_blocks) { struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); - sector_t sector; struct bio *clone; - /* BIO remap sector */ - sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); - - /* If the read is not partial, there is no need to clone the BIO */ - if (nr_blocks == dmz_bio_blocks(bio)) { - /* Setup and submit the BIO */ - bio->bi_iter.bi_sector = sector; - atomic_inc(&bioctx->ref); - generic_make_request(bio); - return 0; - } - - /* Partial BIO: we need to clone the BIO */ clone = bio_clone_fast(bio, GFP_NOIO, dmz->bio_set); if (!clone) return -ENOMEM; - /* Setup the clone */ - clone->bi_iter.bi_sector = sector; + bio_set_dev(clone, dmz->dev->bdev); + clone->bi_iter.bi_sector = + dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT; - clone->bi_end_io = dmz_read_bio_end_io; + clone->bi_end_io = dmz_clone_endio; clone->bi_private = bioctx; bio_advance(bio, clone->bi_iter.bi_size); - /* Submit the clone */ atomic_inc(&bioctx->ref); generic_make_request(clone); + if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone)) + zone->wp_block += nr_blocks; + return 0; } @@ -214,7 +214,7 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone, if (nr_blocks) { /* Valid blocks found: read them */ nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block); - ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks); + ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks); if (ret) return ret; chunk_block += nr_blocks; @@ -228,25 +228,6 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone, return 0; } -/* - * Issue a write BIO to a zone. - */ -static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone, - struct bio *bio, sector_t chunk_block, - unsigned int nr_blocks) -{ - struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); - - /* Setup and submit the BIO */ - bio_set_dev(bio, dmz->dev->bdev); - bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); - atomic_inc(&bioctx->ref); - generic_make_request(bio); - - if (dmz_is_seq(zone)) - zone->wp_block += nr_blocks; -} - /* * Write blocks directly in a data zone, at the write pointer. * If a buffer zone is assigned, invalidate the blocks written @@ -265,7 +246,9 @@ static int dmz_handle_direct_write(struct dmz_target *dmz, return -EROFS; /* Submit write */ - dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks); + ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks); + if (ret) + return ret; /* * Validate the blocks in the data zone and invalidate @@ -301,7 +284,9 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz, return -EROFS; /* Submit write */ - dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks); + ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks); + if (ret) + return ret; /* * Validate the blocks in the buffer zone @@ -600,7 +585,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) bioctx->zone = NULL; bioctx->bio = bio; atomic_set(&bioctx->ref, 1); - bioctx->status = BLK_STS_OK; /* Set the BIO pending in the flush list */ if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) { @@ -623,35 +607,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } -/* - * Completed target BIO processing. - */ -static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) -{ - struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); - - if (bioctx->status == BLK_STS_OK && *error) - bioctx->status = *error; - - if (!atomic_dec_and_test(&bioctx->ref)) - return DM_ENDIO_INCOMPLETE; - - /* Done */ - bio->bi_status = bioctx->status; - - if (bioctx->zone) { - struct dm_zone *zone = bioctx->zone; - - if (*error && bio_op(bio) == REQ_OP_WRITE) { - if (dmz_is_seq(zone)) - set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags); - } - dmz_deactivate_zone(zone); - } - - return DM_ENDIO_DONE; -} - /* * Get zoned device information. */ @@ -946,7 +901,6 @@ static struct target_type dmz_type = { .ctr = dmz_ctr, .dtr = dmz_dtr, .map = dmz_map, - .end_io = dmz_end_io, .io_hints = dmz_io_hints, .prepare_ioctl = dmz_prepare_ioctl, .postsuspend = dmz_suspend, From 89c7ba90185dc9af684a17027300899e189039cd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 8 Jan 2019 10:43:30 +0300 Subject: [PATCH 1354/2608] ALSA: cs46xx: Potential NULL dereference in probe commit 1524f4e47f90b27a3ac84efbdd94c63172246a6f upstream. The "chip->dsp_spos_instance" can be NULL on some of the ealier error paths in snd_cs46xx_create(). Reported-by: "Yavuz, Tuba" Signed-off-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/cs46xx/dsp_spos.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/cs46xx/dsp_spos.c b/sound/pci/cs46xx/dsp_spos.c index aa61615288ff..f03bbd0eb027 100644 --- a/sound/pci/cs46xx/dsp_spos.c +++ b/sound/pci/cs46xx/dsp_spos.c @@ -900,6 +900,9 @@ int cs46xx_dsp_proc_done (struct snd_cs46xx *chip) struct dsp_spos_instance * ins = chip->dsp_spos_instance; int i; + if (!ins) + return 0; + snd_info_free_entry(ins->proc_sym_info_entry); ins->proc_sym_info_entry = NULL; From 1117b7a380f4b4f575d90a318a43d2e168c01fbd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 19 Dec 2018 12:36:27 +0100 Subject: [PATCH 1355/2608] ALSA: usb-audio: Avoid access before bLength check in build_audio_procunit() commit f4351a199cc120ff9d59e06d02e8657d08e6cc46 upstream. The parser for the processing unit reads bNrInPins field before the bLength sanity check, which may lead to an out-of-bound access when a malformed descriptor is given. Fix it by assignment after the bLength check. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 4d950b7c2f97..b3be0d432a75 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1888,7 +1888,7 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, char *name) { struct uac_processing_unit_descriptor *desc = raw_desc; - int num_ins = desc->bNrInPins; + int num_ins; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; int i, err, nameid, type, len; @@ -1903,7 +1903,13 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, 0, NULL, default_value_info }; - if (desc->bLength < 13 || desc->bLength < 13 + num_ins || + if (desc->bLength < 13) { + usb_audio_err(state->chip, "invalid %s descriptor (id %d)\n", name, unitid); + return -EINVAL; + } + + num_ins = desc->bNrInPins; + if (desc->bLength < 13 + num_ins || desc->bLength < num_ins + uac_processing_unit_bControlSize(desc, state->mixer->protocol)) { usb_audio_err(state->chip, "invalid %s descriptor (id %d)\n", name, unitid); return -EINVAL; From 385c23d626937494f90563a563c210b15c371b2a Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Tue, 25 Dec 2018 18:11:52 -0500 Subject: [PATCH 1356/2608] ALSA: usb-audio: Fix an out-of-bound read in create_composite_quirks commit cbb2ebf70daf7f7d97d3811a2ff8e39655b8c184 upstream. In `create_composite_quirk`, the terminating condition of for loops is `quirk->ifnum < 0`. So any composite quirks should end with `struct snd_usb_audio_quirk` object with ifnum < 0. for (quirk = quirk_comp->data; quirk->ifnum >= 0; ++quirk) { ..... } the data field of Bower's & Wilkins PX headphones usb device device quirks do not end with {.ifnum = -1}, wihch may result in out-of-bound read. This Patch fix the bug by adding an ending quirk object. Fixes: 240a8af929c7 ("ALSA: usb-audio: Add a quirck for B&W PX headphones") Signed-off-by: Hui Peng Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 15cbe2565703..d32727c74a16 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3321,6 +3321,9 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } } }, + { + .ifnum = -1 + }, } } }, From 8f0bfd40724072cd79afa7c77b77036c4490aa8b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 15 Nov 2018 13:15:05 +0300 Subject: [PATCH 1357/2608] dlm: fixed memory leaks after failed ls_remove_names allocation commit b982896cdb6e6a6b89d86dfb39df489d9df51e14 upstream. If allocation fails on last elements of array need to free already allocated elements. v2: just move existing out_rsbtbl label to right place Fixes 789924ba635f ("dlm: fix race between remove and lookup") Cc: stable@kernel.org # 3.6 Signed-off-by: Vasily Averin Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/lockspace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 78a7c855b06b..610f72ae7ad6 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -680,11 +680,11 @@ static int new_lockspace(const char *name, const char *cluster, kfree(ls->ls_recover_buf); out_lkbidr: idr_destroy(&ls->ls_lkbidr); + out_rsbtbl: for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) { if (ls->ls_remove_names[i]) kfree(ls->ls_remove_names[i]); } - out_rsbtbl: vfree(ls->ls_rsbtbl); out_lsfree: if (do_unreg) From f3d0f0a912776fb03e3499c62002d162d047c8c4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 15 Nov 2018 13:18:18 +0300 Subject: [PATCH 1358/2608] dlm: possible memory leak on error path in create_lkb() commit 23851e978f31eda8b2d01bd410d3026659ca06c7 upstream. Fixes 3d6aa675fff9 ("dlm: keep lkbs in idr") Cc: stable@kernel.org # 3.1 Signed-off-by: Vasily Averin Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/lock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index d4aaddec1b16..eb58f385b662 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1210,6 +1210,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) if (rv < 0) { log_error(ls, "create_lkb idr error %d", rv); + dlm_free_lkb(lkb); return rv; } From 6c03da607b3339011c56560f007afb5307b9aa73 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 15 Nov 2018 13:18:24 +0300 Subject: [PATCH 1359/2608] dlm: lost put_lkb on error path in receive_convert() and receive_unlock() commit c0174726c3976e67da8649ac62cae43220ae173a upstream. Fixes 6d40c4a708e0 ("dlm: improve error and debug messages") Cc: stable@kernel.org # 3.5 Signed-off-by: Vasily Averin Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/lock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index eb58f385b662..311a1c24e5ea 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -4177,6 +4177,7 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms) (unsigned long long)lkb->lkb_recover_seq, ms->m_header.h_nodeid, ms->m_lkid); error = -ENOENT; + dlm_put_lkb(lkb); goto fail; } @@ -4230,6 +4231,7 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) lkb->lkb_id, lkb->lkb_remid, ms->m_header.h_nodeid, ms->m_lkid); error = -ENOENT; + dlm_put_lkb(lkb); goto fail; } From f69ed08c4197a131949c1ac4ec067ef7d8104bc5 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 15 Nov 2018 13:18:56 +0300 Subject: [PATCH 1360/2608] dlm: memory leaks on error path in dlm_user_request() commit d47b41aceeadc6b58abc9c7c6485bef7cfb75636 upstream. According to comment in dlm_user_request() ua should be freed in dlm_free_lkb() after successful attach to lkb. However ua is attached to lkb not in set_lock_args() but later, inside request_lock(). Fixes 597d0cae0f99 ("[DLM] dlm: user locks") Cc: stable@kernel.org # 2.6.19 Signed-off-by: Vasily Averin Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/lock.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 311a1c24e5ea..21643d2b3fee 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -5792,20 +5792,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, goto out; } } - - /* After ua is attached to lkb it will be freed by dlm_free_lkb(). - When DLM_IFL_USER is set, the dlm knows that this is a userspace - lock and that lkb_astparam is the dlm_user_args structure. */ - error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs, fake_astfn, ua, fake_bastfn, &args); - lkb->lkb_flags |= DLM_IFL_USER; - if (error) { + kfree(ua->lksb.sb_lvbptr); + ua->lksb.sb_lvbptr = NULL; + kfree(ua); __put_lkb(ls, lkb); goto out; } + /* After ua is attached to lkb it will be freed by dlm_free_lkb(). + When DLM_IFL_USER is set, the dlm knows that this is a userspace + lock and that lkb_astparam is the dlm_user_args structure. */ + lkb->lkb_flags |= DLM_IFL_USER; error = request_lock(ls, lkb, name, namelen, &args); switch (error) { From 26538d51467f9255be56a354f103928d2acf5590 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 26 Nov 2018 18:45:35 +0100 Subject: [PATCH 1361/2608] gfs2: Get rid of potential double-freeing in gfs2_create_inode commit 6ff9b09e00a441599f3aacdf577254455a048bc9 upstream. In gfs2_create_inode, after setting and releasing the acl / default_acl, the acl / default_acl pointers are not set to NULL as they should be. In that state, when the function reaches label fail_free_acls, gfs2_create_inode will try to release the same acls again. Fix that by setting the pointers to NULL after releasing the acls. Slightly simplify the logic. Also, posix_acl_release checks for NULL already, so there is no need to duplicate those checks here. Fixes: e01580bf9e4d ("gfs2: use generic posix ACL infrastructure") Reported-by: Pan Bian Cc: Christoph Hellwig Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/inode.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 863749e29bf9..c850579ae5a4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -743,17 +743,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, the gfs2 structures. */ if (default_acl) { error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + if (error) + goto fail_gunlock3; posix_acl_release(default_acl); + default_acl = NULL; } if (acl) { - if (!error) - error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); + if (error) + goto fail_gunlock3; posix_acl_release(acl); + acl = NULL; } - if (error) - goto fail_gunlock3; - error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, &gfs2_initxattrs, NULL); if (error) @@ -788,10 +790,8 @@ fail_free_inode: } gfs2_rsqa_delete(ip, NULL); fail_free_acls: - if (default_acl) - posix_acl_release(default_acl); - if (acl) - posix_acl_release(acl); + posix_acl_release(default_acl); + posix_acl_release(acl); fail_gunlock: gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(ghs); From 6d75eb875b67b984e9804fa485e60716b363eaf5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 4 Dec 2018 15:06:27 +0100 Subject: [PATCH 1362/2608] gfs2: Fix loop in gfs2_rbm_find commit 2d29f6b96d8f80322ed2dd895bca590491c38d34 upstream. Fix the resource group wrap-around logic in gfs2_rbm_find that commit e579ed4f44 broke. The bug can lead to unnecessary repeated scanning of the same bitmaps; there is a risk that future changes will turn this into an endless loop. Fixes: e579ed4f44 ("GFS2: Introduce rbm field bii") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b0eee90738ff..914cb3d72ddf 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1695,9 +1695,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, goto next_iter; } if (ret == -E2BIG) { + n += rbm->bii - initial_bii; rbm->bii = 0; rbm->offset = 0; - n += (rbm->bii - initial_bii); goto res_covered_end_of_rgrp; } return ret; From b274cc25da938698dbc525f9a914f82fb3678c19 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 19 Nov 2018 20:01:24 +0200 Subject: [PATCH 1363/2608] b43: Fix error in cordic routine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8ea3819c0bbef57a51d8abe579e211033e861677 upstream. The cordic routine for calculating sines and cosines that was added in commit 6f98e62a9f1b ("b43: update cordic code to match current specs") contains an error whereby a quantity declared u32 can in fact go negative. This problem was detected by Priit Laes who is switching b43 to use the routine in the library functions of the kernel. Fixes: 986504540306 ("b43: make cordic common (LP-PHY and N-PHY need it)") Reported-by: Priit Laes Cc: Rafał Miłecki Cc: Stable # 2.6.34 Signed-off-by: Larry Finger Signed-off-by: Priit Laes Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/b43/phy_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/phy_common.c b/drivers/net/wireless/broadcom/b43/phy_common.c index 85f2ca989565..ef3ffa5ad466 100644 --- a/drivers/net/wireless/broadcom/b43/phy_common.c +++ b/drivers/net/wireless/broadcom/b43/phy_common.c @@ -616,7 +616,7 @@ struct b43_c32 b43_cordic(int theta) u8 i; s32 tmp; s8 signx = 1; - u32 angle = 0; + s32 angle = 0; struct b43_c32 ret = { .i = 39797, .q = 0, }; while (theta > (180 << 16)) From 2524f5d6f4603e6dece56ea62c09ced0d6a635b9 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 23 Oct 2018 09:02:17 +0200 Subject: [PATCH 1364/2608] selinux: policydb - fix byte order and alignment issues commit 5df275cd4cf51c86d49009f1397132f284ba515e upstream. Do the LE conversions before doing the Infiniband-related range checks. The incorrect checks are otherwise causing a failure to load any policy with an ibendportcon rule on BE systems. This can be reproduced by running (on e.g. ppc64): cat >my_module.cil < Cc: Eli Cohen Cc: James Morris Cc: Doug Ledford Cc: # 4.13+ Fixes: a806f7a1616f ("selinux: Create policydb version for Infiniband support") Signed-off-by: Ondrej Mosnacek Acked-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/policydb.c | 51 ++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 6688ac5b991e..ffeb644bfecd 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -2107,6 +2107,7 @@ static int ocontext_read(struct policydb *p, struct policydb_compat_info *info, { int i, j, rc; u32 nel, len; + __be64 prefixbuf[1]; __le32 buf[3]; struct ocontext *l, *c; u32 nodebuf[8]; @@ -2216,21 +2217,30 @@ static int ocontext_read(struct policydb *p, struct policydb_compat_info *info, goto out; break; } - case OCON_IBPKEY: - rc = next_entry(nodebuf, fp, sizeof(u32) * 4); + case OCON_IBPKEY: { + u32 pkey_lo, pkey_hi; + + rc = next_entry(prefixbuf, fp, sizeof(u64)); if (rc) goto out; - c->u.ibpkey.subnet_prefix = be64_to_cpu(*((__be64 *)nodebuf)); + /* we need to have subnet_prefix in CPU order */ + c->u.ibpkey.subnet_prefix = be64_to_cpu(prefixbuf[0]); - if (nodebuf[2] > 0xffff || - nodebuf[3] > 0xffff) { + rc = next_entry(buf, fp, sizeof(u32) * 2); + if (rc) + goto out; + + pkey_lo = le32_to_cpu(buf[0]); + pkey_hi = le32_to_cpu(buf[1]); + + if (pkey_lo > U16_MAX || pkey_hi > U16_MAX) { rc = -EINVAL; goto out; } - c->u.ibpkey.low_pkey = le32_to_cpu(nodebuf[2]); - c->u.ibpkey.high_pkey = le32_to_cpu(nodebuf[3]); + c->u.ibpkey.low_pkey = pkey_lo; + c->u.ibpkey.high_pkey = pkey_hi; rc = context_read_and_validate(&c->context[0], p, @@ -2238,7 +2248,10 @@ static int ocontext_read(struct policydb *p, struct policydb_compat_info *info, if (rc) goto out; break; - case OCON_IBENDPORT: + } + case OCON_IBENDPORT: { + u32 port; + rc = next_entry(buf, fp, sizeof(u32) * 2); if (rc) goto out; @@ -2248,12 +2261,13 @@ static int ocontext_read(struct policydb *p, struct policydb_compat_info *info, if (rc) goto out; - if (buf[1] > 0xff || buf[1] == 0) { + port = le32_to_cpu(buf[1]); + if (port > U8_MAX || port == 0) { rc = -EINVAL; goto out; } - c->u.ibendport.port = le32_to_cpu(buf[1]); + c->u.ibendport.port = port; rc = context_read_and_validate(&c->context[0], p, @@ -2261,7 +2275,8 @@ static int ocontext_read(struct policydb *p, struct policydb_compat_info *info, if (rc) goto out; break; - } + } /* end case */ + } /* end switch */ } } rc = 0; @@ -3104,6 +3119,7 @@ static int ocontext_write(struct policydb *p, struct policydb_compat_info *info, { unsigned int i, j, rc; size_t nel, len; + __be64 prefixbuf[1]; __le32 buf[3]; u32 nodebuf[8]; struct ocontext *c; @@ -3191,12 +3207,17 @@ static int ocontext_write(struct policydb *p, struct policydb_compat_info *info, return rc; break; case OCON_IBPKEY: - *((__be64 *)nodebuf) = cpu_to_be64(c->u.ibpkey.subnet_prefix); + /* subnet_prefix is in CPU order */ + prefixbuf[0] = cpu_to_be64(c->u.ibpkey.subnet_prefix); - nodebuf[2] = cpu_to_le32(c->u.ibpkey.low_pkey); - nodebuf[3] = cpu_to_le32(c->u.ibpkey.high_pkey); + rc = put_entry(prefixbuf, sizeof(u64), 1, fp); + if (rc) + return rc; - rc = put_entry(nodebuf, sizeof(u32), 4, fp); + buf[0] = cpu_to_le32(c->u.ibpkey.low_pkey); + buf[1] = cpu_to_le32(c->u.ibpkey.high_pkey); + + rc = put_entry(buf, sizeof(u32), 2, fp); if (rc) return rc; rc = context_write(p, &c->context[0], fp); From 01b79d20008d26d9e908551bd32f01ed0cbf6f53 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 1 Nov 2018 13:39:49 -0400 Subject: [PATCH 1365/2608] lockd: Show pid of lockd for remote locks commit b8eee0e90f9797b747113638bc75e739b192ad38 upstream. Commit 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks") specified that the l_pid returned for F_GETLK on a local file that has a remote lock should be the pid of the lock manager process. That commit, while updating other filesystems, failed to update lockd, such that locks created by lockd had their fl_pid set to that of the remote process holding the lock. Fix that here to be the pid of lockd. Also, fix the client case so that the returned lock pid is negative, which indicates a remote lock on a remote file. Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific...") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Coddington Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/lockd/clntproc.c | 2 +- fs/lockd/xdr.c | 4 ++-- fs/lockd/xdr4.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 066ac313ae5c..84857ffd2bb8 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -442,7 +442,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) fl->fl_start = req->a_res.lock.fl.fl_start; fl->fl_end = req->a_res.lock.fl.fl_end; fl->fl_type = req->a_res.lock.fl.fl_type; - fl->fl_pid = 0; + fl->fl_pid = -req->a_res.lock.fl.fl_pid; break; default: status = nlm_stat_to_errno(req->a_res.status); diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 7147e4aebecc..9846f7e95282 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -127,7 +127,7 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock) locks_init_lock(fl); fl->fl_owner = current->files; - fl->fl_pid = (pid_t)lock->svid; + fl->fl_pid = current->tgid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ start = ntohl(*p++); @@ -269,7 +269,7 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = (pid_t)lock->svid; + lock->fl.fl_pid = current->tgid; if (!(p = nlm_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 7ed9edf9aed4..70154f376695 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -119,7 +119,7 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) locks_init_lock(fl); fl->fl_owner = current->files; - fl->fl_pid = (pid_t)lock->svid; + fl->fl_pid = current->tgid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ p = xdr_decode_hyper(p, &start); @@ -266,7 +266,7 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = (pid_t)lock->svid; + lock->fl.fl_pid = current->tgid; if (!(p = nlm4_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, From 28493b4147ea3f982e9cff269dc068ac0a98be74 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 1 Mar 2018 17:19:01 +0000 Subject: [PATCH 1366/2608] scripts/kallsyms: filter arm64's __efistub_ symbols commit 1212f7a16af492d59304ba3abccbcc5b5e41423e upstream. On arm64, the EFI stub and the kernel proper are essentially the same binary, although the EFI stub executes at a different virtual address as the kernel. For this reason, the EFI stub is restricted in the symbols it can link to, which is ensured by prefixing all EFI stub symbols with __efistub_ (and emitting __efistub_ prefixed aliases for routines that may be shared between the core kernel and the stub) These symbols are leaking into kallsyms, polluting the namespace, so let's filter them explicitly. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Cc: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- scripts/kallsyms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 9ee9bf7fd1a2..1dd24c5b9b47 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -221,6 +221,7 @@ static int symbol_valid(struct sym_entry *s) static char *special_prefixes[] = { "__crc_", /* modversions */ + "__efistub_", /* arm64 EFI stub namespace */ NULL }; static char *special_suffixes[] = { From b3424be0753e229112d6c4ef266737f10e958b5e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 8 Jan 2019 13:15:49 -0800 Subject: [PATCH 1367/2608] arm64: drop linker script hack to hide __efistub_ symbols commit dd6846d774693bfa27d7db4dae5ea67dfe373fa1 upstream. Commit 1212f7a16af4 ("scripts/kallsyms: filter arm64's __efistub_ symbols") updated the kallsyms code to filter out symbols with the __efistub_ prefix explicitly, so we no longer require the hack in our linker script to emit them as absolute symbols. Cc: Nick Desaulniers Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon [ND: adjusted for context] Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/image.h | 44 +++++++++++++++------------------------ 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index c7fcb232fe47..40f9f0b078a4 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -75,16 +75,6 @@ __efistub_stext_offset = stext - _text; -/* - * Prevent the symbol aliases below from being emitted into the kallsyms - * table, by forcing them to be absolute symbols (which are conveniently - * ignored by scripts/kallsyms) rather than section relative symbols. - * The distinction is only relevant for partial linking, and only for symbols - * that are defined within a section declaration (which is not the case for - * the definitions below) so the resulting values will be identical. - */ -#define KALLSYMS_HIDE(sym) ABSOLUTE(sym) - /* * The EFI stub has its own symbol namespace prefixed by __efistub_, to * isolate it from the kernel proper. The following symbols are legally @@ -94,27 +84,27 @@ __efistub_stext_offset = stext - _text; * linked at. The routines below are all implemented in assembler in a * position independent manner */ -__efistub_memcmp = KALLSYMS_HIDE(__pi_memcmp); -__efistub_memchr = KALLSYMS_HIDE(__pi_memchr); -__efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy); -__efistub_memmove = KALLSYMS_HIDE(__pi_memmove); -__efistub_memset = KALLSYMS_HIDE(__pi_memset); -__efistub_strlen = KALLSYMS_HIDE(__pi_strlen); -__efistub_strnlen = KALLSYMS_HIDE(__pi_strnlen); -__efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp); -__efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp); -__efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area); +__efistub_memcmp = __pi_memcmp; +__efistub_memchr = __pi_memchr; +__efistub_memcpy = __pi_memcpy; +__efistub_memmove = __pi_memmove; +__efistub_memset = __pi_memset; +__efistub_strlen = __pi_strlen; +__efistub_strnlen = __pi_strnlen; +__efistub_strcmp = __pi_strcmp; +__efistub_strncmp = __pi_strncmp; +__efistub___flush_dcache_area = __pi___flush_dcache_area; #ifdef CONFIG_KASAN -__efistub___memcpy = KALLSYMS_HIDE(__pi_memcpy); -__efistub___memmove = KALLSYMS_HIDE(__pi_memmove); -__efistub___memset = KALLSYMS_HIDE(__pi_memset); +__efistub___memcpy = __pi_memcpy; +__efistub___memmove = __pi_memmove; +__efistub___memset = __pi_memset; #endif -__efistub__text = KALLSYMS_HIDE(_text); -__efistub__end = KALLSYMS_HIDE(_end); -__efistub__edata = KALLSYMS_HIDE(_edata); -__efistub_screen_info = KALLSYMS_HIDE(screen_info); +__efistub__text = _text; +__efistub__end = _end; +__efistub__edata = _edata; +__efistub_screen_info = screen_info; #endif From f21ce3cdff2f2be72fd85efab90785307384eed6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Dec 2018 20:58:05 +0100 Subject: [PATCH 1368/2608] arm64: relocatable: fix inconsistencies in linker script and options commit 3bbd3db86470c701091fb1d67f1fab6621debf50 upstream. readelf complains about the section layout of vmlinux when building with CONFIG_RELOCATABLE=y (for KASLR): readelf: Warning: [21]: Link field (0) should index a symtab section. readelf: Warning: [21]: Info field (0) should index a relocatable section. Also, it seems that our use of '-pie -shared' is contradictory, and thus ambiguous. In general, the way KASLR is wired up at the moment is highly tailored to how ld.bfd happens to implement (and conflate) PIE executables and shared libraries, so given the current effort to support other toolchains, let's fix some of these issues as well. - Drop the -pie linker argument and just leave -shared. In ld.bfd, the differences between them are unclear (except for the ELF type of the produced image [0]) but lld chokes on seeing both at the same time. - Rename the .rela output section to .rela.dyn, as is customary for shared libraries and PIE executables, so that it is not misidentified by readelf as a static relocation section (producing the warnings above). - Pass the -z notext and -z norelro options to explicitly instruct the linker to permit text relocations, and to omit the RELRO program header (which requires a certain section layout that we don't adhere to in the kernel). These are the defaults for current versions of ld.bfd. - Discard .eh_frame and .gnu.hash sections to avoid them from being emitted between .head.text and .text, screwing up the section layout. These changes only affect the ELF image, and produce the same binary image. [0] b9dce7f1ba01 ("arm64: kernel: force ET_DYN ELF type for ...") Cc: Nick Desaulniers Cc: Peter Smith Tested-by: Nick Desaulniers Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Makefile | 2 +- arch/arm64/kernel/vmlinux.lds.S | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 48f2b3657507..0c5f70e6d5cf 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -18,7 +18,7 @@ ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour # for relative relocs, since this leads to better Image compression # with the relocation offsets always being zero. -LDFLAGS_vmlinux += -pie -shared -Bsymbolic \ +LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \ $(call ld-option, --no-apply-dynamic-relocs) endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ddfd3c0942f7..6edfdf5b061d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -99,7 +99,8 @@ SECTIONS *(.discard) *(.discard.*) *(.interp .dynamic) - *(.dynsym .dynstr .hash) + *(.dynsym .dynstr .hash .gnu.hash) + *(.eh_frame) } . = KIMAGE_VADDR + TEXT_OFFSET; @@ -176,12 +177,12 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - .rela : ALIGN(8) { + .rela.dyn : ALIGN(8) { *(.rela .rela*) } - __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR); - __rela_size = SIZEOF(.rela); + __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); + __rela_size = SIZEOF(.rela.dyn); . = ALIGN(SEGMENT_ALIGN); __initdata_end = .; From b57b508bdcfd365b63e04ec6d9152e045e1cd476 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 21 Nov 2018 17:21:09 -0200 Subject: [PATCH 1369/2608] powerpc/tm: Set MSR[TS] just prior to recheckpoint commit e1c3743e1a20647c53b719dbf28b48f45d23f2cd upstream. On a signal handler return, the user could set a context with MSR[TS] bits set, and these bits would be copied to task regs->msr. At restore_tm_sigcontexts(), after current task regs->msr[TS] bits are set, several __get_user() are called and then a recheckpoint is executed. This is a problem since a page fault (in kernel space) could happen when calling __get_user(). If it happens, the process MSR[TS] bits were already set, but recheckpoint was not executed, and SPRs are still invalid. The page fault can cause the current process to be de-scheduled, with MSR[TS] active and without tm_recheckpoint() being called. More importantly, without TEXASR[FS] bit set also. Since TEXASR might not have the FS bit set, and when the process is scheduled back, it will try to reclaim, which will be aborted because of the CPU is not in the suspended state, and, then, recheckpoint. This recheckpoint will restore thread->texasr into TEXASR SPR, which might be zero, hitting a BUG_ON(). kernel BUG at /build/linux-sf3Co9/linux-4.9.30/arch/powerpc/kernel/tm.S:434! cpu 0xb: Vector: 700 (Program Check) at [c00000041f1576d0] pc: c000000000054550: restore_gprs+0xb0/0x180 lr: 0000000000000000 sp: c00000041f157950 msr: 8000000100021033 current = 0xc00000041f143000 paca = 0xc00000000fb86300 softe: 0 irq_happened: 0x01 pid = 1021, comm = kworker/11:1 kernel BUG at /build/linux-sf3Co9/linux-4.9.30/arch/powerpc/kernel/tm.S:434! Linux version 4.9.0-3-powerpc64le (debian-kernel@lists.debian.org) (gcc version 6.3.0 20170516 (Debian 6.3.0-18) ) #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26) enter ? for help [c00000041f157b30] c00000000001bc3c tm_recheckpoint.part.11+0x6c/0xa0 [c00000041f157b70] c00000000001d184 __switch_to+0x1e4/0x4c0 [c00000041f157bd0] c00000000082eeb8 __schedule+0x2f8/0x990 [c00000041f157cb0] c00000000082f598 schedule+0x48/0xc0 [c00000041f157ce0] c0000000000f0d28 worker_thread+0x148/0x610 [c00000041f157d80] c0000000000f96b0 kthread+0x120/0x140 [c00000041f157e30] c00000000000c0e0 ret_from_kernel_thread+0x5c/0x7c This patch simply delays the MSR[TS] set, so, if there is any page fault in the __get_user() section, it does not have regs->msr[TS] set, since the TM structures are still invalid, thus avoiding doing TM operations for in-kernel exceptions and possible process reschedule. With this patch, the MSR[TS] will only be set just before recheckpointing and setting TEXASR[FS] = 1, thus avoiding an interrupt with TM registers in invalid state. Other than that, if CONFIG_PREEMPT is set, there might be a preemption just after setting MSR[TS] and before tm_recheckpoint(), thus, this block must be atomic from a preemption perspective, thus, calling preempt_disable/enable() on this code. It is not possible to move tm_recheckpoint to happen earlier, because it is required to get the checkpointed registers from userspace, with __get_user(), thus, the only way to avoid this undesired behavior is delaying the MSR[TS] set. The 32-bits signal handler seems to be safe this current issue, but, it might be exposed to the preemption issue, thus, disabling preemption in this chunk of code. Changes from v2: * Run the critical section with preempt_disable. Fixes: 87b4e5393af7 ("powerpc/tm: Fix return of active 64bit signals") Cc: stable@vger.kernel.org (v3.9+) Signed-off-by: Breno Leitao Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 20 ++++++++++++++- arch/powerpc/kernel/signal_64.c | 44 ++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 92fb1c8dbbd8..636ea854808e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -866,7 +866,23 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* If TM bits are set to the reserved value, it's an invalid context */ if (MSR_TM_RESV(msr_hi)) return 1; - /* Pull in the MSR TM bits from the user context */ + + /* + * Disabling preemption, since it is unsafe to be preempted + * with MSR[TS] set without recheckpointing. + */ + preempt_disable(); + + /* + * CAUTION: + * After regs->MSR[TS] being updated, make sure that get_user(), + * put_user() or similar functions are *not* called. These + * functions can generate page faults which will cause the process + * to be de-scheduled with MSR[TS] set but without calling + * tm_recheckpoint(). This can cause a bug. + * + * Pull in the MSR TM bits from the user context + */ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK); /* Now, recheckpoint. This loads up all of the checkpointed (older) * registers, including FP and V[S]Rs. After recheckpointing, the @@ -891,6 +907,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, } #endif + preempt_enable(); + return 0; } #endif diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index b2c002993d78..979b9463e17b 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -452,20 +452,6 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, if (MSR_TM_RESV(msr)) return -EINVAL; - /* pull in MSR TS bits from user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); - - /* - * Ensure that TM is enabled in regs->msr before we leave the signal - * handler. It could be the case that (a) user disabled the TM bit - * through the manipulation of the MSR bits in uc_mcontext or (b) the - * TM bit was disabled because a sufficient number of context switches - * happened whilst in the signal handler and load_tm overflowed, - * disabling the TM bit. In either case we can end up with an illegal - * TM state leading to a TM Bad Thing when we return to userspace. - */ - regs->msr |= MSR_TM; - /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); @@ -557,6 +543,34 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, tm_enable(); /* Make sure the transaction is marked as failed */ tsk->thread.tm_texasr |= TEXASR_FS; + + /* + * Disabling preemption, since it is unsafe to be preempted + * with MSR[TS] set without recheckpointing. + */ + preempt_disable(); + + /* pull in MSR TS bits from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + + /* + * Ensure that TM is enabled in regs->msr before we leave the signal + * handler. It could be the case that (a) user disabled the TM bit + * through the manipulation of the MSR bits in uc_mcontext or (b) the + * TM bit was disabled because a sufficient number of context switches + * happened whilst in the signal handler and load_tm overflowed, + * disabling the TM bit. In either case we can end up with an illegal + * TM state leading to a TM Bad Thing when we return to userspace. + * + * CAUTION: + * After regs->MSR[TS] being updated, make sure that get_user(), + * put_user() or similar functions are *not* called. These + * functions can generate page faults which will cause the process + * to be de-scheduled with MSR[TS] set but without calling + * tm_recheckpoint(). This can cause a bug. + */ + regs->msr |= MSR_TM; + /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&tsk->thread, msr); @@ -570,6 +584,8 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, regs->msr |= MSR_VEC; } + preempt_enable(); + return err; } #endif From 186213114b74bd52f5014e2877c3a6d72299762a Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 5 Nov 2018 09:52:48 +0100 Subject: [PATCH 1370/2608] 9p/net: put a lower bound on msize commit 574d356b7a02c7e1b01a1d9cba8a26b3c2888f45 upstream. If the requested msize is too small (either from command line argument or from the server version reply), we won't get any work done. If it's *really* too small, nothing will work, and this got caught by syzbot recently (on a new kmem_cache_create_usercopy() call) Just set a minimum msize to 4k in both code paths, until someone complains they have a use-case for a smaller msize. We need to check in both mount option and server reply individually because the msize for the first version request would be unchecked with just a global check on clnt->msize. Link: http://lkml.kernel.org/r/1541407968-31350-1-git-send-email-asmadeus@codewreck.org Reported-by: syzbot+0c1d61e4db7db94102ca@syzkaller.appspotmail.com Signed-off-by: Dominique Martinet Cc: Eric Van Hensbergen Cc: Latchesar Ionkov Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/9p/client.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/9p/client.c b/net/9p/client.c index 3ec5a82929b2..ef0f8fe3ac08 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -181,6 +181,12 @@ static int parse_opts(char *opts, struct p9_client *clnt) ret = r; continue; } + if (option < 4096) { + p9_debug(P9_DEBUG_ERROR, + "msize should be at least 4k\n"); + ret = -EINVAL; + continue; + } clnt->msize = option; break; case Opt_trans: @@ -996,10 +1002,18 @@ static int p9_client_version(struct p9_client *c) else if (!strncmp(version, "9P2000", 6)) c->proto_version = p9_proto_legacy; else { + p9_debug(P9_DEBUG_ERROR, + "server returned an unknown version: %s\n", version); err = -EREMOTEIO; goto error; } + if (msize < 4096) { + p9_debug(P9_DEBUG_ERROR, + "server returned a msize < 4096: %d\n", msize); + err = -EREMOTEIO; + goto error; + } if (msize < c->msize) c->msize = msize; @@ -1064,6 +1078,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (clnt->msize > clnt->trans_mod->maxsize) clnt->msize = clnt->trans_mod->maxsize; + if (clnt->msize < 4096) { + p9_debug(P9_DEBUG_ERROR, + "Please specify a msize of at least 4k\n"); + err = -EINVAL; + goto free_client; + } + err = p9_client_version(clnt); if (err) goto close_trans; From 2063fe0ec52344dc9b784735a58afbb97c29dcae Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 25 Oct 2018 12:40:57 -0700 Subject: [PATCH 1371/2608] rxe: fix error completion wr_id and qp_num commit e48d8ed9c6193502d849b35767fd18e20bbd7ba2 upstream. Error completions must still contain a valid wr_id and qp_num such that the consumer can rely on. Correctly fill these fields in receive error completions. Reported-by: Walker Benjamin Cc: stable@vger.kernel.org Signed-off-by: Sagi Grimberg Reviewed-by: Zhu Yanjun Tested-by: Zhu Yanjun Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_resp.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 4d84b010b3ee..74328561bee2 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -845,11 +845,16 @@ static enum resp_states do_complete(struct rxe_qp *qp, memset(&cqe, 0, sizeof(cqe)); - wc->wr_id = wqe->wr_id; - wc->status = qp->resp.status; - wc->qp = &qp->ibqp; + if (qp->rcq->is_user) { + uwc->status = qp->resp.status; + uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr_id; + } else { + wc->status = qp->resp.status; + wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr_id; + } - /* fields after status are not required for errors */ if (wc->status == IB_WC_SUCCESS) { wc->opcode = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? From d59964607842725de662d112c125ec8223a82a20 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 21 Nov 2018 15:29:33 -0800 Subject: [PATCH 1372/2608] iommu/vt-d: Handle domain agaw being less than iommu agaw commit 3569dd07aaad71920c5ea4da2d5cc9a167c1ffd4 upstream. The Intel IOMMU driver opportunistically skips a few top level page tables from the domain paging directory while programming the IOMMU context entry. However there is an implicit assumption in the code that domain's adjusted guest address width (agaw) would always be greater than IOMMU's agaw. The IOMMU capabilities in an upcoming platform cause the domain's agaw to be lower than IOMMU's agaw. The issue is seen when the IOMMU supports both 4-level and 5-level paging. The domain builds a 4-level page table based on agaw of 2. However the IOMMU's agaw is set as 3 (5-level). In this case the code incorrectly tries to skip page page table levels. This causes the IOMMU driver to avoid programming the context entry. The fix handles this case and programs the context entry accordingly. Fixes: de24e55395698 ("iommu/vt-d: Simplify domain_context_mapping_one") Cc: Cc: Ashok Raj Cc: Jacob Pan Cc: Lu Baolu Reviewed-by: Lu Baolu Reported-by: Ramos Falcon, Ernesto R Tested-by: Ricardo Neri Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e86c1c8ec7f6..34006354d2eb 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2093,7 +2093,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, * than default. Unnecessary for PT mode. */ if (translation != CONTEXT_TT_PASS_THROUGH) { - for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { ret = -ENOMEM; pgd = phys_to_virt(dma_pte_addr(pgd)); if (!dma_pte_present(pgd)) @@ -2107,7 +2107,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, translation = CONTEXT_TT_MULTI_LEVEL; context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, iommu->agaw); + context_set_address_width(context, agaw); } else { /* * In pass through mode, AW must be programmed to From c6a9a1ccafc49fe95d8de54eef154ad5c3b94077 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 27 Dec 2018 13:46:17 -0800 Subject: [PATCH 1373/2608] sched/fair: Fix infinite loop in update_blocked_averages() by reverting a9e7f6544b9c commit c40f7d74c741a907cfaeb73a7697081881c497d0 upstream. Zhipeng Xie, Xie XiuQi and Sargun Dhillon reported lockups in the scheduler under high loads, starting at around the v4.18 time frame, and Zhipeng Xie tracked it down to bugs in the rq->leaf_cfs_rq_list manipulation. Do a (manual) revert of: a9e7f6544b9c ("sched/fair: Fix O(nr_cgroups) in load balance path") It turns out that the list_del_leaf_cfs_rq() introduced by this commit is a surprising property that was not considered in followup commits such as: 9c2791f936ef ("sched/fair: Fix hierarchical order in rq->leaf_cfs_rq_list") As Vincent Guittot explains: "I think that there is a bigger problem with commit a9e7f6544b9c and cfs_rq throttling: Let take the example of the following topology TG2 --> TG1 --> root: 1) The 1st time a task is enqueued, we will add TG2 cfs_rq then TG1 cfs_rq to leaf_cfs_rq_list and we are sure to do the whole branch in one path because it has never been used and can't be throttled so tmp_alone_branch will point to leaf_cfs_rq_list at the end. 2) Then TG1 is throttled 3) and we add TG3 as a new child of TG1. 4) The 1st enqueue of a task on TG3 will add TG3 cfs_rq just before TG1 cfs_rq and tmp_alone_branch will stay on rq->leaf_cfs_rq_list. With commit a9e7f6544b9c, we can del a cfs_rq from rq->leaf_cfs_rq_list. So if the load of TG1 cfs_rq becomes NULL before step 2) above, TG1 cfs_rq is removed from the list. Then at step 4), TG3 cfs_rq is added at the beginning of rq->leaf_cfs_rq_list but tmp_alone_branch still points to TG3 cfs_rq because its throttled parent can't be enqueued when the lock is released. tmp_alone_branch doesn't point to rq->leaf_cfs_rq_list whereas it should. So if TG3 cfs_rq is removed or destroyed before tmp_alone_branch points on another TG cfs_rq, the next TG cfs_rq that will be added, will be linked outside rq->leaf_cfs_rq_list - which is bad. In addition, we can break the ordering of the cfs_rq in rq->leaf_cfs_rq_list but this ordering is used to update and propagate the update from leaf down to root." Instead of trying to work through all these cases and trying to reproduce the very high loads that produced the lockup to begin with, simplify the code temporarily by reverting a9e7f6544b9c - which change was clearly not thought through completely. This (hopefully) gives us a kernel that doesn't lock up so people can continue to enjoy their holidays without worrying about regressions. ;-) [ mingo: Wrote changelog, fixed weird spelling in code comment while at it. ] Analyzed-by: Xie XiuQi Analyzed-by: Vincent Guittot Reported-by: Zhipeng Xie Reported-by: Sargun Dhillon Reported-by: Xie XiuQi Tested-by: Zhipeng Xie Tested-by: Sargun Dhillon Signed-off-by: Linus Torvalds Acked-by: Vincent Guittot Cc: # v4.13+ Cc: Bin Li Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Fixes: a9e7f6544b9c ("sched/fair: Fix O(nr_cgroups) in load balance path") Link: http://lkml.kernel.org/r/1545879866-27809-1-git-send-email-xiexiuqi@huawei.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 43 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7240bb4a4090..6e108af21481 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -369,10 +369,9 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) } } -/* Iterate thr' all leaf cfs_rq's on a runqueue */ -#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \ - list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list, \ - leaf_cfs_rq_list) +/* Iterate through all leaf cfs_rq's on a runqueue: */ +#define for_each_leaf_cfs_rq(rq, cfs_rq) \ + list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) /* Do the two (enqueued) entities belong to the same group ? */ static inline struct cfs_rq * @@ -465,8 +464,8 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) { } -#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \ - for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos) +#define for_each_leaf_cfs_rq(rq, cfs_rq) \ + for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) static inline struct sched_entity *parent_entity(struct sched_entity *se) { @@ -6970,27 +6969,10 @@ static void attach_tasks(struct lb_env *env) #ifdef CONFIG_FAIR_GROUP_SCHED -static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) -{ - if (cfs_rq->load.weight) - return false; - - if (cfs_rq->avg.load_sum) - return false; - - if (cfs_rq->avg.util_sum) - return false; - - if (cfs_rq->runnable_load_sum) - return false; - - return true; -} - static void update_blocked_averages(int cpu) { struct rq *rq = cpu_rq(cpu); - struct cfs_rq *cfs_rq, *pos; + struct cfs_rq *cfs_rq; struct rq_flags rf; rq_lock_irqsave(rq, &rf); @@ -7000,7 +6982,7 @@ static void update_blocked_averages(int cpu) * Iterates the task_group tree in a bottom up fashion, see * list_add_leaf_cfs_rq() for details. */ - for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) { + for_each_leaf_cfs_rq(rq, cfs_rq) { struct sched_entity *se; /* throttled entities do not contribute to load */ @@ -7014,13 +6996,6 @@ static void update_blocked_averages(int cpu) se = cfs_rq->tg->se[cpu]; if (se && !skip_blocked_update(se)) update_load_avg(se, 0); - - /* - * There can be a lot of idle CPU cgroups. Don't let fully - * decayed cfs_rqs linger on the list. - */ - if (cfs_rq_is_decayed(cfs_rq)) - list_del_leaf_cfs_rq(cfs_rq); } rq_unlock_irqrestore(rq, &rf); } @@ -9580,10 +9555,10 @@ const struct sched_class fair_sched_class = { #ifdef CONFIG_SCHED_DEBUG void print_cfs_stats(struct seq_file *m, int cpu) { - struct cfs_rq *cfs_rq, *pos; + struct cfs_rq *cfs_rq; rcu_read_lock(); - for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos) + for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq) print_cfs_rq(m, cpu, cfs_rq); rcu_read_unlock(); } From 7983d29437b99e1e9786e2621f297aa6cd6bf3ba Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 29 Nov 2018 11:22:50 +0800 Subject: [PATCH 1374/2608] ceph: don't update importing cap's mseq when handing cap export commit 3c1392d4c49962a31874af14ae9ff289cb2b3851 upstream. Updating mseq makes client think importer mds has accepted all prior cap messages and importer mds knows what caps client wants. Actually some cap messages may have been dropped because of mseq mismatch. If mseq is left untouched, importing cap's mds_wanted later will get reset by cap import message. Cc: stable@vger.kernel.org Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ff5d32cf9578..92eb9c3052ee 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3438,7 +3438,6 @@ retry: tcap->cap_id = t_cap_id; tcap->seq = t_seq - 1; tcap->issue_seq = t_seq - 1; - tcap->mseq = t_mseq; tcap->issued |= issued; tcap->implemented |= issued; if (cap == ci->i_auth_cap) From a1fe71105652b463bf551b9e525e0d6d12f0c2f5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 12 Dec 2018 14:45:18 +0100 Subject: [PATCH 1375/2608] genwqe: Fix size check commit fdd669684655c07dacbdb0d753fd13833de69a33 upstream. Calling the test program genwqe_cksum with the default buffer size of 2MB triggers the following kernel warning on s390: WARNING: CPU: 30 PID: 9311 at mm/page_alloc.c:3189 __alloc_pages_nodemask+0x45c/0xbe0 CPU: 30 PID: 9311 Comm: genwqe_cksum Kdump: loaded Not tainted 3.10.0-957.el7.s390x #1 task: 00000005e5d13980 ti: 00000005e7c6c000 task.ti: 00000005e7c6c000 Krnl PSW : 0704c00180000000 00000000002780ac (__alloc_pages_nodemask+0x45c/0xbe0) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 EA:3 Krnl GPRS: 00000000002932b8 0000000000b73d7c 0000000000000010 0000000000000009 0000000000000041 00000005e7c6f9b8 0000000000000001 00000000000080d0 0000000000000000 0000000000b70500 0000000000000001 0000000000000000 0000000000b70528 00000000007682c0 0000000000277df2 00000005e7c6f9a0 Krnl Code: 000000000027809e: de7195001000 ed 1280(114,%r9),0(%r1) 00000000002780a4: a774fead brc 7,277dfe #00000000002780a8: a7f40001 brc 15,2780aa >00000000002780ac: 92011000 mvi 0(%r1),1 00000000002780b0: a7f4fea7 brc 15,277dfe 00000000002780b4: 9101c6b6 tm 1718(%r12),1 00000000002780b8: a784ff3a brc 8,277f2c 00000000002780bc: a7f4fe2e brc 15,277d18 Call Trace: ([<0000000000277df2>] __alloc_pages_nodemask+0x1a2/0xbe0) [<000000000013afae>] s390_dma_alloc+0xfe/0x310 [<000003ff8065f362>] __genwqe_alloc_consistent+0xfa/0x148 [genwqe_card] [<000003ff80658f7a>] genwqe_mmap+0xca/0x248 [genwqe_card] [<00000000002b2712>] mmap_region+0x4e2/0x778 [<00000000002b2c54>] do_mmap+0x2ac/0x3e0 [<0000000000292d7e>] vm_mmap_pgoff+0xd6/0x118 [<00000000002b081c>] SyS_mmap_pgoff+0xdc/0x268 [<00000000002b0a34>] SyS_old_mmap+0x8c/0xb0 [<000000000074e518>] sysc_tracego+0x14/0x1e [<000003ffacf87dc6>] 0x3ffacf87dc6 turns out the check in __genwqe_alloc_consistent uses "> MAX_ORDER" while the mm code uses ">= MAX_ORDER". Fix genwqe. Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger Signed-off-by: Frank Haverkamp Signed-off-by: Greg Kroah-Hartman --- drivers/misc/genwqe/card_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 147b83011b58..2769eb0dfcf5 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -217,7 +217,7 @@ u32 genwqe_crc32(u8 *buff, size_t len, u32 init) void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, dma_addr_t *dma_handle) { - if (get_order(size) > MAX_ORDER) + if (get_order(size) >= MAX_ORDER) return NULL; return dma_zalloc_coherent(&cd->pci_dev->dev, size, dma_handle, From 6293890419baea40d95b71beb671ab472c93e2c9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 19 Dec 2018 17:19:22 +0200 Subject: [PATCH 1376/2608] intel_th: msu: Fix an off-by-one in attribute store commit ec5b5ad6e272d8d6b92d1007f79574919862a2d2 upstream. The 'nr_pages' attribute of the 'msc' subdevices parses a comma-separated list of window sizes, passed from userspace. However, there is a bug in the string parsing logic wherein it doesn't exclude the comma character from the range of characters as it consumes them. This leads to an out-of-bounds access given a sufficiently long list. For example: > # echo 8,8,8,8 > /sys/bus/intel_th/devices/0-msc0/nr_pages > ================================================================== > BUG: KASAN: slab-out-of-bounds in memchr+0x1e/0x40 > Read of size 1 at addr ffff8803ffcebcd1 by task sh/825 > > CPU: 3 PID: 825 Comm: npktest.sh Tainted: G W 4.20.0-rc1+ > Call Trace: > dump_stack+0x7c/0xc0 > print_address_description+0x6c/0x23c > ? memchr+0x1e/0x40 > kasan_report.cold.5+0x241/0x308 > memchr+0x1e/0x40 > nr_pages_store+0x203/0xd00 [intel_th_msu] Fix this by accounting for the comma character. Signed-off-by: Alexander Shishkin Fixes: ba82664c134ef ("intel_th: Add Memory Storage Unit driver") Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 58ac786634dc..82f2b70ca5bf 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -1431,7 +1431,8 @@ nr_pages_store(struct device *dev, struct device_attribute *attr, if (!end) break; - len -= end - p; + /* consume the number and the following comma, hence +1 */ + len -= end - p + 1; p = end + 1; } while (len); From 8936b67c81e14e7f8a63f3ebd3e3a29c1dddfed8 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Fri, 16 Nov 2018 17:23:47 +0100 Subject: [PATCH 1377/2608] power: supply: olpc_battery: correct the temperature units commit ed54ffbe554f0902689fd6d1712bbacbacd11376 upstream. According to [1] and [2], the temperature values are in tenths of degree Celsius. Exposing the Celsius value makes the battery appear on fire: $ upower -i /org/freedesktop/UPower/devices/battery_olpc_battery ... temperature: 236.9 degrees C Tested on OLPC XO-1 and OLPC XO-1.75 laptops. [1] include/linux/power_supply.h [2] Documentation/power/power_supply_class.txt Fixes: fb972873a767 ("[BATTERY] One Laptop Per Child power/battery driver") Cc: stable@vger.kernel.org Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/olpc_battery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/olpc_battery.c b/drivers/power/supply/olpc_battery.c index 3bc2eea7b3b7..62926804949d 100644 --- a/drivers/power/supply/olpc_battery.c +++ b/drivers/power/supply/olpc_battery.c @@ -427,14 +427,14 @@ static int olpc_bat_get_property(struct power_supply *psy, if (ret) return ret; - val->intval = (s16)be16_to_cpu(ec_word) * 100 / 256; + val->intval = (s16)be16_to_cpu(ec_word) * 10 / 256; break; case POWER_SUPPLY_PROP_TEMP_AMBIENT: ret = olpc_ec_cmd(EC_AMB_TEMP, NULL, 0, (void *)&ec_word, 2); if (ret) return ret; - val->intval = (int)be16_to_cpu(ec_word) * 100 / 256; + val->intval = (int)be16_to_cpu(ec_word) * 10 / 256; break; case POWER_SUPPLY_PROP_CHARGE_COUNTER: ret = olpc_ec_cmd(EC_BAT_ACR, NULL, 0, (void *)&ec_word, 2); From ff420ff182322f5f81925a0de7081b28a0c2d5af Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 10 Dec 2018 08:08:28 +0000 Subject: [PATCH 1378/2608] lib: fix build failure in CONFIG_DEBUG_VIRTUAL test commit 10fdf838e5f540beca466e9d1325999c072e5d3f upstream. On several arches, virt_to_phys() is in io.h Build fails without it: CC lib/test_debug_virtual.o lib/test_debug_virtual.c: In function 'test_debug_virtual_init': lib/test_debug_virtual.c:26:7: error: implicit declaration of function 'virt_to_phys' [-Werror=implicit-function-declaration] pa = virt_to_phys(va); ^ Fixes: e4dace361552 ("lib: add test module for CONFIG_DEBUG_VIRTUAL") CC: stable@vger.kernel.org Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- lib/test_debug_virtual.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_debug_virtual.c b/lib/test_debug_virtual.c index b9cdeecc19dc..777b491df25d 100644 --- a/lib/test_debug_virtual.c +++ b/lib/test_debug_virtual.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #ifdef CONFIG_MIPS From e956139f32412160616d52f5f630da68a40e4c47 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 9 Oct 2018 15:24:46 +0200 Subject: [PATCH 1379/2608] drm/vc4: Set ->is_yuv to false when num_planes == 1 commit 2b02a05bdc3a62d36e0d0b015351897109e25991 upstream. When vc4_plane_state is duplicated ->is_yuv is left assigned to its previous value, and we never set it back to false when switching to a non-YUV format. Fix that by setting ->is_yuv to false in the 'num_planes == 1' branch of the vc4_plane_setup_clipping_and_scaling() function. Fixes: fc04023fafecf ("drm/vc4: Add support for YUV planes.") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20181009132446.21960-1-boris.brezillon@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 502c7eb708c2..5bd3c2ef0067 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -354,6 +354,7 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) if (vc4_state->is_unity) vc4_state->x_scaling[0] = VC4_SCALING_PPF; } else { + vc4_state->is_yuv = false; vc4_state->x_scaling[1] = VC4_SCALING_NONE; vc4_state->y_scaling[1] = VC4_SCALING_NONE; } From b018b00c39767e4ddc4d802471eda46abcb9c87b Mon Sep 17 00:00:00 2001 From: Ivan Mironov Date: Mon, 24 Dec 2018 20:13:05 +0500 Subject: [PATCH 1380/2608] bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw commit 38355a5f9a22bfa5bd5b1bb79805aca39fa53729 upstream. This happened when I tried to boot normal Fedora 29 system with latest available kernel (from fedora rawhide, plus some unrelated custom patches): BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0010 [#1] SMP PTI CPU: 6 PID: 1422 Comm: libvirtd Tainted: G I 4.20.0-0.rc7.git3.hpsa2.1.fc29.x86_64 #1 Hardware name: HP ProLiant BL460c G6, BIOS I24 05/21/2018 RIP: 0010: (null) Code: Bad RIP value. RSP: 0018:ffffa47ccdc9fbe0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000000003e8 RCX: ffffa47ccdc9fbf8 RDX: ffffa47ccdc9fc00 RSI: ffff97d9ee7b01f8 RDI: ffff97d9f0150b80 RBP: ffff97d9f0150b80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000003 R13: ffff97d9ef1e53e8 R14: 0000000000000009 R15: ffff97d9f0ac6730 FS: 00007f4d224ef700(0000) GS:ffff97d9fa200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000011ece52006 CR4: 00000000000206e0 Call Trace: ? bnx2x_chip_cleanup+0x195/0x610 [bnx2x] ? bnx2x_nic_unload+0x1e2/0x8f0 [bnx2x] ? bnx2x_reload_if_running+0x24/0x40 [bnx2x] ? bnx2x_set_features+0x79/0xa0 [bnx2x] ? __netdev_update_features+0x244/0x9e0 ? netlink_broadcast_filtered+0x136/0x4b0 ? netdev_update_features+0x22/0x60 ? dev_disable_lro+0x1c/0xe0 ? devinet_sysctl_forward+0x1c6/0x211 ? proc_sys_call_handler+0xab/0x100 ? __vfs_write+0x36/0x1a0 ? rcu_read_lock_sched_held+0x79/0x80 ? rcu_sync_lockdep_assert+0x2e/0x60 ? __sb_start_write+0x14c/0x1b0 ? vfs_write+0x159/0x1c0 ? vfs_write+0xba/0x1c0 ? ksys_write+0x52/0xc0 ? do_syscall_64+0x60/0x1f0 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe After some investigation I figured out that recently added cleanup code tries to call VLAN filtering de-initialization function which exist only for newer hardware. Corresponding function pointer is not set (== 0) for older hardware, namely these chips: #define CHIP_NUM_57710 0x164e #define CHIP_NUM_57711 0x164f #define CHIP_NUM_57711E 0x1650 And I have one of those in my test system: Broadcom Inc. and subsidiaries NetXtreme II BCM57711E 10-Gigabit PCIe [14e4:1650] Function bnx2x_init_vlan_mac_fp_objs() from drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h decides whether to initialize relevant pointers in bnx2x_sp_objs.vlan_obj or not. This regression was introduced after v4.20-rc7, and still exists in v4.20 release. Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload sequence.") Signed-off-by: Ivan Mironov Signed-off-by: Ivan Mironov Acked-by: Sudarsana Kalluru Signed-off-by: David S. Miller Cc: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 85a320e5c0a8..022b06e770d1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9347,10 +9347,16 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) BNX2X_ERR("Failed to schedule DEL commands for UC MACs list: %d\n", rc); - /* Remove all currently configured VLANs */ - rc = bnx2x_del_all_vlans(bp); - if (rc < 0) - BNX2X_ERR("Failed to delete all VLANs\n"); + /* The whole *vlan_obj structure may be not initialized if VLAN + * filtering offload is not supported by hardware. Currently this is + * true for all hardware covered by CHIP_IS_E1x(). + */ + if (!CHIP_IS_E1x(bp)) { + /* Remove all currently configured VLANs */ + rc = bnx2x_del_all_vlans(bp); + if (rc < 0) + BNX2X_ERR("Failed to delete all VLANs\n"); + } /* Disable LLH */ if (!CHIP_IS_E1(bp)) From 59ed81e1eede2cb90010ee923c853a58535aff85 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 24 Apr 2018 09:43:44 +0200 Subject: [PATCH 1381/2608] tools: power/acpi, revert to LD = gcc commit 755396163148b50fe1afb4bdd3365e47f3ff7a42 upstream. Commit 7ed1c1901fe5 (tools: fix cross-compile var clobbering) removed setting of LD to $(CROSS_COMPILE)gcc. This broke build of acpica (acpidump) in power/acpi: ld: unrecognized option '-D_LINUX' The tools pass CFLAGS to the linker (incl. -D_LINUX), so revert this particular change and let LD be $(CC) again. Note that the old behaviour was a bit different, it used $(CROSS_COMPILE)gcc which was eliminated by the commit 7ed1c1901fe5. We use $(CC) for that reason. Fixes: 7ed1c1901fe5 (tools: fix cross-compile var clobbering) Signed-off-by: Jiri Slaby Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki Cc: Sudip Mukherjee Cc: Martin Kelly Signed-off-by: Greg Kroah-Hartman --- tools/power/acpi/Makefile.config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 2cccbba64418..f304be71c278 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -56,6 +56,7 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} # to compile vs uClibc, that can be done here as well. CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- CROSS_COMPILE ?= $(CROSS) +LD = $(CC) HOSTCC = gcc # check if compiler option is supported From 9c07fc259396fb5a26713755f92f60eb477ed567 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 13 Jan 2019 10:01:07 +0100 Subject: [PATCH 1382/2608] Linux 4.14.93 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6f7cca2abbd0..a521e4cbd66f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 92 +SUBLEVEL = 93 EXTRAVERSION = NAME = Petit Gorille From 4124a4cff344abbf8187775eb643d9827830e715 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 14 Nov 2017 16:54:23 -0500 Subject: [PATCH 1383/2608] x86,kvm: move qemu/guest FPU switching out to vcpu_run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f775b13eedee2f7f3c6fdd4e90fb79090ce5d339 upstream. Currently, every time a VCPU is scheduled out, the host kernel will first save the guest FPU/xstate context, then load the qemu userspace FPU context, only to then immediately save the qemu userspace FPU context back to memory. When scheduling in a VCPU, the same extraneous FPU loads and saves are done. This could be avoided by moving from a model where the guest FPU is loaded and stored with preemption disabled, to a model where the qemu userspace FPU is swapped out for the guest FPU context for the duration of the KVM_RUN ioctl. This is done under the VCPU mutex, which is also taken when other tasks inspect the VCPU FPU context, so the code should already be safe for this change. That should come as no surprise, given that s390 already has this optimization. This can fix a bug where KVM calls get_user_pages while owning the FPU, and the file system ends up requesting the FPU again: [258270.527947] __warn+0xcb/0xf0 [258270.527948] warn_slowpath_null+0x1d/0x20 [258270.527951] kernel_fpu_disable+0x3f/0x50 [258270.527953] __kernel_fpu_begin+0x49/0x100 [258270.527955] kernel_fpu_begin+0xe/0x10 [258270.527958] crc32c_pcl_intel_update+0x84/0xb0 [258270.527961] crypto_shash_update+0x3f/0x110 [258270.527968] crc32c+0x63/0x8a [libcrc32c] [258270.527975] dm_bm_checksum+0x1b/0x20 [dm_persistent_data] [258270.527978] node_prepare_for_write+0x44/0x70 [dm_persistent_data] [258270.527985] dm_block_manager_write_callback+0x41/0x50 [dm_persistent_data] [258270.527988] submit_io+0x170/0x1b0 [dm_bufio] [258270.527992] __write_dirty_buffer+0x89/0x90 [dm_bufio] [258270.527994] __make_buffer_clean+0x4f/0x80 [dm_bufio] [258270.527996] __try_evict_buffer+0x42/0x60 [dm_bufio] [258270.527998] dm_bufio_shrink_scan+0xc0/0x130 [dm_bufio] [258270.528002] shrink_slab.part.40+0x1f5/0x420 [258270.528004] shrink_node+0x22c/0x320 [258270.528006] do_try_to_free_pages+0xf5/0x330 [258270.528008] try_to_free_pages+0xe9/0x190 [258270.528009] __alloc_pages_slowpath+0x40f/0xba0 [258270.528011] __alloc_pages_nodemask+0x209/0x260 [258270.528014] alloc_pages_vma+0x1f1/0x250 [258270.528017] do_huge_pmd_anonymous_page+0x123/0x660 [258270.528021] handle_mm_fault+0xfd3/0x1330 [258270.528025] __get_user_pages+0x113/0x640 [258270.528027] get_user_pages+0x4f/0x60 [258270.528063] __gfn_to_pfn_memslot+0x120/0x3f0 [kvm] [258270.528108] try_async_pf+0x66/0x230 [kvm] [258270.528135] tdp_page_fault+0x130/0x280 [kvm] [258270.528149] kvm_mmu_page_fault+0x60/0x120 [kvm] [258270.528158] handle_ept_violation+0x91/0x170 [kvm_intel] [258270.528162] vmx_handle_exit+0x1ca/0x1400 [kvm_intel] No performance changes were detected in quick ping-pong tests on my 4 socket system, which is expected since an FPU+xstate load is on the order of 0.1us, while ping-ponging between CPUs is on the order of 20us, and somewhat noisy. Cc: stable@vger.kernel.org Signed-off-by: Rik van Riel Suggested-by: Christian Borntraeger Signed-off-by: Paolo Bonzini [Fixed a bug where reset_vcpu called put_fpu without preceding load_fpu, which happened inside from KVM_CREATE_VCPU ioctl. - Radim] Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 13 +++++++++++++ arch/x86/kvm/x86.c | 34 +++++++++++++-------------------- include/linux/kvm_host.h | 2 +- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 523308d030d2..72fac8646e9b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -539,7 +539,20 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; + /* + * QEMU userspace and the guest each have their own FPU state. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. + * + * Note that while the PKRU state lives inside the fpu registers, + * it is switched out separately at VMENTER and VMEXIT time. The + * "guest_fpu" state here contains the guest FPU context, with the + * host PRKU bits. + */ + struct fpu user_fpu; struct fpu guest_fpu; + u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac431fa778aa..130be2efafbe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3020,7 +3020,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) srcu_read_unlock(&vcpu->kvm->srcu, idx); pagefault_enable(); kvm_x86_ops->vcpu_put(vcpu); - kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); /* * If userspace has set any breakpoints or watchpoints, dr6 is restored @@ -5377,13 +5376,10 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { - preempt_disable(); - kvm_load_guest_fpu(emul_to_vcpu(ctxt)); } static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) { - preempt_enable(); } static int emulator_intercept(struct x86_emulate_ctxt *ctxt, @@ -7083,7 +7079,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); - kvm_load_guest_fpu(vcpu); /* * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt @@ -7428,12 +7423,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } + kvm_load_guest_fpu(vcpu); + if (unlikely(vcpu->arch.complete_userspace_io)) { int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; vcpu->arch.complete_userspace_io = NULL; r = cui(vcpu); if (r <= 0) - goto out; + goto out_fpu; } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); @@ -7442,6 +7439,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) else r = vcpu_run(vcpu); +out_fpu: + kvm_put_guest_fpu(vcpu); out: kvm_put_guest_fpu(vcpu); post_kvm_run_save(vcpu); @@ -7865,32 +7864,25 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +/* Swap (qemu) user FPU context for the guest FPU context. */ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - if (vcpu->guest_fpu_loaded) - return; - - /* - * Restore all possible states in the guest, - * and assume host would use all available bits. - * Guest xcr0 would be loaded later. - */ - vcpu->guest_fpu_loaded = 1; - __kernel_fpu_begin(); + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ~XFEATURE_MASK_PKRU); + preempt_enable(); trace_kvm_fpu(1); } +/* When vcpu_run ends, restore user space FPU context. */ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - if (!vcpu->guest_fpu_loaded) - return; - - vcpu->guest_fpu_loaded = 0; + preempt_disable(); copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); - __kernel_fpu_end(); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b81d458ad4fb..b6962ae6237e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -232,7 +232,7 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; - int guest_fpu_loaded, guest_xcr0_loaded; + int guest_xcr0_loaded; struct swait_queue_head wq; struct pid __rcu *pid; int sigset_active; From 64ac5483a1b187f4e3fa47c65ba53546f9089bd6 Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Tue, 11 Dec 2018 00:37:25 +0800 Subject: [PATCH 1384/2608] x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE commit e4f358916d528d479c3c12bd2fd03f2d5a576380 upstream. Commit 4cd24de3a098 ("x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support") replaced the RETPOLINE define with CONFIG_RETPOLINE checks. Remove the remaining pieces. [ bp: Massage commit message. ] Fixes: 4cd24de3a098 ("x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support") Signed-off-by: WANG Chao Signed-off-by: Borislav Petkov Reviewed-by: Zhenzhong Duan Reviewed-by: Masahiro Yamada Cc: "H. Peter Anvin" Cc: Andi Kleen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Daniel Borkmann Cc: David Woodhouse Cc: Geert Uytterhoeven Cc: Jessica Yu Cc: Jiri Kosina Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Luc Van Oostenryck Cc: Michal Marek Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Tim Chen Cc: Vasily Gorbik Cc: linux-kbuild@vger.kernel.org Cc: srinivas.eeda@oracle.com Cc: stable Cc: x86-ml Link: https://lkml.kernel.org/r/20181210163725.95977-1-chao.wang@ucloud.cn Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- include/linux/compiler-gcc.h | 2 +- include/linux/module.h | 2 +- scripts/mod/modpost.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 98b24d668b08..004e60470a77 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -212,7 +212,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = SPECTRE_V2_USER_NONE; -#ifdef RETPOLINE +#ifdef CONFIG_RETPOLINE static bool spectre_v2_bad_module; bool retpoline_module_ok(bool has_retpoline) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c11032b06d68..00b06d7efb83 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -108,7 +108,7 @@ #define __weak __attribute__((weak)) #define __alias(symbol) __attribute__((alias(#symbol))) -#ifdef RETPOLINE +#ifdef CONFIG_RETPOLINE #define __noretpoline __attribute__((indirect_branch("keep"))) #endif diff --git a/include/linux/module.h b/include/linux/module.h index b1cc541f2ddf..a9d546c5b9aa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -794,7 +794,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ -#ifdef RETPOLINE +#ifdef CONFIG_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 957f6041dd79..18bc8738e989 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2168,7 +2168,7 @@ static void add_intree_flag(struct buffer *b, int is_intree) /* Cannot check for assembler */ static void add_retpoline(struct buffer *b) { - buf_printf(b, "\n#ifdef RETPOLINE\n"); + buf_printf(b, "\n#ifdef CONFIG_RETPOLINE\n"); buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); buf_printf(b, "#endif\n"); } From 711fd1f564c3e8bdc4e344055abe06f2f6c6b8f7 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 3 Jan 2019 15:53:39 +0800 Subject: [PATCH 1385/2608] ALSA: hda/realtek - Support Dell headset mode for New AIO platform commit c2a7c55a04065c3b0c32d23b099db7ea1dbf6250 upstream. Dell has new platform for ALC274. This will support to enable headset mode. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 31c91e0a815e..6f146f13851c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6311,6 +6311,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), + SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From cd5a27c87820dd613518243c6422d1ea59b4759c Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 9 Jan 2019 16:23:37 +0800 Subject: [PATCH 1386/2608] ALSA: hda/realtek - Add unplug function into unplug state of Headset Mode for ALC225 commit 4d4b0c52bde470c379f5d168d5c139ad866cb808 upstream. Forgot to add unplug function to unplug state of headset mode for ALC225. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6f146f13851c..fb8a242d724c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4005,6 +4005,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) case 0x10ec0225: case 0x10ec0295: case 0x10ec0299: + alc_process_coef_fw(codec, alc225_pre_hsmode); alc_process_coef_fw(codec, coef0225); break; case 0x10ec0867: From e1008331ce7a1033d70ef3e6559eaf5e072b8218 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 9 Jan 2019 17:05:24 +0800 Subject: [PATCH 1387/2608] ALSA: hda/realtek - Disable headset Mic VREF for headset mode of ALC225 commit d1dd42110d2727e81b9265841a62bc84c454c3a2 upstream. Disable Headset Mic VREF for headset mode of ALC225. This will be controlled by coef bits of headset mode functions. [ Fixed a compile warning and code simplification -- tiwai ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fb8a242d724c..1191d8925c44 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5253,6 +5253,13 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, spec->gen.preferred_dacs = preferred_pairs; } +static void alc_fixup_disable_mic_vref(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ); +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -5362,6 +5369,7 @@ enum { ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, ALC255_FIXUP_DELL_SPK_NOISE, + ALC225_FIXUP_DISABLE_MIC_VREF, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC295_FIXUP_DISABLE_DAC3, ALC280_FIXUP_HP_HEADSET_MIC, @@ -6063,6 +6071,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC225_FIXUP_DISABLE_MIC_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_mic_vref, + .chained = true, + .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + }, [ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -6072,7 +6086,7 @@ static const struct hda_fixup alc269_fixups[] = { {} }, .chained = true, - .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + .chain_id = ALC225_FIXUP_DISABLE_MIC_VREF }, [ALC280_FIXUP_HP_HEADSET_MIC] = { .type = HDA_FIXUP_FUNC, From edc8099b54ec5c465987b4f6bcd44a0130ed2ffd Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Dec 2018 22:49:09 +0000 Subject: [PATCH 1388/2608] CIFS: Fix adjustment of credits for MTU requests commit b983f7e92348d7e7d091db1b78b7915e9dd3d63a upstream. Currently for MTU requests we allocate maximum possible credits in advance and then adjust them according to the request size. While we were adjusting the number of credits belonging to the server, we were skipping adjustment of credits belonging to the request. This patch fixes it by setting request credits to CreditCharge field value of SMB2 packet header. Also ask 1 credit more for async read and write operations to increase parallelism and match the behavior of other operations. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1581e8668b09..c0f8087d9819 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2632,12 +2632,14 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rdata->credits) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = shdr->CreditCharge; + shdr->CreditRequest = + cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); spin_lock(&server->req_lock); server->credits += rdata->credits - le16_to_cpu(shdr->CreditCharge); spin_unlock(&server->req_lock); wake_up(&server->request_q); + rdata->credits = le16_to_cpu(shdr->CreditCharge); flags |= CIFS_HAS_CREDITS; } @@ -2842,12 +2844,14 @@ smb2_async_writev(struct cifs_writedata *wdata, if (wdata->credits) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = shdr->CreditCharge; + shdr->CreditRequest = + cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); spin_lock(&server->req_lock); server->credits += wdata->credits - le16_to_cpu(shdr->CreditCharge); spin_unlock(&server->req_lock); wake_up(&server->request_q); + wdata->credits = le16_to_cpu(shdr->CreditCharge); flags |= CIFS_HAS_CREDITS; } From 4092ad400ea6ef5c31871ccefdaf809ef8dd32cc Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jan 2019 11:27:28 -0800 Subject: [PATCH 1389/2608] CIFS: Do not hide EINTR after sending network packets commit ee13919c2e8d1f904e035ad4b4239029a8994131 upstream. Currently we hide EINTR code returned from sock_sendmsg() and return 0 instead. This makes a caller think that we successfully completed the network operation which is not true. Fix this by properly returning EINTR to callers. Cc: Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index a10f51dfa7f5..ffc8757e5a3c 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -318,7 +318,7 @@ uncork: if (rc < 0 && rc != -EINTR) cifs_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else + else if (rc > 0) rc = 0; return rc; From 5c467ac09db61069e409d1249c23a9deff747410 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 8 Jan 2019 18:30:57 +0000 Subject: [PATCH 1390/2608] cifs: Fix potential OOB access of lock element array commit b9a74cde94957d82003fb9f7ab4777938ca851cd upstream. If maxBuf is small but non-zero, it could result in a zero sized lock element array which we would then try and access OOB. Signed-off-by: Ross Lagerwall Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 8 ++++---- fs/cifs/smb2file.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7d6539a04fac..1e176e11dbfa 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1120,10 +1120,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) { + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { free_xid(xid); return -EINVAL; } @@ -1460,10 +1460,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) return -EINVAL; max_num = (max_buf - sizeof(struct smb_hdr)) / diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index b4b1f0305f29..79078533f807 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -124,10 +124,10 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) + if (max_buf < sizeof(struct smb2_lock_element)) return -EINVAL; max_num = max_buf / sizeof(struct smb2_lock_element); From d42666bd9f35a91ef870344d27b86f5bb1bfe220 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 28 Dec 2018 16:15:41 +0100 Subject: [PATCH 1391/2608] usb: cdc-acm: send ZLP for Telit 3G Intel based modems commit 34aabf918717dd14e05051896aaecd3b16b53d95 upstream. Telit 3G Intel based modems require zero packet to be sent if out data size is equal to the endpoint max packet size. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 423a339e53bc..8ab0195f8d32 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1893,6 +1893,13 @@ static const struct usb_device_id acm_ids[] = { .driver_info = IGNORE_DEVICE, }, + { USB_DEVICE(0x1bc7, 0x0021), /* Telit 3G ACM only composition */ + .driver_info = SEND_ZERO_PACKET, + }, + { USB_DEVICE(0x1bc7, 0x0023), /* Telit 3G ACM + ECM composition */ + .driver_info = SEND_ZERO_PACKET, + }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, From e43d3143ad116b0559bd8fc0f2948d2e60404343 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 3 Jan 2019 11:26:17 +0800 Subject: [PATCH 1392/2608] USB: storage: don't insert sane sense for SPC3+ when bad sense specified commit c5603d2fdb424849360fe7e3f8c1befc97571b8c upstream. Currently the code will set US_FL_SANE_SENSE flag unconditionally if device claims SPC3+, however we should allow US_FL_BAD_SENSE flag to prevent this behavior, because SMI SM3350 UFS-USB bridge controller, which claims SPC4, will show strange behavior with 96-byte sense (put the chip into a wrong state that cannot read/write anything). Check the presence of US_FL_BAD_SENSE when assuming US_FL_SANE_SENSE on SPC4+ devices. Signed-off-by: Icenowy Zheng Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 344ec8631481..13f2c051dbf2 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -251,8 +251,12 @@ static int slave_configure(struct scsi_device *sdev) if (!(us->fflags & US_FL_NEEDS_CAP16)) sdev->try_rc_10_first = 1; - /* assume SPC3 or latter devices support sense size > 18 */ - if (sdev->scsi_level > SCSI_SPC_2) + /* + * assume SPC3 or latter devices support sense size > 18 + * unless US_FL_BAD_SENSE quirk is specified. + */ + if (sdev->scsi_level > SCSI_SPC_2 && + !(us->fflags & US_FL_BAD_SENSE)) us->fflags |= US_FL_SANE_SENSE; /* From e8497e3b0b7b165d728941eb652bef6eb7684e4a Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 3 Jan 2019 11:26:18 +0800 Subject: [PATCH 1393/2608] USB: storage: add quirk for SMI SM3350 commit 0a99cc4b8ee83885ab9f097a3737d1ab28455ac0 upstream. The SMI SM3350 USB-UFS bridge controller cannot handle long sense request correctly and will make the chip refuse to do read/write when requested long sense. Add a bad sense quirk for it. Signed-off-by: Icenowy Zheng Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 5e9b35a91431..0fa88daed149 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1284,6 +1284,18 @@ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_CAPACITY ), +/* + * Reported by Icenowy Zheng + * The SMI SM3350 USB-UFS bridge controller will enter a wrong state + * that do not process read/write command if a long sense is requested, + * so force to use 18-byte sense. + */ +UNUSUAL_DEV( 0x090c, 0x3350, 0x0000, 0xffff, + "SMI", + "SM3350 UFS-to-USB-Mass-Storage bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BAD_SENSE ), + /* * Reported by Paul Hartman * This card reader returns "Illegal Request, Logical Block Address From ec197f07d3eaee6d78228760bf6ee3ca40cff2b2 Mon Sep 17 00:00:00 2001 From: Jack Stocker Date: Thu, 3 Jan 2019 21:56:53 +0000 Subject: [PATCH 1394/2608] USB: Add USB_QUIRK_DELAY_CTRL_MSG quirk for Corsair K70 RGB commit 3483254b89438e60f719937376c5e0ce2bc46761 upstream. To match the Corsair Strafe RGB, the Corsair K70 RGB also requires USB_QUIRK_DELAY_CTRL_MSG to completely resolve boot connection issues discussed here: https://github.com/ckb-next/ckb-next/issues/42. Otherwise roughly 1 in 10 boots the keyboard will fail to be detected. Patch that applied delay control quirk for Corsair Strafe RGB: cb88a0588717 ("usb: quirks: add control message delay for 1b1c:1b20") Previous K70 RGB patch to add delay-init quirk: 7a1646d92257 ("Add delay-init quirk for Corsair K70 RGB keyboards") Signed-off-by: Jack Stocker Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index cf378b1ed373..733479ddf8a7 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -240,7 +240,8 @@ static const struct usb_device_id usb_quirk_list[] = { USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, /* Corsair K70 RGB */ - { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, /* Corsair Strafe */ { USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT | From 90bcbcfb745059e555943090ff3f2f615d360e98 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 8 Jan 2019 15:23:00 -0800 Subject: [PATCH 1395/2608] slab: alien caches must not be initialized if the allocation of the alien cache failed commit 09c2e76ed734a1d36470d257a778aaba28e86531 upstream. Callers of __alloc_alien() check for NULL. We must do the same check in __alloc_alien_cache to avoid NULL pointer dereferences on allocation failures. Link: http://lkml.kernel.org/r/010001680f42f192-82b4e12e-1565-4ee0-ae1f-1e98974906aa-000000@email.amazonses.com Fixes: 49dfc304ba241 ("slab: use the lock on alien_cache, instead of the lock on array_cache") Fixes: c8522a3a5832b ("Slab: introduce alloc_alien") Signed-off-by: Christoph Lameter Reported-by: syzbot+d6ed4ec679652b4fd4e4@syzkaller.appspotmail.com Reviewed-by: Andrew Morton Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 68ab88e2920e..09df506ae830 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -679,8 +679,10 @@ static struct alien_cache *__alloc_alien_cache(int node, int entries, struct alien_cache *alc = NULL; alc = kmalloc_node(memsize, gfp, node); - init_arraycache(&alc->ac, entries, batch); - spin_lock_init(&alc->lock); + if (alc) { + init_arraycache(&alc->ac, entries, batch); + spin_lock_init(&alc->lock); + } return alc; } From e973b3929a13c235a474532e640ddff426c5346e Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Tue, 8 Jan 2019 15:23:28 -0800 Subject: [PATCH 1396/2608] mm: page_mapped: don't assume compound page is huge or THP commit 8ab88c7169b7fba98812ead6524b9d05bc76cf00 upstream. LTP proc01 testcase has been observed to rarely trigger crashes on arm64: page_mapped+0x78/0xb4 stable_page_flags+0x27c/0x338 kpageflags_read+0xfc/0x164 proc_reg_read+0x7c/0xb8 __vfs_read+0x58/0x178 vfs_read+0x90/0x14c SyS_read+0x60/0xc0 The issue is that page_mapped() assumes that if compound page is not huge, then it must be THP. But if this is 'normal' compound page (COMPOUND_PAGE_DTOR), then following loop can keep running (for HPAGE_PMD_NR iterations) until it tries to read from memory that isn't mapped and triggers a panic: for (i = 0; i < hpage_nr_pages(page); i++) { if (atomic_read(&page[i]._mapcount) >= 0) return true; } I could replicate this on x86 (v4.20-rc4-98-g60b548237fed) only with a custom kernel module [1] which: - allocates compound page (PAGEC) of order 1 - allocates 2 normal pages (COPY), which are initialized to 0xff (to satisfy _mapcount >= 0) - 2 PAGEC page structs are copied to address of first COPY page - second page of COPY is marked as not present - call to page_mapped(COPY) now triggers fault on access to 2nd COPY page at offset 0x30 (_mapcount) [1] https://github.com/jstancek/reproducers/blob/master/kernel/page_mapped_crash/repro.c Fix the loop to iterate for "1 << compound_order" pages. Kirrill said "IIRC, sound subsystem can producuce custom mapped compound pages". Link: http://lkml.kernel.org/r/c440d69879e34209feba21e12d236d06bc0a25db.1543577156.git.jstancek@redhat.com Fixes: e1534ae95004 ("mm: differentiate page_mapped() from page_mapcount() for compound pages") Signed-off-by: Jan Stancek Debugged-by: Laszlo Ersek Suggested-by: "Kirill A. Shutemov" Acked-by: Michal Hocko Acked-by: Kirill A. Shutemov Reviewed-by: David Hildenbrand Reviewed-by: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index 547e04b5cfff..842ba5fb662e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -449,7 +449,7 @@ bool page_mapped(struct page *page) return true; if (PageHuge(page)) return false; - for (i = 0; i < hpage_nr_pages(page); i++) { + for (i = 0; i < (1 << compound_order(page)); i++) { if (atomic_read(&page[i]._mapcount) >= 0) return true; } From 8c4da1134aa39b1cd9f77be5df349dc08f3aa569 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 8 Jan 2019 15:23:07 -0800 Subject: [PATCH 1397/2608] mm, memcg: fix reclaim deadlock with writeback commit 63f3655f950186752236bb88a22f8252c11ce394 upstream. Liu Bo has experienced a deadlock between memcg (legacy) reclaim and the ext4 writeback task1: wait_on_page_bit+0x82/0xa0 shrink_page_list+0x907/0x960 shrink_inactive_list+0x2c7/0x680 shrink_node_memcg+0x404/0x830 shrink_node+0xd8/0x300 do_try_to_free_pages+0x10d/0x330 try_to_free_mem_cgroup_pages+0xd5/0x1b0 try_charge+0x14d/0x720 memcg_kmem_charge_memcg+0x3c/0xa0 memcg_kmem_charge+0x7e/0xd0 __alloc_pages_nodemask+0x178/0x260 alloc_pages_current+0x95/0x140 pte_alloc_one+0x17/0x40 __pte_alloc+0x1e/0x110 alloc_set_pte+0x5fe/0xc20 do_fault+0x103/0x970 handle_mm_fault+0x61e/0xd10 __do_page_fault+0x252/0x4d0 do_page_fault+0x30/0x80 page_fault+0x28/0x30 task2: __lock_page+0x86/0xa0 mpage_prepare_extent_to_map+0x2e7/0x310 [ext4] ext4_writepages+0x479/0xd60 do_writepages+0x1e/0x30 __writeback_single_inode+0x45/0x320 writeback_sb_inodes+0x272/0x600 __writeback_inodes_wb+0x92/0xc0 wb_writeback+0x268/0x300 wb_workfn+0xb4/0x390 process_one_work+0x189/0x420 worker_thread+0x4e/0x4b0 kthread+0xe6/0x100 ret_from_fork+0x41/0x50 He adds "task1 is waiting for the PageWriteback bit of the page that task2 has collected in mpd->io_submit->io_bio, and tasks2 is waiting for the LOCKED bit the page which tasks1 has locked" More precisely task1 is handling a page fault and it has a page locked while it charges a new page table to a memcg. That in turn hits a memory limit reclaim and the memcg reclaim for legacy controller is waiting on the writeback but that is never going to finish because the writeback itself is waiting for the page locked in the #PF path. So this is essentially ABBA deadlock: lock_page(A) SetPageWriteback(A) unlock_page(A) lock_page(B) lock_page(B) pte_alloc_pne shrink_page_list wait_on_page_writeback(A) SetPageWriteback(B) unlock_page(B) # flush A, B to clear the writeback This accumulating of more pages to flush is used by several filesystems to generate a more optimal IO patterns. Waiting for the writeback in legacy memcg controller is a workaround for pre-mature OOM killer invocations because there is no dirty IO throttling available for the controller. There is no easy way around that unfortunately. Therefore fix this specific issue by pre-allocating the page table outside of the page lock. We have that handy infrastructure for that already so simply reuse the fault-around pattern which already does this. There are probably other hidden __GFP_ACCOUNT | GFP_KERNEL allocations from under a fs page locked but they should be really rare. I am not aware of a better solution unfortunately. [akpm@linux-foundation.org: fix mm/memory.c:__do_fault()] [akpm@linux-foundation.org: coding-style fixes] [mhocko@kernel.org: enhance comment, per Johannes] Link: http://lkml.kernel.org/r/20181214084948.GA5624@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20181213092221.27270-1-mhocko@kernel.org Fixes: c3b94f44fcb0 ("memcg: further prevent OOM with too many dirty pages") Signed-off-by: Michal Hocko Reported-by: Liu Bo Debugged-by: Liu Bo Acked-by: Kirill A. Shutemov Acked-by: Johannes Weiner Reviewed-by: Liu Bo Cc: Jan Kara Cc: Dave Chinner Cc: Theodore Ts'o Cc: Vladimir Davydov Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index b6cfe0cf0ead..fb9f7737c1ff 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3191,6 +3191,29 @@ static int __do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; int ret; + /* + * Preallocate pte before we take page_lock because this might lead to + * deadlocks for memcg reclaim which waits for pages under writeback: + * lock_page(A) + * SetPageWriteback(A) + * unlock_page(A) + * lock_page(B) + * lock_page(B) + * pte_alloc_pne + * shrink_page_list + * wait_on_page_writeback(A) + * SetPageWriteback(B) + * unlock_page(B) + * # flush A, B to clear the writeback + */ + if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { + vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, + vmf->address); + if (!vmf->prealloc_pte) + return VM_FAULT_OOM; + smp_wmb(); /* See comment in __pte_alloc() */ + } + ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) From ec697eb3b840abae279a59725223d4a10dee8938 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 30 Dec 2018 18:25:00 +0100 Subject: [PATCH 1398/2608] ACPI: power: Skip duplicate power resource references in _PRx commit 7d7b467cb95bf29597b417d4990160d4ea6d69b9 upstream. Some ACPI tables contain duplicate power resource references like this: Name (_PR0, Package (0x04) // _PR0: Power Resources for D0 { P28P, P18P, P18P, CLK4 }) This causes a WARN_ON in sysfs_add_link_to_group() because we end up adding a link to the same acpi_device twice: sysfs: cannot create duplicate filename '/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/808622C1:00/OVTI2680:00/power_resources_D0/LNXPOWER:0a' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.12-301.fc29.x86_64 #1 Hardware name: Insyde CherryTrail/Type2 - Board Product Name, BIOS jumperx.T87.KFBNEEA02 04/13/2016 Call Trace: dump_stack+0x5c/0x80 sysfs_warn_dup.cold.3+0x17/0x2a sysfs_do_create_link_sd.isra.2+0xa9/0xb0 sysfs_add_link_to_group+0x30/0x50 acpi_power_expose_list+0x74/0xa0 acpi_power_add_remove_device+0x50/0xa0 acpi_add_single_object+0x26b/0x5f0 acpi_bus_check_add+0xc4/0x250 ... To address this issue, make acpi_extract_power_resources() check for duplicates and simply skip them when found. Cc: All applicable Signed-off-by: Hans de Goede [ rjw: Subject & changelog, comments ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/power.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 1b475bc1ae16..665e93ca0b40 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -131,6 +131,23 @@ void acpi_power_resources_list_free(struct list_head *list) } } +static bool acpi_power_resource_is_dup(union acpi_object *package, + unsigned int start, unsigned int i) +{ + acpi_handle rhandle, dup; + unsigned int j; + + /* The caller is expected to check the package element types */ + rhandle = package->package.elements[i].reference.handle; + for (j = start; j < i; j++) { + dup = package->package.elements[j].reference.handle; + if (dup == rhandle) + return true; + } + + return false; +} + int acpi_extract_power_resources(union acpi_object *package, unsigned int start, struct list_head *list) { @@ -150,6 +167,11 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start, err = -ENODEV; break; } + + /* Some ACPI tables contain duplicate power resource references */ + if (acpi_power_resource_is_dup(package, start, i)) + continue; + err = acpi_add_power_resource(rhandle); if (err) break; From d3db93d2be900a920aca76a557010a90e2c4a877 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 4 Jan 2019 23:10:54 +0100 Subject: [PATCH 1399/2608] ACPI / PMIC: xpower: Fix TS-pin current-source handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2b531d71595d2b5b12782a49b23c335869e2621e upstream. The current-source used for the battery temp-sensor (TS) is shared with the GPADC. For proper fuel-gauge and charger operation the TS current-source needs to be permanently on. But to read the GPADC we need to temporary switch the TS current-source to ondemand, so that the GPADC can use it, otherwise we will always read an all 0 value. The switching from on to on-ondemand is not necessary when the TS current-source is off (this happens on devices which do not have a TS). Prior to this commit there were 2 issues with our handling of the TS current-source switching: 1) We were writing hardcoded values to the ADC TS pin-ctrl register, overwriting various other unrelated bits. Specifically we were overwriting the current-source setting for the TS and GPIO0 pins, forcing it to 80ųA independent of its original setting. On a Chuwi Vi10 tablet this was causing us to get a too high adc value (due to a too high current-source) resulting in acpi_lpat_raw_to_temp() returning -ENOENT, resulting in: ACPI Error: AE_ERROR, Returned by Handler for [UserDefinedRegion] ACPI Error: Method parse/execution failed \_SB.SXP1._TMP, AE_ERROR This commit fixes this by using regmap_update_bits to change only the relevant bits. 2) At the end of intel_xpower_pmic_get_raw_temp() we were unconditionally enabling the TS current-source even on devices where the TS-pin is not used and the current-source thus was off on entry of the function. This commit fixes this by checking if the TS current-source is off when entering intel_xpower_pmic_get_raw_temp() and if so it is left as is. Fixes: 58eefe2f3f53 (ACPI / PMIC: xpower: Do pinswitch ... reading GPADC) Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/pmic/intel_pmic_xpower.c | 41 +++++++++++++++++++++------ 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/pmic/intel_pmic_xpower.c b/drivers/acpi/pmic/intel_pmic_xpower.c index 6c99d3f81095..8735ac99566f 100644 --- a/drivers/acpi/pmic/intel_pmic_xpower.c +++ b/drivers/acpi/pmic/intel_pmic_xpower.c @@ -27,8 +27,11 @@ #define GPI1_LDO_ON (3 << 0) #define GPI1_LDO_OFF (4 << 0) -#define AXP288_ADC_TS_PIN_GPADC 0xf2 -#define AXP288_ADC_TS_PIN_ON 0xf3 +#define AXP288_ADC_TS_CURRENT_ON_OFF_MASK GENMASK(1, 0) +#define AXP288_ADC_TS_CURRENT_OFF (0 << 0) +#define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING (1 << 0) +#define AXP288_ADC_TS_CURRENT_ON_ONDEMAND (2 << 0) +#define AXP288_ADC_TS_CURRENT_ON (3 << 0) static struct pmic_table power_table[] = { { @@ -211,22 +214,44 @@ static int intel_xpower_pmic_update_power(struct regmap *regmap, int reg, */ static int intel_xpower_pmic_get_raw_temp(struct regmap *regmap, int reg) { + int ret, adc_ts_pin_ctrl; u8 buf[2]; - int ret; - ret = regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, - AXP288_ADC_TS_PIN_GPADC); + /* + * The current-source used for the battery temp-sensor (TS) is shared + * with the GPADC. For proper fuel-gauge and charger operation the TS + * current-source needs to be permanently on. But to read the GPADC we + * need to temporary switch the TS current-source to ondemand, so that + * the GPADC can use it, otherwise we will always read an all 0 value. + * + * Note that the switching from on to on-ondemand is not necessary + * when the TS current-source is off (this happens on devices which + * do not use the TS-pin). + */ + ret = regmap_read(regmap, AXP288_ADC_TS_PIN_CTRL, &adc_ts_pin_ctrl); if (ret) return ret; - /* After switching to the GPADC pin give things some time to settle */ - usleep_range(6000, 10000); + if (adc_ts_pin_ctrl & AXP288_ADC_TS_CURRENT_ON_OFF_MASK) { + ret = regmap_update_bits(regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_ON_ONDEMAND); + if (ret) + return ret; + + /* Wait a bit after switching the current-source */ + usleep_range(6000, 10000); + } ret = regmap_bulk_read(regmap, AXP288_GP_ADC_H, buf, 2); if (ret == 0) ret = (buf[0] << 4) + ((buf[1] >> 4) & 0x0f); - regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON); + if (adc_ts_pin_ctrl & AXP288_ADC_TS_CURRENT_ON_OFF_MASK) { + regmap_update_bits(regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_ON); + } return ret; } From 78e5ef1add51528d0e3818bacdb4739fc6661c78 Mon Sep 17 00:00:00 2001 From: Yi Zeng Date: Wed, 9 Jan 2019 15:33:07 +0800 Subject: [PATCH 1400/2608] i2c: dev: prevent adapter retries and timeout being set as minus value commit 6ebec961d59bccf65d08b13fc1ad4e6272a89338 upstream. If adapter->retries is set to a minus value from user space via ioctl, it will make __i2c_transfer and __i2c_smbus_xfer skip the calling to adapter->algo->master_xfer and adapter->algo->smbus_xfer that is registered by the underlying bus drivers, and return value 0 to all the callers. The bus driver will never be accessed anymore by all users, besides, the users may still get successful return value without any error or information log print out. If adapter->timeout is set to minus value from user space via ioctl, it will make the retrying loop in __i2c_transfer and __i2c_smbus_xfer always break after the the first try, due to the time_after always returns true. Signed-off-by: Yi Zeng [wsa: minor grammar updates to commit message] Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 6f638bbc922d..00e8e675cbeb 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -461,9 +461,15 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return i2cdev_ioctl_smbus(client, arg); case I2C_RETRIES: + if (arg > INT_MAX) + return -EINVAL; + client->adapter->retries = arg; break; case I2C_TIMEOUT: + if (arg > INT_MAX) + return -EINVAL; + /* For historical reasons, user-space sets the timeout * value in units of 10 ms. */ From 8b99f1705b5d545c23fc629e51063915e7bb3fce Mon Sep 17 00:00:00 2001 From: Ivan Mironov Date: Tue, 8 Jan 2019 12:23:52 +0500 Subject: [PATCH 1401/2608] drm/fb-helper: Partially bring back workaround for bugs of SDL 1.2 commit 62d85b3bf9d978ed4b6b2aeef5cf0ccf1423906e upstream. SDL 1.2 sets all fields related to the pixel format to zero in some cases[1]. Prior to commit db05c48197759 ("drm: fb-helper: Reject all pixel format changing requests"), there was an unintentional workaround for this that existed for more than a decade. First in device-specific DRM drivers, then here in drm_fb_helper.c. Previous code containing this workaround just ignores pixel format fields from userspace code. Not a good thing either, as this way, driver may silently use pixel format different from what client actually requested, and this in turn will lead to displaying garbage on the screen. I think that returning EINVAL to userspace in this particular case is the right option, so I decided to left code from problematic commit untouched instead of just reverting it entirely. Here is the steps required to reproduce this problem exactly: 1) Compile fceux[2] with SDL 1.2.15 and without GTK or OpenGL support. SDL should be compiled with fbdev support (which is on by default). 2) Create /etc/fb.modes with following contents (values seems not used, and just required to trigger problematic code in SDL): mode "test" geometry 1 1 1 1 1 timings 1 1 1 1 1 1 1 endmode 3) Create ~/.fceux/fceux.cfg with following contents: SDL.Hotkeys.Quit = 27 SDL.DoubleBuffering = 1 4) Ensure that screen resolution is at least 1280x960 (e.g. append "video=Virtual-1:1280x960-32" to the kernel cmdline for qemu/QXL). 5) Try to run fceux on VT with some ROM file[3]: # ./fceux color_test.nes [1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c, FB_SetVideoMode() [2] http://www.fceux.com [3] Example ROM: https://github.com/bokuweb/rustynes/blob/master/roms/color_test.nes Reported-by: saahriktu Suggested-by: saahriktu Cc: stable@vger.kernel.org Fixes: db05c48197759 ("drm: fb-helper: Reject all pixel format changing requests") Signed-off-by: Ivan Mironov [danvet: Delete misleading comment.] Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190108072353.28078-2-mironov.ivan@gmail.com Link: https://patchwork.freedesktop.org/patch/msgid/20190108072353.28078-2-mironov.ivan@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fb_helper.c | 126 ++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 29d1d3df3164..ad6812baa611 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1509,6 +1509,64 @@ static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, var_1->transp.msb_right == var_2->transp.msb_right; } +static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var, + u8 depth) +{ + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; /* 8bit DAC */ + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 0; + var->transp.length = 0; + break; + case 15: + var->red.offset = 10; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->transp.offset = 15; + var->transp.length = 1; + break; + case 16: + var->red.offset = 11; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 0; + var->transp.length = 0; + break; + case 32: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 24; + var->transp.length = 8; + break; + default: + break; + } +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1538,6 +1596,20 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } + /* + * Workaround for SDL 1.2, which is known to be setting all pixel format + * fields values to zero in some cases. We treat this situation as a + * kind of "use some reasonable autodetected values". + */ + if (!var->red.offset && !var->green.offset && + !var->blue.offset && !var->transp.offset && + !var->red.length && !var->green.length && + !var->blue.length && !var->transp.length && + !var->red.msb_right && !var->green.msb_right && + !var->blue.msb_right && !var->transp.msb_right) { + drm_fb_helper_fill_pixel_fmt(var, fb->format->depth); + } + /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. @@ -1848,59 +1920,7 @@ void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helpe info->var.yoffset = 0; info->var.activate = FB_ACTIVATE_NOW; - switch (fb->format->depth) { - case 8: - info->var.red.offset = 0; - info->var.green.offset = 0; - info->var.blue.offset = 0; - info->var.red.length = 8; /* 8bit DAC */ - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; - default: - break; - } + drm_fb_helper_fill_pixel_fmt(&info->var, fb->format->depth); info->var.xres = fb_width; info->var.yres = fb_height; From 022ce60cc621c84df6d0272984e791f60983fd7a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 8 Jan 2019 19:47:38 +0100 Subject: [PATCH 1402/2608] rbd: don't return 0 on unmap if RBD_DEV_FLAG_REMOVING is set commit 85f5a4d666fd9be73856ed16bb36c5af5b406b29 upstream. There is a window between when RBD_DEV_FLAG_REMOVING is set and when the device is removed from rbd_dev_list. During this window, we set "already" and return 0. Returning 0 from write(2) can confuse userspace tools because 0 indicates that nothing was written. In particular, "rbd unmap" will retry the write multiple times a second: 10:28:05.463299 write(4, "0", 1) = 0 10:28:05.463509 write(4, "0", 1) = 0 10:28:05.463720 write(4, "0", 1) = 0 10:28:05.463942 write(4, "0", 1) = 0 10:28:05.464155 write(4, "0", 1) = 0 Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Tested-by: Dongsheng Yang Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9057dad2a64c..f2b1994d58a0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6308,7 +6308,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus, struct list_head *tmp; int dev_id; char opt_buf[6]; - bool already = false; bool force = false; int ret; @@ -6341,13 +6340,13 @@ static ssize_t do_rbd_remove(struct bus_type *bus, spin_lock_irq(&rbd_dev->lock); if (rbd_dev->open_count && !force) ret = -EBUSY; - else - already = test_and_set_bit(RBD_DEV_FLAG_REMOVING, - &rbd_dev->flags); + else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING, + &rbd_dev->flags)) + ret = -EINPROGRESS; spin_unlock_irq(&rbd_dev->lock); } spin_unlock(&rbd_dev_list_lock); - if (ret < 0 || already) + if (ret) return ret; if (force) { From eb13c60d82db21411ba2ea4557494450a9d6fa11 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 24 Dec 2018 20:27:08 -0500 Subject: [PATCH 1403/2608] ext4: make sure enough credits are reserved for dioread_nolock writes commit 812c0cab2c0dfad977605dbadf9148490ca5d93f upstream. There are enough credits reserved for most dioread_nolock writes; however, if the extent tree is sufficiently deep, and/or quota is enabled, the code was not allowing for all eventualities when reserving journal credits for the unwritten extent conversion. This problem can be seen using xfstests ext4/034: WARNING: CPU: 1 PID: 257 at fs/ext4/ext4_jbd2.c:271 __ext4_handle_dirty_metadata+0x10c/0x180 Workqueue: ext4-rsv-conversion ext4_end_io_rsv_work RIP: 0010:__ext4_handle_dirty_metadata+0x10c/0x180 ... EXT4-fs: ext4_free_blocks:4938: aborting transaction: error 28 in __ext4_handle_dirty_metadata EXT4: jbd2_journal_dirty_metadata failed: handle type 11 started at line 4921, credits 4/0, errcode -28 EXT4-fs error (device dm-1) in ext4_free_blocks:4950: error 28 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 22c9bb8c671f..5eb28dcaa0f0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2776,7 +2776,8 @@ static int ext4_writepages(struct address_space *mapping, * We may need to convert up to one extent per block in * the page and we may dirty the inode. */ - rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits); + rsv_blocks = 1 + ext4_chunk_trans_blocks(inode, + PAGE_SIZE >> inode->i_blkbits); } /* From b4727299f58df8c7feaac247f7b841b610b5cbaf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 25 Dec 2018 00:56:33 -0500 Subject: [PATCH 1404/2608] ext4: fix a potential fiemap/page fault deadlock w/ inline_data commit 2b08b1f12cd664dc7d5c84ead9ff25ae97ad5491 upstream. The ext4_inline_data_fiemap() function calls fiemap_fill_next_extent() while still holding the xattr semaphore. This is not necessary and it triggers a circular lockdep warning. This is because fiemap_fill_next_extent() could trigger a page fault when it writes into page which triggers a page fault. If that page is mmaped from the inline file in question, this could very well result in a deadlock. This problem can be reproduced using generic/519 with a file system configuration which has the inline_data feature enabled. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 40a2d1a428c2..137c752ab985 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1864,12 +1864,12 @@ int ext4_inline_data_fiemap(struct inode *inode, physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; physical += offsetof(struct ext4_inode, i_block); - if (physical) - error = fiemap_fill_next_extent(fieinfo, start, physical, - inline_len, flags); brelse(iloc.bh); out: up_read(&EXT4_I(inode)->xattr_sem); + if (physical) + error = fiemap_fill_next_extent(fieinfo, start, physical, + inline_len, flags); return (error < 0 ? error : 0); } From b17971aeaf144ec627b0516ecc990c0a3fd278fe Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 30 Dec 2018 23:20:39 -0500 Subject: [PATCH 1405/2608] ext4: avoid kernel warning when writing the superblock to a dead device commit e86807862e6880809f191c4cea7f88a489f0ed34 upstream. The xfstests generic/475 test switches the underlying device with dm-error while running a stress test. This results in a large number of file system errors, and since we can't lock the buffer head when marking the superblock dirty in the ext4_grp_locked_error() case, it's possible the superblock to be !buffer_uptodate() without buffer_write_io_error() being true. We need to set buffer_uptodate() before we call mark_buffer_dirty() or this will trigger a WARN_ON. It's safe to do this since the superblock must have been properly read into memory or the mount would have been successful. So if buffer_uptodate() is not set, we can safely assume that this happened due to a failed attempt to write the superblock. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 77300b8ca211..d0049064f62f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4844,7 +4844,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) ext4_superblock_csum_set(sb); if (sync) lock_buffer(sbh); - if (buffer_write_io_error(sbh)) { + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the * superblock failed. This could happen because the From 82f71b8bc05c6866b7dfae96ebca5ee102f76afa Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Dec 2018 00:10:48 -0500 Subject: [PATCH 1406/2608] ext4: use ext4_write_inode() when fsyncing w/o a journal commit ad211f3e94b314a910d4af03178a0b52a7d1ee0a upstream. In no-journal mode, we previously used __generic_file_fsync() in no-journal mode. This triggers a lockdep warning, and in addition, it's not safe to depend on the inode writeback mechanism in the case ext4. We can solve both problems by calling ext4_write_inode() directly. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fsync.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 26a7fe5c4fd3..87a7ff00ef62 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -116,8 +116,16 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } + ret = file_write_and_wait_range(file, start, end); + if (ret) + return ret; + if (!journal) { - ret = __generic_file_fsync(file, start, end, datasync); + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL + }; + + ret = ext4_write_inode(inode, &wbc); if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) @@ -125,9 +133,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. From 5903fc6477b1aed7db1165a7f328948e6ef93ef1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Dec 2018 00:11:07 -0500 Subject: [PATCH 1407/2608] ext4: track writeback errors using the generic tracking infrastructure commit 95cb67138746451cc84cf8e516e14989746e93b0 upstream. We already using mapping_set_error() in fs/ext4/page_io.c, so all we need to do is to use file_check_and_advance_wb_err() when handling fsync() requests in ext4_sync_file(). Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fsync.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 87a7ff00ef62..712f00995390 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -164,6 +164,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = err; } out: + err = file_check_and_advance_wb_err(file); + if (ret == 0) + ret = err; trace_ext4_sync_file_exit(inode, ret); return ret; } From 65dba32522065b79a16393efc75f8006c2c3dbb8 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 24 Dec 2018 14:44:52 +0300 Subject: [PATCH 1408/2608] sunrpc: use-after-free in svc_process_common() commit d4b09acf924b84bae77cad090a9d108e70b43643 upstream. if node have NFSv41+ mounts inside several net namespaces it can lead to use-after-free in svc_process_common() svc_process_common() /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); <<< HERE svc_process_common() can use incorrect rqstp->rq_xprt, its caller function bc_svc_process() takes it from serv->sv_bc_xprt. The problem is that serv is global structure but sv_bc_xprt is assigned per-netnamespace. According to Trond, the whole "let's set up rqstp->rq_xprt for the back channel" is nothing but a giant hack in order to work around the fact that svc_process_common() uses it to find the xpt_ops, and perform a couple of (meaningless for the back channel) tests of xpt_flags. All we really need in svc_process_common() is to be able to run rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr() Bruce J Fields points that this xpo_prep_reply_hdr() call is an awfully roundabout way just to do "svc_putnl(resv, 0);" in the tcp case. This patch does not initialiuze rqstp->rq_xprt in bc_svc_process(), now it calls svc_process_common() with rqstp->rq_xprt = NULL. To adjust reply header svc_process_common() just check rqstp->rq_prot and calls svc_tcp_prep_reply_hdr() for tcp case. To handle rqstp->rq_xprt = NULL case in functions called from svc_process_common() patch intruduces net namespace pointer svc_rqst->rq_bc_net and adjust SVC_NET() definition. Some other function was also adopted to properly handle described case. Signed-off-by: Vasily Averin Cc: stable@vger.kernel.org Fixes: 23c20ecd4475 ("NFS: callback up - users counting cleanup") Signed-off-by: J. Bruce Fields v2: - added lost extern svc_tcp_prep_reply_hdr() - dropped trace_svc_process() changes Signed-off-by: Vasily Averin Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/svc.h | 5 ++++- net/sunrpc/svc.c | 11 +++++++---- net/sunrpc/svc_xprt.c | 5 +++-- net/sunrpc/svcsock.c | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3b9f0d1dbb80..e1aa80c4d6db 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -292,9 +292,12 @@ struct svc_rqst { struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ spinlock_t rq_lock; /* per-request lock */ + struct net *rq_bc_net; /* pointer to backchannel's + * net namespace + */ }; -#define SVC_NET(svc_rqst) (svc_rqst->rq_xprt->xpt_net) +#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) /* * Rigorous type checking on sockaddr type conversions diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index aa04666f929d..3a9a03717212 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1144,6 +1144,8 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} #endif +extern void svc_tcp_prep_reply_hdr(struct svc_rqst *); + /* * Common routine for processing the RPC request. */ @@ -1172,7 +1174,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) clear_bit(RQ_DROPME, &rqstp->rq_flags); /* Setup reply header */ - rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); + if (rqstp->rq_prot == IPPROTO_TCP) + svc_tcp_prep_reply_hdr(rqstp); svc_putu32(resv, rqstp->rq_xid); @@ -1244,7 +1247,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) * for lower versions. RPC_PROG_MISMATCH seems to be the closest * fit. */ - if (versp->vs_need_cong_ctrl && + if (versp->vs_need_cong_ctrl && rqstp->rq_xprt && !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) goto err_bad_vers; @@ -1335,7 +1338,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) return 0; close: - if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) + if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) svc_close_xprt(rqstp->rq_xprt); dprintk("svc: svc_process close\n"); return 0; @@ -1462,10 +1465,10 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ - rqstp->rq_xprt = serv->sv_bc_xprt; rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; rqstp->rq_server = serv; + rqstp->rq_bc_net = req->rq_xprt->xprt_net; rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ea7b5a3a53f0..7e5f849b44cd 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -510,10 +510,11 @@ out: */ void svc_reserve(struct svc_rqst *rqstp, int space) { + struct svc_xprt *xprt = rqstp->rq_xprt; + space += rqstp->rq_res.head[0].iov_len; - if (space < rqstp->rq_reserved) { - struct svc_xprt *xprt = rqstp->rq_xprt; + if (xprt && space < rqstp->rq_reserved) { atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c83df30e9655..d6771f3b715b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1207,7 +1207,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) /* * Setup response header. TCP has a 4B record length field. */ -static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) +void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) { struct kvec *resv = &rqstp->rq_res.head[0]; From cb754d67c084d4e673c3dd420636d7e8eb81b024 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 11 Dec 2018 13:23:57 +0100 Subject: [PATCH 1409/2608] KVM: arm/arm64: Fix VMID alloc race by reverting to lock-less commit fb544d1ca65a89f7a3895f7531221ceeed74ada7 upstream. We recently addressed a VMID generation race by introducing a read/write lock around accesses and updates to the vmid generation values. However, kvm_arch_vcpu_ioctl_run() also calls need_new_vmid_gen() but does so without taking the read lock. As far as I can tell, this can lead to the same kind of race: VM 0, VCPU 0 VM 0, VCPU 1 ------------ ------------ update_vttbr (vmid 254) update_vttbr (vmid 1) // roll over read_lock(kvm_vmid_lock); force_vm_exit() local_irq_disable need_new_vmid_gen == false //because vmid gen matches enter_guest (vmid 254) kvm_arch.vttbr = : read_unlock(kvm_vmid_lock); enter_guest (vmid 1) Which results in running two VCPUs in the same VM with different VMIDs and (even worse) other VCPUs from other VMs could now allocate clashing VMID 254 from the new generation as long as VCPU 0 is not exiting. Attempt to solve this by making sure vttbr is updated before another CPU can observe the updated VMID generation. Cc: stable@vger.kernel.org Fixes: f0cf47d939d0 "KVM: arm/arm64: Close VMID generation race" Reviewed-by: Julien Thierry Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/arm.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index ed42b8cf6f5b..32aa88c19b8d 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -61,7 +61,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; static unsigned int kvm_vmid_bits __read_mostly; -static DEFINE_RWLOCK(kvm_vmid_lock); +static DEFINE_SPINLOCK(kvm_vmid_lock); static bool vgic_present; @@ -447,7 +447,9 @@ void force_vm_exit(const cpumask_t *mask) */ static bool need_new_vmid_gen(struct kvm *kvm) { - return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen)); + u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); + smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ + return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen); } /** @@ -462,16 +464,11 @@ static void update_vttbr(struct kvm *kvm) { phys_addr_t pgd_phys; u64 vmid; - bool new_gen; - read_lock(&kvm_vmid_lock); - new_gen = need_new_vmid_gen(kvm); - read_unlock(&kvm_vmid_lock); - - if (!new_gen) + if (!need_new_vmid_gen(kvm)) return; - write_lock(&kvm_vmid_lock); + spin_lock(&kvm_vmid_lock); /* * We need to re-check the vmid_gen here to ensure that if another vcpu @@ -479,7 +476,7 @@ static void update_vttbr(struct kvm *kvm) * use the same vmid. */ if (!need_new_vmid_gen(kvm)) { - write_unlock(&kvm_vmid_lock); + spin_unlock(&kvm_vmid_lock); return; } @@ -502,7 +499,6 @@ static void update_vttbr(struct kvm *kvm) kvm_call_hyp(__kvm_flush_vm_context); } - kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen); kvm->arch.vmid = kvm_next_vmid; kvm_next_vmid++; kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; @@ -513,7 +509,10 @@ static void update_vttbr(struct kvm *kvm) vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = pgd_phys | vmid; - write_unlock(&kvm_vmid_lock); + smp_wmb(); + WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen)); + + spin_unlock(&kvm_vmid_lock); } static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) From 8979da2558a4993989542e1d2db23b426b148ae9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Jan 2019 22:07:13 +0100 Subject: [PATCH 1410/2608] Linux 4.14.94 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a521e4cbd66f..e9a138dd964a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 93 +SUBLEVEL = 94 EXTRAVERSION = NAME = Petit Gorille From fe13700236298671d23dc864eda6a3166092f502 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 1 Nov 2018 00:24:46 +0000 Subject: [PATCH 1411/2608] tty/ldsem: Wake up readers after timed out down_write() commit 231f8fd0cca078bd4396dd7e380db813ac5736e2 upstream. ldsem_down_read() will sleep if there is pending writer in the queue. If the writer times out, readers in the queue should be woken up, otherwise they may miss a chance to acquire the semaphore until the last active reader will do ldsem_up_read(). There was a couple of reports where there was one active reader and other readers soft locked up: Showing all locks held in the system: 2 locks held by khungtaskd/17: #0: (rcu_read_lock){......}, at: watchdog+0x124/0x6d1 #1: (tasklist_lock){.+.+..}, at: debug_show_all_locks+0x72/0x2d3 2 locks held by askfirst/123: #0: (&tty->ldisc_sem){.+.+.+}, at: ldsem_down_read+0x46/0x58 #1: (&ldata->atomic_read_lock){+.+...}, at: n_tty_read+0x115/0xbe4 Prevent readers wait for active readers to release ldisc semaphore. Link: lkml.kernel.org/r/20171121132855.ajdv4k6swzhvktl6@wfg-t540p.sh.intel.com Link: lkml.kernel.org/r/20180907045041.GF1110@shao2-debian Cc: Jiri Slaby Cc: Peter Zijlstra Cc: stable@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Dmitry Safonov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldsem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 52b7baef4f7a..5c2cec298816 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -307,6 +307,16 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout) if (!locked) ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem); list_del(&waiter.list); + + /* + * In case of timeout, wake up every reader who gave the right of way + * to writer. Prevent separation readers into two groups: + * one that helds semaphore and another that sleeps. + * (in case of no contention with a writer) + */ + if (!locked && list_empty(&sem->write_wait)) + __ldsem_wake_readers(sem); + raw_spin_unlock_irq(&sem->wait_lock); __set_current_state(TASK_RUNNING); From 108bf6a21ee3ea982af8682517afb63f536edcc2 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 1 Nov 2018 00:24:47 +0000 Subject: [PATCH 1412/2608] tty: Hold tty_ldisc_lock() during tty_reopen() commit 83d817f41070c48bc3eb7ec18e43000a548fca5c upstream. tty_ldisc_reinit() doesn't race with neither tty_ldisc_hangup() nor set_ldisc() nor tty_ldisc_release() as they use tty lock. But it races with anyone who expects line discipline to be the same after hoding read semaphore in tty_ldisc_ref(). We've seen the following crash on v4.9.108 stable: BUG: unable to handle kernel paging request at 0000000000002260 IP: [..] n_tty_receive_buf_common+0x5f/0x86d Workqueue: events_unbound flush_to_ldisc Call Trace: [..] n_tty_receive_buf2 [..] tty_ldisc_receive_buf [..] flush_to_ldisc [..] process_one_work [..] worker_thread [..] kthread [..] ret_from_fork tty_ldisc_reinit() should be called with ldisc_sem hold for writing, which will protect any reader against line discipline changes. Cc: Jiri Slaby Cc: stable@vger.kernel.org # b027e2298bd5 ("tty: fix data race between tty_init_dev and flush of buf") Reviewed-by: Jiri Slaby Reported-by: syzbot+3aa9784721dfb90e984d@syzkaller.appspotmail.com Tested-by: Mark Rutland Tested-by: Tetsuo Handa Signed-off-by: Dmitry Safonov Tested-by: Tycho Andersen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 83376caa571b..8523e41ba221 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1266,15 +1266,20 @@ static int tty_reopen(struct tty_struct *tty) if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN)) return -EBUSY; - tty->count++; + retval = tty_ldisc_lock(tty, 5 * HZ); + if (retval) + return retval; + tty->count++; if (tty->ldisc) - return 0; + goto out_unlock; retval = tty_ldisc_reinit(tty, tty->termios.c_line); if (retval) tty->count--; +out_unlock: + tty_ldisc_unlock(tty); return retval; } From 4086e2872f72fcc3539dcae9b6daebe29161dfc4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 1 Nov 2018 00:24:49 +0000 Subject: [PATCH 1413/2608] tty: Simplify tty->count math in tty_reopen() commit cf62a1a13749db0d32b5cdd800ea91a4087319de upstream. As notted by Jiri, tty_ldisc_reinit() shouldn't rely on tty counter. Simplify math by increasing the counter after reinit success. Cc: Jiri Slaby Link: lkml.kernel.org/r/<20180829022353.23568-2-dima@arista.com> Suggested-by: Jiri Slaby Reviewed-by: Jiri Slaby Tested-by: Mark Rutland Signed-off-by: Dmitry Safonov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 8523e41ba221..ae271f9edc04 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1270,16 +1270,13 @@ static int tty_reopen(struct tty_struct *tty) if (retval) return retval; - tty->count++; - if (tty->ldisc) - goto out_unlock; - - retval = tty_ldisc_reinit(tty, tty->termios.c_line); - if (retval) - tty->count--; - -out_unlock: + if (!tty->ldisc) + retval = tty_ldisc_reinit(tty, tty->termios.c_line); tty_ldisc_unlock(tty); + + if (retval == 0) + tty->count++; + return retval; } From cb7f9a466349842d39c2ccb27bd939baf333b6a4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 9 Jan 2019 01:17:40 +0000 Subject: [PATCH 1414/2608] tty: Don't hold ldisc lock in tty_reopen() if ldisc present commit d3736d82e8169768218ee0ef68718875918091a0 upstream. Try to get reference for ldisc during tty_reopen(). If ldisc present, we don't need to do tty_ldisc_reinit() and lock the write side for line discipline semaphore. Effectively, it optimizes fast-path for tty_reopen(), but more importantly it won't interrupt ongoing IO on the tty as no ldisc change is needed. Fixes user-visible issue when tty_reopen() interrupted login process for user with a long password, observed and reported by Lukas. Fixes: c96cf923a98d ("tty: Don't block on IO when ldisc change is pending") Fixes: 83d817f41070 ("tty: Hold tty_ldisc_lock() during tty_reopen()") Cc: Jiri Slaby Reported-by: Lukas F. Hartmann Tested-by: Lukas F. Hartmann Cc: stable Signed-off-by: Dmitry Safonov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index ae271f9edc04..417b81c67fe9 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1254,7 +1254,8 @@ static void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct * static int tty_reopen(struct tty_struct *tty) { struct tty_driver *driver = tty->driver; - int retval; + struct tty_ldisc *ld; + int retval = 0; if (driver->type == TTY_DRIVER_TYPE_PTY && driver->subtype == PTY_TYPE_MASTER) @@ -1266,13 +1267,18 @@ static int tty_reopen(struct tty_struct *tty) if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN)) return -EBUSY; - retval = tty_ldisc_lock(tty, 5 * HZ); - if (retval) - return retval; + ld = tty_ldisc_ref_wait(tty); + if (ld) { + tty_ldisc_deref(ld); + } else { + retval = tty_ldisc_lock(tty, 5 * HZ); + if (retval) + return retval; - if (!tty->ldisc) - retval = tty_ldisc_reinit(tty, tty->termios.c_line); - tty_ldisc_unlock(tty); + if (!tty->ldisc) + retval = tty_ldisc_reinit(tty, tty->termios.c_line); + tty_ldisc_unlock(tty); + } if (retval == 0) tty->count++; From 39ff087b5c6be2ff0b08e617d334e5bf72a08b44 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 4 Jan 2019 15:55:26 +0100 Subject: [PATCH 1415/2608] can: gw: ensure DLC boundaries after CAN frame modification commit 0aaa81377c5a01f686bcdb8c7a6929a7bf330c68 upstream. Muyu Yu provided a POC where user root with CAP_NET_ADMIN can create a CAN frame modification rule that makes the data length code a higher value than the available CAN frame data size. In combination with a configured checksum calculation where the result is stored relatively to the end of the data (e.g. cgw_csum_xor_rel) the tail of the skb (e.g. frag_list pointer in skb_shared_info) can be rewritten which finally can cause a system crash. Michael Kubecek suggested to drop frames that have a DLC exceeding the available space after the modification process and provided a patch that can handle CAN FD frames too. Within this patch we also limit the length for the checksum calculations to the maximum of Classic CAN data length (8). CAN frames that are dropped by these additional checks are counted with the CGW_DELETED counter which indicates misconfigurations in can-gw rules. This fixes CVE-2019-3701. Reported-by: Muyu Yu Reported-by: Marcus Meissner Suggested-by: Michal Kubecek Tested-by: Muyu Yu Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Cc: linux-stable # >= v3.2 Signed-off-by: Marc Kleine-Budde Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/can/gw.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index 73a02af4b5d7..5114b8f07fd4 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -416,13 +416,29 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); - /* check for checksum updates when the CAN frame has been modified */ + /* Has the CAN frame been modified? */ if (modidx) { - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + /* get available space for the processed CAN frame type */ + int max_len = nskb->len - offsetof(struct can_frame, data); + + /* dlc may have changed, make sure it fits to the CAN frame */ + if (cf->can_dlc > max_len) + goto out_delete; + + /* check for checksum updates in classic CAN length only */ + if (gwj->mod.csumfunc.crc8) { + if (cf->can_dlc > 8) + goto out_delete; + + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + } + + if (gwj->mod.csumfunc.xor) { + if (cf->can_dlc > 8) + goto out_delete; - if (gwj->mod.csumfunc.xor) (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + } } /* clear the skb timestamp if not configured the other way */ @@ -434,6 +450,14 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) gwj->dropped_frames++; else gwj->handled_frames++; + + return; + + out_delete: + /* delete frame due to misconfiguration */ + gwj->deleted_frames++; + kfree_skb(nskb); + return; } static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj) From 4abb6960f61ca52ff5a61c97bde10e9e7edf548e Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 4 Dec 2018 13:25:32 +0100 Subject: [PATCH 1416/2608] mmc: sdhci-msm: Disable CDR function on TX commit a89e7bcb18081c611eb6cf50edd440fa4983a71a upstream. The Clock Data Recovery (CDR) circuit allows to automatically adjust the RX sampling-point/phase for high frequency cards (SDR104, HS200...). CDR is automatically enabled during DLL configuration. However, according to the APQ8016 reference manual, this function must be disabled during TX and tuning phase in order to prevent any interferences during tuning challenges and unexpected phase alteration during TX transfers. This patch enables/disables CDR according to the current transfer mode. This fixes sporadic write transfer issues observed with some SDR104 and HS200 cards. Inspired by sdhci-msm downstream patch: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/432516/ Reported-by: Leonid Segal Reported-by: Manabu Igusa Signed-off-by: Loic Poulain Acked-by: Adrian Hunter Acked-by: Georgi Djakov Signed-off-by: Ulf Hansson [georgi: backport to v4.14] Signed-off-by: Georgi Djakov Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-msm.c | 51 +++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 92c483ec6cb2..192844b50c69 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -138,6 +138,8 @@ struct sdhci_msm_host { bool calibration_done; u8 saved_tuning_phase; bool use_cdclp533; + bool use_cdr; + u32 transfer_mode; }; static unsigned int msm_get_clock_rate_for_bus_mode(struct sdhci_host *host, @@ -815,6 +817,23 @@ out: return ret; } +static void sdhci_msm_set_cdr(struct sdhci_host *host, bool enable) +{ + u32 config, oldconfig = readl_relaxed(host->ioaddr + CORE_DLL_CONFIG); + + config = oldconfig; + if (enable) { + config |= CORE_CDR_EN; + config &= ~CORE_CDR_EXT_EN; + } else { + config &= ~CORE_CDR_EN; + config |= CORE_CDR_EXT_EN; + } + + if (config != oldconfig) + writel_relaxed(config, host->ioaddr + CORE_DLL_CONFIG); +} + static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host = mmc_priv(mmc); @@ -832,8 +851,14 @@ static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode) if (host->clock <= CORE_FREQ_100MHZ || !(ios.timing == MMC_TIMING_MMC_HS400 || ios.timing == MMC_TIMING_MMC_HS200 || - ios.timing == MMC_TIMING_UHS_SDR104)) + ios.timing == MMC_TIMING_UHS_SDR104)) { + msm_host->use_cdr = false; + sdhci_msm_set_cdr(host, false); return 0; + } + + /* Clock-Data-Recovery used to dynamically adjust RX sampling point */ + msm_host->use_cdr = true; /* * For HS400 tuning in HS200 timing requires: @@ -1092,6 +1117,29 @@ out: __sdhci_msm_set_clock(host, clock); } +static void sdhci_msm_write_w(struct sdhci_host *host, u16 val, int reg) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + + switch (reg) { + case SDHCI_TRANSFER_MODE: + msm_host->transfer_mode = val; + break; + case SDHCI_COMMAND: + if (!msm_host->use_cdr) + break; + if ((msm_host->transfer_mode & SDHCI_TRNS_READ) && + (SDHCI_GET_CMD(val) != MMC_SEND_TUNING_BLOCK_HS200) && + (SDHCI_GET_CMD(val) != MMC_SEND_TUNING_BLOCK)) + sdhci_msm_set_cdr(host, true); + else + sdhci_msm_set_cdr(host, false); + break; + } + writew(val, host->ioaddr + reg); +} + static const struct of_device_id sdhci_msm_dt_match[] = { { .compatible = "qcom,sdhci-msm-v4" }, {}, @@ -1107,6 +1155,7 @@ static const struct sdhci_ops sdhci_msm_ops = { .set_bus_width = sdhci_set_bus_width, .set_uhs_signaling = sdhci_msm_set_uhs_signaling, .voltage_switch = sdhci_msm_voltage_switch, + .write_w = sdhci_msm_write_w, }; static const struct sdhci_pltfm_data sdhci_msm_pdata = { From 8e643473c99ef5224315f0825fb00326fb984693 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 17 Jan 2019 00:25:58 +0000 Subject: [PATCH 1417/2608] media: em28xx: Fix misplaced reset of dev->v4l::field_count The backport of commit afeaade90db4 "media: em28xx: make v4l2-compliance happier by starting sequence on zero" added a reset on em28xx_v4l2::field_count to em28xx_enable_analog_tuner() but it should be done in em28xx_start_analog_streaming(). Signed-off-by: Ben Hutchings Cc: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-video.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index 92a74bc34527..bd8de78e0ffd 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -900,8 +900,6 @@ static int em28xx_enable_analog_tuner(struct em28xx *dev) if (!mdev || !v4l2->decoder) return 0; - dev->v4l2->field_count = 0; - /* * This will find the tuner that is connected into the decoder. * Technically, this is not 100% correct, as the device may be @@ -1074,6 +1072,8 @@ int em28xx_start_analog_streaming(struct vb2_queue *vq, unsigned int count) em28xx_videodbg("%s\n", __func__); + dev->v4l2->field_count = 0; + /* Make sure streaming is not already in progress for this type of filehandle (e.g. video, vbi) */ rc = res_get(dev, vq->type); From d93cef31a56bcf111a92977f70df8d6a9f0bde47 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 20 Jun 2018 18:18:33 +0800 Subject: [PATCH 1418/2608] sched/fair: Fix bandwidth timer clock drift condition commit 512ac999d2755d2b7109e996a76b6fb8b888631d upstream. I noticed that cgroup task groups constantly get throttled even if they have low CPU usage, this causes some jitters on the response time to some of our business containers when enabling CPU quotas. It's very simple to reproduce: mkdir /sys/fs/cgroup/cpu/test cd /sys/fs/cgroup/cpu/test echo 100000 > cpu.cfs_quota_us echo $$ > tasks then repeat: cat cpu.stat | grep nr_throttled # nr_throttled will increase steadily After some analysis, we found that cfs_rq::runtime_remaining will be cleared by expire_cfs_rq_runtime() due to two equal but stale "cfs_{b|q}->runtime_expires" after period timer is re-armed. The current condition to judge clock drift in expire_cfs_rq_runtime() is wrong, the two runtime_expires are actually the same when clock drift happens, so this condtion can never hit. The orginal design was correctly done by this commit: a9cf55b28610 ("sched: Expire invalid runtime") ... but was changed to be the current implementation due to its locking bug. This patch introduces another way, it adds a new field in both structures cfs_rq and cfs_bandwidth to record the expiration update sequence, and uses them to figure out if clock drift happens (true if they are equal). Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) [alakeshh: backport: Fixed merge conflicts: - sched.h: Fix the indentation and order in which the variables are declared to match with coding style of the existing code in 4.14 Struct members of same type were declared in separate lines in upstream patch which has been changed back to having multiple members of same type in the same line. e.g. int a; int b; -> int a, b; ] Signed-off-by: Alakesh Haloi Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: # 4.14.x Fixes: 51f2176d74ac ("sched/fair: Fix unlocked reads of some cfs_b->quota/period") Link: http://lkml.kernel.org/r/20180620101834.24455-1-xlpang@linux.alibaba.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 14 ++++++++------ kernel/sched/sched.h | 4 +++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6e108af21481..f33b24080b1c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4087,6 +4087,7 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) now = sched_clock_cpu(smp_processor_id()); cfs_b->runtime = cfs_b->quota; cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); + cfs_b->expires_seq++; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) @@ -4109,6 +4110,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) struct task_group *tg = cfs_rq->tg; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); u64 amount = 0, min_amount, expires; + int expires_seq; /* note: this is a positive sum as runtime_remaining <= 0 */ min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; @@ -4125,6 +4127,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_b->idle = 0; } } + expires_seq = cfs_b->expires_seq; expires = cfs_b->runtime_expires; raw_spin_unlock(&cfs_b->lock); @@ -4134,8 +4137,10 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) * spread between our sched_clock and the one on which runtime was * issued. */ - if ((s64)(expires - cfs_rq->runtime_expires) > 0) + if (cfs_rq->expires_seq != expires_seq) { + cfs_rq->expires_seq = expires_seq; cfs_rq->runtime_expires = expires; + } return cfs_rq->runtime_remaining > 0; } @@ -4161,12 +4166,9 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) * has not truly expired. * * Fortunately we can check determine whether this the case by checking - * whether the global deadline has advanced. It is valid to compare - * cfs_b->runtime_expires without any locks since we only care about - * exact equality, so a partial write will still work. + * whether the global deadline(cfs_b->expires_seq) has advanced. */ - - if (cfs_rq->runtime_expires != cfs_b->runtime_expires) { + if (cfs_rq->expires_seq == cfs_b->expires_seq) { /* extend local deadline, drift is bounded above by 2 ticks */ cfs_rq->runtime_expires += TICK_NSEC; } else { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b3ba6e5e99f2..452b56923c6d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -281,8 +281,9 @@ struct cfs_bandwidth { u64 quota, runtime; s64 hierarchical_quota; u64 runtime_expires; + int expires_seq; - int idle, period_active; + short idle, period_active; struct hrtimer period_timer, slack_timer; struct list_head throttled_cfs_rq; @@ -488,6 +489,7 @@ struct cfs_rq { #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; + int expires_seq; u64 runtime_expires; s64 runtime_remaining; From 1c62825e9765e01cc73171256588d9ca67216029 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 14 Jan 2019 10:01:30 -0500 Subject: [PATCH 1419/2608] Revert "scsi: target: iscsi: cxgbit: fix csk leak" This reverts commit b831528038e3cad0d745c53bcaeedb642f5cbc1f. A wrong commit message was used for the stable commit because of a human error (and duplicate commit subject lines). This patch reverts this error, and the following patches add the two upstream commits. Signed-off-by: Sasha Levin --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 0c00bb27c9c5..d4fa41be80f9 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -631,11 +631,8 @@ static void cxgbit_send_halfclose(struct cxgbit_sock *csk) static void cxgbit_arp_failure_discard(void *handle, struct sk_buff *skb) { - struct cxgbit_sock *csk = handle; - pr_debug("%s cxgbit_device %p\n", __func__, handle); kfree_skb(skb); - cxgbit_put_csk(csk); } static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb) @@ -1150,7 +1147,7 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) rpl5->opt0 = cpu_to_be64(opt0); rpl5->opt2 = cpu_to_be32(opt2); set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->ctrlq_idx); - t4_set_arp_err_handler(skb, csk, cxgbit_arp_failure_discard); + t4_set_arp_err_handler(skb, NULL, cxgbit_arp_failure_discard); cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); } From ccc67efce720211d0a3dceaf1ec7949777a1836a Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 9 Nov 2018 20:59:01 +0530 Subject: [PATCH 1420/2608] scsi: target: iscsi: cxgbit: fix csk leak [ Upstream commit ed076c55b359cc9982ca8b065bcc01675f7365f6 ] In case of arp failure call cxgbit_put_csk() to free csk. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index d4fa41be80f9..0c00bb27c9c5 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -631,8 +631,11 @@ static void cxgbit_send_halfclose(struct cxgbit_sock *csk) static void cxgbit_arp_failure_discard(void *handle, struct sk_buff *skb) { + struct cxgbit_sock *csk = handle; + pr_debug("%s cxgbit_device %p\n", __func__, handle); kfree_skb(skb); + cxgbit_put_csk(csk); } static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb) @@ -1147,7 +1150,7 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) rpl5->opt0 = cpu_to_be64(opt0); rpl5->opt2 = cpu_to_be32(opt2); set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->ctrlq_idx); - t4_set_arp_err_handler(skb, NULL, cxgbit_arp_failure_discard); + t4_set_arp_err_handler(skb, csk, cxgbit_arp_failure_discard); cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); } From 4ef8d21b719391d2fd831a1fabbb33cc75bef0c1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jan 2019 17:56:32 +0000 Subject: [PATCH 1421/2608] arm64/kvm: consistently handle host HCR_EL2 flags [ Backport of upstream commit 4eaed6aa2c628101246bcabc91b203bfac1193f8 ] In KVM we define the configuration of HCR_EL2 for a VHE HOST in HCR_HOST_VHE_FLAGS, but we don't have a similar definition for the non-VHE host flags, and open-code HCR_RW. Further, in head.S we open-code the flags for VHE and non-VHE configurations. In future, we're going to want to configure more flags for the host, so lets add a HCR_HOST_NVHE_FLAGS defintion, and consistently use both HCR_HOST_VHE_FLAGS and HCR_HOST_NVHE_FLAGS in the kvm code and head.S. We now use mov_q to generate the HCR_EL2 value, as we use when configuring other registers in head.S. Reviewed-by: Marc Zyngier Reviewed-by: Richard Henderson Signed-off-by: Mark Rutland Reviewed-by: Christoffer Dall Cc: Catalin Marinas Cc: Marc Zyngier Cc: Will Deacon Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Will Deacon [kristina: backport to 4.14.y: adjust context] Signed-off-by: Kristina Martsenko Signed-off-by: Sasha Levin --- arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/kernel/head.S | 5 ++--- arch/arm64/kvm/hyp/switch.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 73cc4309fe01..9b25a1e2d583 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -82,6 +82,7 @@ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) +#define HCR_HOST_NVHE_FLAGS (HCR_RW) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 261f3f88364c..ec393275ba04 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -414,10 +414,9 @@ CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 #endif /* Hyp configuration. */ - mov x0, #HCR_RW // 64-bit EL1 + mov_q x0, HCR_HOST_NVHE_FLAGS cbz x2, set_hcr - orr x0, x0, #HCR_TGE // Enable Host Extensions - orr x0, x0, #HCR_E2H + mov_q x0, HCR_HOST_VHE_FLAGS set_hcr: msr hcr_el2, x0 isb diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b2f1992c6234..44845996b554 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -127,7 +127,7 @@ static void __hyp_text __deactivate_traps_nvhe(void) mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_RW, hcr_el2); + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); } From fea3f83ee00525e604dccd2673a33cefd25bc76d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jan 2019 17:56:33 +0000 Subject: [PATCH 1422/2608] arm64: Don't trap host pointer auth use to EL2 [ Backport of upstream commit b3669b1e1c09890d61109a1a8ece2c5b66804714 ] To allow EL0 (and/or EL1) to use pointer authentication functionality, we must ensure that pointer authentication instructions and accesses to pointer authentication keys are not trapped to EL2. This patch ensures that HCR_EL2 is configured appropriately when the kernel is booted at EL2. For non-VHE kernels we set HCR_EL2.{API,APK}, ensuring that EL1 can access keys and permit EL0 use of instructions. For VHE kernels host EL0 (TGE && E2H) is unaffected by these settings, and it doesn't matter how we configure HCR_EL2.{API,APK}, so we don't bother setting them. This does not enable support for KVM guests, since KVM manages HCR_EL2 itself when running VMs. Reviewed-by: Richard Henderson Signed-off-by: Mark Rutland Acked-by: Christoffer Dall Cc: Catalin Marinas Cc: Marc Zyngier Cc: Will Deacon Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Will Deacon [kristina: backport to 4.14.y: adjust context] Signed-off-by: Kristina Martsenko Signed-off-by: Sasha Levin --- arch/arm64/include/asm/kvm_arm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 9b25a1e2d583..1d6d980f80ac 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -23,6 +23,8 @@ #include /* Hyp Configuration Register (HCR) bits */ +#define HCR_API (UL(1) << 41) +#define HCR_APK (UL(1) << 40) #define HCR_E2H (UL(1) << 34) #define HCR_ID (UL(1) << 33) #define HCR_CD (UL(1) << 32) @@ -82,7 +84,7 @@ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) -#define HCR_HOST_NVHE_FLAGS (HCR_RW) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ From c809028e773de4f4c5c40af3caad9e3e936ecfb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Jan 2019 04:06:14 -0800 Subject: [PATCH 1423/2608] ipv6: fix kernel-infoleak in ipv6_local_error() [ Upstream commit 7d033c9f6a7fd3821af75620a0257db87c2b552a ] This patch makes sure the flow label in the IPv6 header forged in ipv6_local_error() is initialized. BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 CPU: 1 PID: 24675 Comm: syz-executor1 Not tainted 4.20.0-rc7+ #4 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 kmsan_internal_check_memory+0x455/0xb00 mm/kmsan/kmsan.c:675 kmsan_copy_to_user+0xab/0xc0 mm/kmsan/kmsan_hooks.c:601 _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 copy_to_user include/linux/uaccess.h:177 [inline] move_addr_to_user+0x2e9/0x4f0 net/socket.c:227 ___sys_recvmsg+0x5d7/0x1140 net/socket.c:2284 __sys_recvmsg net/socket.c:2327 [inline] __do_sys_recvmsg net/socket.c:2337 [inline] __se_sys_recvmsg+0x2fa/0x450 net/socket.c:2334 __x64_sys_recvmsg+0x4a/0x70 net/socket.c:2334 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x457ec9 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f8750c06c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002f RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457ec9 RDX: 0000000000002000 RSI: 0000000020000400 RDI: 0000000000000005 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f8750c076d4 R13: 00000000004c4a60 R14: 00000000004d8140 R15: 00000000ffffffff Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:204 [inline] kmsan_save_stack mm/kmsan/kmsan.c:219 [inline] kmsan_internal_chain_origin+0x134/0x230 mm/kmsan/kmsan.c:439 __msan_chain_origin+0x70/0xe0 mm/kmsan/kmsan_instr.c:200 ipv6_recv_error+0x1e3f/0x1eb0 net/ipv6/datagram.c:475 udpv6_recvmsg+0x398/0x2ab0 net/ipv6/udp.c:335 inet_recvmsg+0x4fb/0x600 net/ipv4/af_inet.c:830 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg+0x1d1/0x230 net/socket.c:801 ___sys_recvmsg+0x4d5/0x1140 net/socket.c:2278 __sys_recvmsg net/socket.c:2327 [inline] __do_sys_recvmsg net/socket.c:2337 [inline] __se_sys_recvmsg+0x2fa/0x450 net/socket.c:2334 __x64_sys_recvmsg+0x4a/0x70 net/socket.c:2334 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:204 [inline] kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:158 kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176 kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2759 [inline] __kmalloc_node_track_caller+0xe18/0x1030 mm/slub.c:4383 __kmalloc_reserve net/core/skbuff.c:137 [inline] __alloc_skb+0x309/0xa20 net/core/skbuff.c:205 alloc_skb include/linux/skbuff.h:998 [inline] ipv6_local_error+0x1a7/0x9e0 net/ipv6/datagram.c:334 __ip6_append_data+0x129f/0x4fd0 net/ipv6/ip6_output.c:1311 ip6_make_skb+0x6cc/0xcf0 net/ipv6/ip6_output.c:1775 udpv6_sendmsg+0x3f8e/0x45d0 net/ipv6/udp.c:1384 inet_sendmsg+0x54a/0x720 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] __sys_sendto+0x8c4/0xac0 net/socket.c:1788 __do_sys_sendto net/socket.c:1800 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1796 __x64_sys_sendto+0x6e/0x90 net/socket.c:1796 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Bytes 4-7 of 28 are uninitialized Memory access of size 28 starts at ffff8881937bfce0 Data copied to user address 0000000020000000 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/datagram.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 461825e0680f..db98d7fcbe19 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -349,6 +349,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) skb_reset_network_header(skb); iph = ipv6_hdr(skb); iph->daddr = fl6->daddr; + ip6_flow_hdr(iph, 0, 0); serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; From b4683849b9c2b803f4dbbffe9619e56d025e8683 Mon Sep 17 00:00:00 2001 From: JianJhen Chen Date: Sun, 6 Jan 2019 11:28:13 +0800 Subject: [PATCH 1424/2608] net: bridge: fix a bug on using a neighbour cache entry without checking its state [ Upstream commit 4c84edc11b76590859b1e45dd676074c59602dc4 ] When handling DNAT'ed packets on a bridge device, the neighbour cache entry from lookup was used without checking its state. It means that a cache entry in the NUD_STALE state will be used directly instead of entering the NUD_DELAY state to confirm the reachability of the neighbor. This problem becomes worse after commit 2724680bceee ("neigh: Keep neighbour cache entries if number of them is small enough."), since all neighbour cache entries in the NUD_STALE state will be kept in the neighbour table as long as the number of cache entries does not exceed the value specified in gc_thresh1. This commit validates the state of a neighbour cache entry before using the entry. Signed-off-by: JianJhen Chen Reviewed-by: JinLin Chen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netfilter_hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7582f28ab306..3f3859b8d49f 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -275,7 +275,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; - if (neigh->hh.hh_len) { + if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; ret = br_handle_frame_finish(net, sk, skb); From 6740236de302817818c319567dea9ecd3cb1454c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Jan 2019 23:27:06 +0000 Subject: [PATCH 1425/2608] packet: Do not leak dev refcounts on error exit [ Upstream commit d972f3dce8d161e2142da0ab1ef25df00e2f21a9 ] 'dev' is non NULL when the addr_len check triggers so it must goto a label that does the dev_put otherwise dev will have a leaked refcount. This bug causes the ib_ipoib module to become unloadable when using systemd-network as it triggers this check on InfiniBand links. Fixes: 99137b7888f4 ("packet: validate address length") Reported-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 91a323f99d47..44a093c75567 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2666,7 +2666,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out; + goto out_put; } err = -ENXIO; @@ -2866,7 +2866,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out; + goto out_unlock; } err = -ENXIO; From 0bab999063b9bb3f6c9addc52f54c7688f2a51b6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 8 Jan 2019 12:32:42 -0500 Subject: [PATCH 1426/2608] bonding: update nest level on unlink [ Upstream commit 001e465f09a18857443489a57e74314a3368c805 ] A network device stack with multiple layers of bonding devices can trigger a false positive lockdep warning. Adding lockdep nest levels fixes this. Update the level on both enslave and unlink, to avoid the following series of events .. ip netns add test ip netns exec test bash ip link set dev lo addr 00:11:22:33:44:55 ip link set dev lo down ip link add dev bond1 type bond ip link add dev bond2 type bond ip link set dev lo master bond1 ip link set dev bond1 master bond2 ip link set dev bond1 nomaster ip link set dev bond2 master bond1 .. from still generating a splat: [ 193.652127] ====================================================== [ 193.658231] WARNING: possible circular locking dependency detected [ 193.664350] 4.20.0 #8 Not tainted [ 193.668310] ------------------------------------------------------ [ 193.674417] ip/15577 is trying to acquire lock: [ 193.678897] 00000000a40e3b69 (&(&bond->stats_lock)->rlock#3/3){+.+.}, at: bond_get_stats+0x58/0x290 [ 193.687851] but task is already holding lock: [ 193.693625] 00000000807b9d9f (&(&bond->stats_lock)->rlock#2/2){+.+.}, at: bond_get_stats+0x58/0x290 [..] [ 193.851092] lock_acquire+0xa7/0x190 [ 193.855138] _raw_spin_lock_nested+0x2d/0x40 [ 193.859878] bond_get_stats+0x58/0x290 [ 193.864093] dev_get_stats+0x5a/0xc0 [ 193.868140] bond_get_stats+0x105/0x290 [ 193.872444] dev_get_stats+0x5a/0xc0 [ 193.876493] rtnl_fill_stats+0x40/0x130 [ 193.880797] rtnl_fill_ifinfo+0x6c5/0xdc0 [ 193.885271] rtmsg_ifinfo_build_skb+0x86/0xe0 [ 193.890091] rtnetlink_event+0x5b/0xa0 [ 193.894320] raw_notifier_call_chain+0x43/0x60 [ 193.899225] netdev_change_features+0x50/0xa0 [ 193.904044] bond_compute_features.isra.46+0x1ab/0x270 [ 193.909640] bond_enslave+0x141d/0x15b0 [ 193.913946] do_set_master+0x89/0xa0 [ 193.918016] do_setlink+0x37c/0xda0 [ 193.921980] __rtnl_newlink+0x499/0x890 [ 193.926281] rtnl_newlink+0x48/0x70 [ 193.930238] rtnetlink_rcv_msg+0x171/0x4b0 [ 193.934801] netlink_rcv_skb+0xd1/0x110 [ 193.939103] rtnetlink_rcv+0x15/0x20 [ 193.943151] netlink_unicast+0x3b5/0x520 [ 193.947544] netlink_sendmsg+0x2fd/0x3f0 [ 193.951942] sock_sendmsg+0x38/0x50 [ 193.955899] ___sys_sendmsg+0x2ba/0x2d0 [ 193.960205] __x64_sys_sendmsg+0xad/0x100 [ 193.964687] do_syscall_64+0x5a/0x460 [ 193.968823] entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 7e2556e40026 ("bonding: avoid lockdep confusion in bond_get_stats()") Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index cf64a365362b..65c5a65af0ba 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1928,6 +1928,9 @@ static int __bond_release_one(struct net_device *bond_dev, if (!bond_has_slaves(bond)) { bond_set_carrier(bond); eth_hw_addr_random(bond_dev); + bond->nest_level = SINGLE_DEPTH_NESTING; + } else { + bond->nest_level = dev_get_nest_level(bond_dev) + 1; } unblock_netpoll_tx(); From 75664d8037efe48e968bda85e690341d12135374 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 7 Jan 2019 16:47:33 -0500 Subject: [PATCH 1427/2608] ip: on queued skb use skb_header_pointer instead of pskb_may_pull [ Upstream commit 4a06fa67c4da20148803525151845276cdb995c1 ] Commit 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") avoided a read beyond the end of the skb linear segment by calling pskb_may_pull. That function can trigger a BUG_ON in pskb_expand_head if the skb is shared, which it is when when peeking. It can also return ENOMEM. Avoid both by switching to safer skb_header_pointer. Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") Reported-by: syzbot Suggested-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 12 +++++------- net/ipv6/datagram.c | 10 ++++------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4ef92ebc4f6d..d1081eac3b49 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -146,19 +146,17 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { + __be16 _ports[2], *ports; struct sockaddr_in sin; - __be16 *ports; - int end; - - end = skb_transport_offset(skb) + 4; - if (end > 0 && !pskb_may_pull(skb, end)) - return; /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ - ports = (__be16 *)skb_transport_header(skb); + ports = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_ports), &_ports); + if (!ports) + return; sin.sin_family = AF_INET; sin.sin_addr.s_addr = ip_hdr(skb)->daddr; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index db98d7fcbe19..1ee3e0d2b587 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -709,17 +709,15 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; - __be16 *ports; - int end; + __be16 _ports[2], *ports; - end = skb_transport_offset(skb) + 4; - if (end <= 0 || pskb_may_pull(skb, end)) { + ports = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_ports), &_ports); + if (ports) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ - ports = (__be16 *)skb_transport_header(skb); - sin6.sin6_family = AF_INET6; sin6.sin6_addr = ipv6_hdr(skb)->daddr; sin6.sin6_port = ports[1]; From 3466b8be782a8e55869e782547fc24893c787926 Mon Sep 17 00:00:00 2001 From: Aymen Sghaier Date: Wed, 19 Dec 2018 16:36:44 +0200 Subject: [PATCH 1428/2608] crypto: caam - fix zero-length buffer DMA mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 04e6d25c5bb244c1a37eb9fe0b604cc11a04e8c5 upstream. Recent changes - probably DMA API related (generic and/or arm64-specific) - exposed a case where driver maps a zero-length buffer: ahash_init()->ahash_update()->ahash_final() with a zero-length string to hash kernel BUG at kernel/dma/swiotlb.c:475! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 2 PID: 1823 Comm: cryptomgr_test Not tainted 4.20.0-rc1-00108-g00c9fe37a7f2 #1 Hardware name: LS1046A RDB Board (DT) pstate: 80000005 (Nzcv daif -PAN -UAO) pc : swiotlb_tbl_map_single+0x170/0x2b8 lr : swiotlb_map_page+0x134/0x1f8 sp : ffff00000f79b8f0 x29: ffff00000f79b8f0 x28: 0000000000000000 x27: ffff0000093d0000 x26: 0000000000000000 x25: 00000000001f3ffe x24: 0000000000200000 x23: 0000000000000000 x22: 00000009f2c538c0 x21: ffff800970aeb410 x20: 0000000000000001 x19: ffff800970aeb410 x18: 0000000000000007 x17: 000000000000000e x16: 0000000000000001 x15: 0000000000000019 x14: c32cb8218a167fe8 x13: ffffffff00000000 x12: ffff80097fdae348 x11: 0000800976bca000 x10: 0000000000000010 x9 : 0000000000000000 x8 : ffff0000091fd6c8 x7 : 0000000000000000 x6 : 00000009f2c538bf x5 : 0000000000000000 x4 : 0000000000000001 x3 : 0000000000000000 x2 : 00000009f2c538c0 x1 : 00000000f9fff000 x0 : 0000000000000000 Process cryptomgr_test (pid: 1823, stack limit = 0x(____ptrval____)) Call trace: swiotlb_tbl_map_single+0x170/0x2b8 swiotlb_map_page+0x134/0x1f8 ahash_final_no_ctx+0xc4/0x6cc ahash_final+0x10/0x18 crypto_ahash_op+0x30/0x84 crypto_ahash_final+0x14/0x1c __test_hash+0x574/0xe0c test_hash+0x28/0x80 __alg_test_hash+0x84/0xd0 alg_test_hash+0x78/0x144 alg_test.part.30+0x12c/0x2b4 alg_test+0x3c/0x68 cryptomgr_test+0x44/0x4c kthread+0xfc/0x128 ret_from_fork+0x10/0x18 Code: d34bfc18 2a1a03f7 1a9f8694 35fff89a (d4210000) Cc: Signed-off-by: Aymen Sghaier Signed-off-by: Horia Geantă Reviewed-by: Christoph Hellwig Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamhash.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 698580b60b2f..8fa35bc75870 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1109,13 +1109,16 @@ static int ahash_final_no_ctx(struct ahash_request *req) desc = edesc->hw_desc; - state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, state->buf_dma)) { - dev_err(jrdev, "unable to map src\n"); - goto unmap; - } + if (buflen) { + state->buf_dma = dma_map_single(jrdev, buf, buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, state->buf_dma)) { + dev_err(jrdev, "unable to map src\n"); + goto unmap; + } - append_seq_in_ptr(desc, state->buf_dma, buflen, 0); + append_seq_in_ptr(desc, state->buf_dma, buflen, 0); + } edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result, digestsize); From d196d2fdc0e8a0f1db9a64d0e691f7e2cd756e28 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Thu, 3 Jan 2019 14:21:05 +0530 Subject: [PATCH 1429/2608] crypto: authencesn - Avoid twice completion call in decrypt path commit a7773363624b034ab198c738661253d20a8055c2 upstream. Authencesn template in decrypt path unconditionally calls aead_request_complete after ahash_verify which leads to following kernel panic in after decryption. [ 338.539800] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 338.548372] PGD 0 P4D 0 [ 338.551157] Oops: 0000 [#1] SMP PTI [ 338.554919] CPU: 0 PID: 0 Comm: swapper/0 Kdump: loaded Tainted: G W I 4.19.7+ #13 [ 338.564431] Hardware name: Supermicro X8ST3/X8ST3, BIOS 2.0 07/29/10 [ 338.572212] RIP: 0010:esp_input_done2+0x350/0x410 [esp4] [ 338.578030] Code: ff 0f b6 68 10 48 8b 83 c8 00 00 00 e9 8e fe ff ff 8b 04 25 04 00 00 00 83 e8 01 48 98 48 8b 3c c5 10 00 00 00 e9 f7 fd ff ff <8b> 04 25 04 00 00 00 83 e8 01 48 98 4c 8b 24 c5 10 00 00 00 e9 3b [ 338.598547] RSP: 0018:ffff911c97803c00 EFLAGS: 00010246 [ 338.604268] RAX: 0000000000000002 RBX: ffff911c4469ee00 RCX: 0000000000000000 [ 338.612090] RDX: 0000000000000000 RSI: 0000000000000130 RDI: ffff911b87c20400 [ 338.619874] RBP: 0000000000000000 R08: ffff911b87c20498 R09: 000000000000000a [ 338.627610] R10: 0000000000000001 R11: 0000000000000004 R12: 0000000000000000 [ 338.635402] R13: ffff911c89590000 R14: ffff911c91730000 R15: 0000000000000000 [ 338.643234] FS: 0000000000000000(0000) GS:ffff911c97800000(0000) knlGS:0000000000000000 [ 338.652047] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 338.658299] CR2: 0000000000000004 CR3: 00000001ec20a000 CR4: 00000000000006f0 [ 338.666382] Call Trace: [ 338.669051] [ 338.671254] esp_input_done+0x12/0x20 [esp4] [ 338.675922] chcr_handle_resp+0x3b5/0x790 [chcr] [ 338.680949] cpl_fw6_pld_handler+0x37/0x60 [chcr] [ 338.686080] chcr_uld_rx_handler+0x22/0x50 [chcr] [ 338.691233] uldrx_handler+0x8c/0xc0 [cxgb4] [ 338.695923] process_responses+0x2f0/0x5d0 [cxgb4] [ 338.701177] ? bitmap_find_next_zero_area_off+0x3a/0x90 [ 338.706882] ? matrix_alloc_area.constprop.7+0x60/0x90 [ 338.712517] ? apic_update_irq_cfg+0x82/0xf0 [ 338.717177] napi_rx_handler+0x14/0xe0 [cxgb4] [ 338.722015] net_rx_action+0x2aa/0x3e0 [ 338.726136] __do_softirq+0xcb/0x280 [ 338.730054] irq_exit+0xde/0xf0 [ 338.733504] do_IRQ+0x54/0xd0 [ 338.736745] common_interrupt+0xf/0xf Fixes: 104880a6b470 ("crypto: authencesn - Convert to new AEAD...") Signed-off-by: Harsh Jain Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/authencesn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 6de852ce4cf8..4ba4470deee1 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -279,7 +279,7 @@ static void authenc_esn_verify_ahash_done(struct crypto_async_request *areq, struct aead_request *req = areq->data; err = err ?: crypto_authenc_esn_decrypt_tail(req, 0); - aead_request_complete(req, err); + authenc_esn_request_complete(req, err); } static int crypto_authenc_esn_decrypt(struct aead_request *req) From 7c5f00e8984f8921d3a20afd04fcd0f24096b2bf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Dec 2018 23:23:23 -0800 Subject: [PATCH 1430/2608] crypto: bcm - convert to use crypto_authenc_extractkeys() commit ab57b33525c3221afaebd391458fa0cbcd56903d upstream. Convert the bcm crypto driver to use crypto_authenc_extractkeys() so that it picks up the fix for broken validation of rtattr::rta_len. This also fixes the DES weak key check to actually be done on the right key. (It was checking the authentication key, not the encryption key...) Fixes: 9d12ba86f818 ("crypto: brcm - Add Broadcom SPU driver") Cc: # v4.11+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/Kconfig | 1 + drivers/crypto/bcm/cipher.c | 44 +++++++++++-------------------------- 2 files changed, 14 insertions(+), 31 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 143f8bc403b9..342bc777841c 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -679,6 +679,7 @@ config CRYPTO_DEV_BCM_SPU depends on ARCH_BCM_IPROC depends on MAILBOX default m + select CRYPTO_AUTHENC select CRYPTO_DES select CRYPTO_MD5 select CRYPTO_SHA1 diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index ee52c355bee0..b6be383a51a6 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -2846,44 +2846,28 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, struct spu_hw *spu = &iproc_priv.spu; struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); struct crypto_tfm *tfm = crypto_aead_tfm(cipher); - struct rtattr *rta = (void *)key; - struct crypto_authenc_key_param *param; - const u8 *origkey = key; - const unsigned int origkeylen = keylen; - - int ret = 0; + struct crypto_authenc_keys keys; + int ret; flow_log("%s() aead:%p key:%p keylen:%u\n", __func__, cipher, key, keylen); flow_dump(" key: ", key, keylen); - if (!RTA_OK(rta, keylen)) - goto badkey; - if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) - goto badkey; - if (RTA_PAYLOAD(rta) < sizeof(*param)) + ret = crypto_authenc_extractkeys(&keys, key, keylen); + if (ret) goto badkey; - param = RTA_DATA(rta); - ctx->enckeylen = be32_to_cpu(param->enckeylen); - - key += RTA_ALIGN(rta->rta_len); - keylen -= RTA_ALIGN(rta->rta_len); - - if (keylen < ctx->enckeylen) - goto badkey; - if (ctx->enckeylen > MAX_KEY_SIZE) + if (keys.enckeylen > MAX_KEY_SIZE || + keys.authkeylen > MAX_KEY_SIZE) goto badkey; - ctx->authkeylen = keylen - ctx->enckeylen; + ctx->enckeylen = keys.enckeylen; + ctx->authkeylen = keys.authkeylen; - if (ctx->authkeylen > MAX_KEY_SIZE) - goto badkey; - - memcpy(ctx->enckey, key + ctx->authkeylen, ctx->enckeylen); + memcpy(ctx->enckey, keys.enckey, keys.enckeylen); /* May end up padding auth key. So make sure it's zeroed. */ memset(ctx->authkey, 0, sizeof(ctx->authkey)); - memcpy(ctx->authkey, key, ctx->authkeylen); + memcpy(ctx->authkey, keys.authkey, keys.authkeylen); switch (ctx->alg->cipher_info.alg) { case CIPHER_ALG_DES: @@ -2891,7 +2875,7 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, u32 tmp[DES_EXPKEY_WORDS]; u32 flags = CRYPTO_TFM_RES_WEAK_KEY; - if (des_ekey(tmp, key) == 0) { + if (des_ekey(tmp, keys.enckey) == 0) { if (crypto_aead_get_flags(cipher) & CRYPTO_TFM_REQ_WEAK_KEY) { crypto_aead_set_flags(cipher, flags); @@ -2906,7 +2890,7 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, break; case CIPHER_ALG_3DES: if (ctx->enckeylen == (DES_KEY_SIZE * 3)) { - const u32 *K = (const u32 *)key; + const u32 *K = (const u32 *)keys.enckey; u32 flags = CRYPTO_TFM_RES_BAD_KEY_SCHED; if (!((K[0] ^ K[2]) | (K[1] ^ K[3])) || @@ -2957,9 +2941,7 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, ctx->fallback_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; ctx->fallback_cipher->base.crt_flags |= tfm->crt_flags & CRYPTO_TFM_REQ_MASK; - ret = - crypto_aead_setkey(ctx->fallback_cipher, origkey, - origkeylen); + ret = crypto_aead_setkey(ctx->fallback_cipher, key, keylen); if (ret) { flow_log(" fallback setkey() returned:%d\n", ret); tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; From b9119fd2749c1459416ebb559cf7c1d379786cff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Dec 2018 23:23:22 -0800 Subject: [PATCH 1431/2608] crypto: authenc - fix parsing key with misaligned rta_len commit 8f9c469348487844328e162db57112f7d347c49f upstream. Keys for "authenc" AEADs are formatted as an rtattr containing a 4-byte 'enckeylen', followed by an authentication key and an encryption key. crypto_authenc_extractkeys() parses the key to find the inner keys. However, it fails to consider the case where the rtattr's payload is longer than 4 bytes but not 4-byte aligned, and where the key ends before the next 4-byte aligned boundary. In this case, 'keylen -= RTA_ALIGN(rta->rta_len);' underflows to a value near UINT_MAX. This causes a buffer overread and crash during crypto_ahash_setkey(). Fix it by restricting the rtattr payload to the expected size. Reproducer using AF_ALG: #include #include #include int main() { int fd; struct sockaddr_alg addr = { .salg_type = "aead", .salg_name = "authenc(hmac(sha256),cbc(aes))", }; struct { struct rtattr attr; __be32 enckeylen; char keys[1]; } __attribute__((packed)) key = { .attr.rta_len = sizeof(key), .attr.rta_type = 1 /* CRYPTO_AUTHENC_KEYA_PARAM */, }; fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (void *)&addr, sizeof(addr)); setsockopt(fd, SOL_ALG, ALG_SET_KEY, &key, sizeof(key)); } It caused: BUG: unable to handle kernel paging request at ffff88007ffdc000 PGD 2e01067 P4D 2e01067 PUD 2e04067 PMD 2e05067 PTE 0 Oops: 0000 [#1] SMP CPU: 0 PID: 883 Comm: authenc Not tainted 4.20.0-rc1-00108-g00c9fe37a7f27 #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-20181126_142135-anatol 04/01/2014 RIP: 0010:sha256_ni_transform+0xb3/0x330 arch/x86/crypto/sha256_ni_asm.S:155 [...] Call Trace: sha256_ni_finup+0x10/0x20 arch/x86/crypto/sha256_ssse3_glue.c:321 crypto_shash_finup+0x1a/0x30 crypto/shash.c:178 shash_digest_unaligned+0x45/0x60 crypto/shash.c:186 crypto_shash_digest+0x24/0x40 crypto/shash.c:202 hmac_setkey+0x135/0x1e0 crypto/hmac.c:66 crypto_shash_setkey+0x2b/0xb0 crypto/shash.c:66 shash_async_setkey+0x10/0x20 crypto/shash.c:223 crypto_ahash_setkey+0x2d/0xa0 crypto/ahash.c:202 crypto_authenc_setkey+0x68/0x100 crypto/authenc.c:96 crypto_aead_setkey+0x2a/0xc0 crypto/aead.c:62 aead_setkey+0xc/0x10 crypto/algif_aead.c:526 alg_setkey crypto/af_alg.c:223 [inline] alg_setsockopt+0xfe/0x130 crypto/af_alg.c:256 __sys_setsockopt+0x6d/0xd0 net/socket.c:1902 __do_sys_setsockopt net/socket.c:1913 [inline] __se_sys_setsockopt net/socket.c:1910 [inline] __x64_sys_setsockopt+0x1f/0x30 net/socket.c:1910 do_syscall_64+0x4a/0x180 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: e236d4a89a2f ("[CRYPTO] authenc: Move enckeylen into key itself") Cc: # v2.6.25+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/authenc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index 0db344d5a01a..053287dfad65 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -58,14 +58,22 @@ int crypto_authenc_extractkeys(struct crypto_authenc_keys *keys, const u8 *key, return -EINVAL; if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) return -EINVAL; - if (RTA_PAYLOAD(rta) < sizeof(*param)) + + /* + * RTA_OK() didn't align the rtattr's payload when validating that it + * fits in the buffer. Yet, the keys should start on the next 4-byte + * aligned boundary. To avoid confusion, require that the rtattr + * payload be exactly the param struct, which has a 4-byte aligned size. + */ + if (RTA_PAYLOAD(rta) != sizeof(*param)) return -EINVAL; + BUILD_BUG_ON(sizeof(*param) % RTA_ALIGNTO); param = RTA_DATA(rta); keys->enckeylen = be32_to_cpu(param->enckeylen); - key += RTA_ALIGN(rta->rta_len); - keylen -= RTA_ALIGN(rta->rta_len); + key += rta->rta_len; + keylen -= rta->rta_len; if (keylen < keys->enckeylen) return -EINVAL; From 0400be165676222b825e064f973d7397fee943b5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 9 Jan 2019 15:02:23 +0100 Subject: [PATCH 1432/2608] Revert "btrfs: balance dirty metadata pages in btrfs_finish_ordered_io" commit 77b7aad195099e7c6da11e94b7fa6ef5e6fb0025 upstream. This reverts commit e73e81b6d0114d4a303205a952ab2e87c44bd279. This patch causes a few problems: - adds latency to btrfs_finish_ordered_io - as btrfs_finish_ordered_io is used for free space cache, generating more work from btrfs_btree_balance_dirty_nodelay could end up in the same workque, effectively deadlocking 12260 kworker/u96:16+btrfs-freespace-write D [<0>] balance_dirty_pages+0x6e6/0x7ad [<0>] balance_dirty_pages_ratelimited+0x6bb/0xa90 [<0>] btrfs_finish_ordered_io+0x3da/0x770 [<0>] normal_work_helper+0x1c5/0x5a0 [<0>] process_one_work+0x1ee/0x5a0 [<0>] worker_thread+0x46/0x3d0 [<0>] kthread+0xf5/0x130 [<0>] ret_from_fork+0x24/0x30 [<0>] 0xffffffffffffffff Transaction commit will wait on the freespace cache: 838 btrfs-transacti D [<0>] btrfs_start_ordered_extent+0x154/0x1e0 [<0>] btrfs_wait_ordered_range+0xbd/0x110 [<0>] __btrfs_wait_cache_io+0x49/0x1a0 [<0>] btrfs_write_dirty_block_groups+0x10b/0x3b0 [<0>] commit_cowonly_roots+0x215/0x2b0 [<0>] btrfs_commit_transaction+0x37e/0x910 [<0>] transaction_kthread+0x14d/0x180 [<0>] kthread+0xf5/0x130 [<0>] ret_from_fork+0x24/0x30 [<0>] 0xffffffffffffffff And then writepages ends up waiting on transaction commit: 9520 kworker/u96:13+flush-btrfs-1 D [<0>] wait_current_trans+0xac/0xe0 [<0>] start_transaction+0x21b/0x4b0 [<0>] cow_file_range_inline+0x10b/0x6b0 [<0>] cow_file_range.isra.69+0x329/0x4a0 [<0>] run_delalloc_range+0x105/0x3c0 [<0>] writepage_delalloc+0x119/0x180 [<0>] __extent_writepage+0x10c/0x390 [<0>] extent_write_cache_pages+0x26f/0x3d0 [<0>] extent_writepages+0x4f/0x80 [<0>] do_writepages+0x17/0x60 [<0>] __writeback_single_inode+0x59/0x690 [<0>] writeback_sb_inodes+0x291/0x4e0 [<0>] __writeback_inodes_wb+0x87/0xb0 [<0>] wb_writeback+0x3bb/0x500 [<0>] wb_workfn+0x40d/0x610 [<0>] process_one_work+0x1ee/0x5a0 [<0>] worker_thread+0x1e0/0x3d0 [<0>] kthread+0xf5/0x130 [<0>] ret_from_fork+0x24/0x30 [<0>] 0xffffffffffffffff Eventually, we have every process in the system waiting on balance_dirty_pages(), and nobody is able to make progress on page writeback. The original patch tried to fix an OOM condition, that happened on 4.4 but no success reproducing that on later kernels (4.19 and 4.20). This is more likely a problem in OOM itself. Link: https://lore.kernel.org/linux-btrfs/20180528054821.9092-1-ethanlien@synology.com/ Reported-by: Chris Mason CC: stable@vger.kernel.org # 4.18+ CC: ethanlien Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 09829e8d759e..909f7ea92e0b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3170,9 +3170,6 @@ out: /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - /* Try to release some metadata so we don't get an OOM but don't wait */ - btrfs_btree_balance_dirty_nodelay(fs_info); - return ret; } From f97fd2926eed63bd5141261e00f027b2ba3b6661 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 21 Nov 2018 14:05:45 -0500 Subject: [PATCH 1433/2608] btrfs: wait on ordered extents on abort cleanup commit 74d5d229b1bf60f93bff244b2dfc0eb21ec32a07 upstream. If we flip read-only before we initiate writeback on all dirty pages for ordered extents we've created then we'll have ordered extents left over on umount, which results in all sorts of bad things happening. Fix this by making sure we wait on ordered extents if we have to do the aborted transaction cleanup stuff. generic/475 can produce this warning: [ 8531.177332] WARNING: CPU: 2 PID: 11997 at fs/btrfs/disk-io.c:3856 btrfs_free_fs_root+0x95/0xa0 [btrfs] [ 8531.183282] CPU: 2 PID: 11997 Comm: umount Tainted: G W 5.0.0-rc1-default+ #394 [ 8531.185164] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),BIOS rel-1.11.2-0-gf9626cc-prebuilt.qemu-project.org 04/01/2014 [ 8531.187851] RIP: 0010:btrfs_free_fs_root+0x95/0xa0 [btrfs] [ 8531.193082] RSP: 0018:ffffb1ab86163d98 EFLAGS: 00010286 [ 8531.194198] RAX: ffff9f3449494d18 RBX: ffff9f34a2695000 RCX:0000000000000000 [ 8531.195629] RDX: 0000000000000002 RSI: 0000000000000001 RDI:0000000000000000 [ 8531.197315] RBP: ffff9f344e930000 R08: 0000000000000001 R09:0000000000000000 [ 8531.199095] R10: 0000000000000000 R11: ffff9f34494d4ff8 R12:ffffb1ab86163dc0 [ 8531.200870] R13: ffff9f344e9300b0 R14: ffffb1ab86163db8 R15:0000000000000000 [ 8531.202707] FS: 00007fc68e949fc0(0000) GS:ffff9f34bd800000(0000)knlGS:0000000000000000 [ 8531.204851] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8531.205942] CR2: 00007ffde8114dd8 CR3: 000000002dfbd000 CR4:00000000000006e0 [ 8531.207516] Call Trace: [ 8531.208175] btrfs_free_fs_roots+0xdb/0x170 [btrfs] [ 8531.210209] ? wait_for_completion+0x5b/0x190 [ 8531.211303] close_ctree+0x157/0x350 [btrfs] [ 8531.212412] generic_shutdown_super+0x64/0x100 [ 8531.213485] kill_anon_super+0x14/0x30 [ 8531.214430] btrfs_kill_super+0x12/0xa0 [btrfs] [ 8531.215539] deactivate_locked_super+0x29/0x60 [ 8531.216633] cleanup_mnt+0x3b/0x70 [ 8531.217497] task_work_run+0x98/0xc0 [ 8531.218397] exit_to_usermode_loop+0x83/0x90 [ 8531.219324] do_syscall_64+0x15b/0x180 [ 8531.220192] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 8531.221286] RIP: 0033:0x7fc68e5e4d07 [ 8531.225621] RSP: 002b:00007ffde8116608 EFLAGS: 00000246 ORIG_RAX:00000000000000a6 [ 8531.227512] RAX: 0000000000000000 RBX: 00005580c2175970 RCX:00007fc68e5e4d07 [ 8531.229098] RDX: 0000000000000001 RSI: 0000000000000000 RDI:00005580c2175b80 [ 8531.230730] RBP: 0000000000000000 R08: 00005580c2175ba0 R09:00007ffde8114e80 [ 8531.232269] R10: 0000000000000000 R11: 0000000000000246 R12:00005580c2175b80 [ 8531.233839] R13: 00007fc68eac61c4 R14: 00005580c2175a68 R15:0000000000000000 Leaving a tree in the rb-tree: 3853 void btrfs_free_fs_root(struct btrfs_root *root) 3854 { 3855 iput(root->ino_cache_inode); 3856 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); CC: stable@vger.kernel.org Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik [ add stacktrace ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 858d5812eb8f..e0bdc0c902e4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4115,6 +4115,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->ordered_root_lock); } spin_unlock(&fs_info->ordered_root_lock); + + /* + * We need this here because if we've been flipped read-only we won't + * get sync() from the umount, so we need to make sure any ordered + * extents that haven't had their dirty pages IO start writeout yet + * actually get run and error out properly. + */ + btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); } static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, From 41c13bfcc4cdeac537c060a5156a688c69d9c6a5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Jan 2019 10:31:09 -0800 Subject: [PATCH 1434/2608] Yama: Check for pid death before checking ancestry commit 9474f4e7cd71a633fa1ef93b7daefd44bbdfd482 upstream. It's possible that a pid has died before we take the rcu lock, in which case we can't walk the ancestry list as it may be detached. Instead, check for death first before doing the walk. Reported-by: syzbot+a9ac39bf55329e206219@syzkaller.appspotmail.com Fixes: 2d514487faf1 ("security: Yama LSM") Cc: stable@vger.kernel.org Suggested-by: Oleg Nesterov Signed-off-by: Kees Cook Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/yama/yama_lsm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 8298e094f4f7..7d5541c6a225 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -373,7 +373,9 @@ static int yama_ptrace_access_check(struct task_struct *child, break; case YAMA_SCOPE_RELATIONAL: rcu_read_lock(); - if (!task_is_descendant(current, child) && + if (!pid_alive(child)) + rc = -EPERM; + if (!rc && !task_is_descendant(current, child) && !ptracer_exception_found(current, child) && !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; From caae28b3ae154a5380ff647c80ec5d9818ea61f0 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Thu, 3 Jan 2019 22:08:05 +0800 Subject: [PATCH 1435/2608] scsi: core: Synchronize request queue PM status only on successful resume commit 3f7e62bba0003f9c68f599f5997c4647ef5b4f4e upstream. The commit 356fd2663cff ("scsi: Set request queue runtime PM status back to active on resume") fixed up the inconsistent RPM status between request queue and device. However changing request queue RPM status shall be done only on successful resume, otherwise status may be still inconsistent as below, Request queue: RPM_ACTIVE Device: RPM_SUSPENDED This ends up soft lockup because requests can be submitted to underlying devices but those devices and their required resource are not resumed. For example, After above inconsistent status happens, IO request can be submitted to UFS device driver but required resource (like clock) is not resumed yet thus lead to warning as below call stack, WARN_ON(hba->clk_gating.state != CLKS_ON); ufshcd_queuecommand scsi_dispatch_cmd scsi_request_fn __blk_run_queue cfq_insert_request __elv_add_request blk_flush_plug_list blk_finish_plug jbd2_journal_commit_transaction kjournald2 We may see all behind IO requests hang because of no response from storage host or device and then soft lockup happens in system. In the end, system may crash in many ways. Fixes: 356fd2663cff (scsi: Set request queue runtime PM status back to active on resume) Cc: stable@vger.kernel.org Signed-off-by: Stanley Chu Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_pm.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index b44c1bb687a2..ebc193f7f7dd 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -79,8 +79,22 @@ static int scsi_dev_type_resume(struct device *dev, if (err == 0) { pm_runtime_disable(dev); - pm_runtime_set_active(dev); + err = pm_runtime_set_active(dev); pm_runtime_enable(dev); + + /* + * Forcibly set runtime PM status of request queue to "active" + * to make sure we can again get requests from the queue + * (see also blk_pm_peek_request()). + * + * The resume hook will correct runtime PM status of the disk. + */ + if (!err && scsi_is_sdev_device(dev)) { + struct scsi_device *sdev = to_scsi_device(dev); + + if (sdev->request_queue->dev) + blk_set_runtime_active(sdev->request_queue); + } } return err; @@ -139,16 +153,6 @@ static int scsi_bus_resume_common(struct device *dev, else fn = NULL; - /* - * Forcibly set runtime PM status of request queue to "active" to - * make sure we can again get requests from the queue (see also - * blk_pm_peek_request()). - * - * The resume hook will correct runtime PM status of the disk. - */ - if (scsi_is_sdev_device(dev) && pm_runtime_suspended(dev)) - blk_set_runtime_active(to_scsi_device(dev)->request_queue); - if (fn) { async_schedule_domain(fn, dev, &scsi_sd_pm_domain); From 24c99a924db92c46cb317856e7e84c61ff02c8cb Mon Sep 17 00:00:00 2001 From: Ivan Mironov Date: Sun, 23 Dec 2018 12:41:58 +0500 Subject: [PATCH 1436/2608] scsi: sd: Fix cache_type_store() commit 44759979a49bfd2d20d789add7fa81a21eb1a4ab upstream. Changing of caching mode via /sys/devices/.../scsi_disk/.../cache_type may fail if device responds to MODE SENSE command with DPOFUA flag set, and then checks this flag to be not set on MODE SELECT command. In this scenario, when trying to change cache_type, write always fails: # echo "none" >cache_type bash: echo: write error: Invalid argument And following appears in dmesg: [13007.865745] sd 1:0:1:0: [sda] Sense Key : Illegal Request [current] [13007.865753] sd 1:0:1:0: [sda] Add. Sense: Invalid field in parameter list From SBC-4 r15, 6.5.1 "Mode pages overview", description of DEVICE-SPECIFIC PARAMETER field in the mode parameter header: ... The write protect (WP) bit for mode data sent with a MODE SELECT command shall be ignored by the device server. ... The DPOFUA bit is reserved for mode data sent with a MODE SELECT command. ... The remaining bits in the DEVICE-SPECIFIC PARAMETER byte are also reserved and shall be set to zero. [mkp: shuffled commentary to commit description] Cc: stable@vger.kernel.org Signed-off-by: Ivan Mironov Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 39754cc90043..048fccc72e03 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -206,6 +206,12 @@ cache_type_store(struct device *dev, struct device_attribute *attr, sp = buffer_data[0] & 0x80 ? 1 : 0; buffer_data[0] &= ~0x80; + /* + * Ensure WP, DPOFUA, and RESERVED fields are cleared in + * received mode parameter buffer before doing MODE SELECT. + */ + data.device_specific = 0; + if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT, SD_MAX_RETRIES, &data, &sshdr)) { if (scsi_sense_valid(&sshdr)) From c3f5e4efce3e2ece7f31826a14849e60d342bde1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Jan 2019 06:56:46 +0000 Subject: [PATCH 1437/2608] crypto: talitos - reorder code in talitos_edesc_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c56c2e173773097a248fd3bace91ac8f6fc5386d upstream. This patch moves the mapping of IV after the kmalloc(). This avoids having to unmap in case kmalloc() fails. Signed-off-by: Christophe Leroy Reviewed-by: Horia Geantă Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 57e1b203cf36..f23e38334e98 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1347,23 +1347,18 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN; - void *err; if (cryptlen + authsize > max_len) { dev_err(dev, "length exceeds h/w max limit\n"); return ERR_PTR(-EINVAL); } - if (ivsize) - iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); - if (!dst || dst == src) { src_len = assoclen + cryptlen + authsize; src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); - err = ERR_PTR(-EINVAL); - goto error_sg; + return ERR_PTR(-EINVAL); } src_nents = (src_nents == 1) ? 0 : src_nents; dst_nents = dst ? src_nents : 0; @@ -1373,16 +1368,14 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); - err = ERR_PTR(-EINVAL); - goto error_sg; + return ERR_PTR(-EINVAL); } src_nents = (src_nents == 1) ? 0 : src_nents; dst_len = assoclen + cryptlen + (encrypt ? authsize : 0); dst_nents = sg_nents_for_len(dst, dst_len); if (dst_nents < 0) { dev_err(dev, "Invalid number of dst SG.\n"); - err = ERR_PTR(-EINVAL); - goto error_sg; + return ERR_PTR(-EINVAL); } dst_nents = (dst_nents == 1) ? 0 : dst_nents; } @@ -1407,11 +1400,10 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, } edesc = kmalloc(alloc_len, GFP_DMA | flags); - if (!edesc) { - dev_err(dev, "could not allocate edescriptor\n"); - err = ERR_PTR(-ENOMEM); - goto error_sg; - } + if (!edesc) + return ERR_PTR(-ENOMEM); + if (ivsize) + iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; @@ -1423,10 +1415,6 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, DMA_BIDIRECTIONAL); return edesc; -error_sg: - if (iv_dma) - dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE); - return err; } static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, u8 *iv, From 8041d33bf8d515969a11efa8b39febde918bbc9d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Jan 2019 06:56:48 +0000 Subject: [PATCH 1438/2608] crypto: talitos - fix ablkcipher for CONFIG_VMAP_STACK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1bea445b0a022ee126ca328b3705cd4df18ebc14 upstream. [ 2.364486] WARNING: CPU: 0 PID: 60 at ./arch/powerpc/include/asm/io.h:837 dma_nommu_map_page+0x44/0xd4 [ 2.373579] CPU: 0 PID: 60 Comm: cryptomgr_test Tainted: G W 4.20.0-rc5-00560-g6bfb52e23a00-dirty #531 [ 2.384740] NIP: c000c540 LR: c000c584 CTR: 00000000 [ 2.389743] REGS: c95abab0 TRAP: 0700 Tainted: G W (4.20.0-rc5-00560-g6bfb52e23a00-dirty) [ 2.400042] MSR: 00029032 CR: 24042204 XER: 00000000 [ 2.406669] [ 2.406669] GPR00: c02f2244 c95abb60 c6262990 c95abd80 0000256a 00000001 00000001 00000001 [ 2.406669] GPR08: 00000000 00002000 00000010 00000010 24042202 00000000 00000100 c95abd88 [ 2.406669] GPR16: 00000000 c05569d4 00000001 00000010 c95abc88 c0615664 00000004 00000000 [ 2.406669] GPR24: 00000010 c95abc88 c95abc88 00000000 c61ae210 c7ff6d40 c61ae210 00003d68 [ 2.441559] NIP [c000c540] dma_nommu_map_page+0x44/0xd4 [ 2.446720] LR [c000c584] dma_nommu_map_page+0x88/0xd4 [ 2.451762] Call Trace: [ 2.454195] [c95abb60] [82000808] 0x82000808 (unreliable) [ 2.459572] [c95abb80] [c02f2244] talitos_edesc_alloc+0xbc/0x3c8 [ 2.465493] [c95abbb0] [c02f2600] ablkcipher_edesc_alloc+0x4c/0x5c [ 2.471606] [c95abbd0] [c02f4ed0] ablkcipher_encrypt+0x20/0x64 [ 2.477389] [c95abbe0] [c02023b0] __test_skcipher+0x4bc/0xa08 [ 2.483049] [c95abe00] [c0204b60] test_skcipher+0x2c/0xcc [ 2.488385] [c95abe20] [c0204c48] alg_test_skcipher+0x48/0xbc [ 2.494064] [c95abe40] [c0205cec] alg_test+0x164/0x2e8 [ 2.499142] [c95abf00] [c0200dec] cryptomgr_test+0x48/0x50 [ 2.504558] [c95abf10] [c0039ff4] kthread+0xe4/0x110 [ 2.509471] [c95abf40] [c000e1d0] ret_from_kernel_thread+0x14/0x1c [ 2.515532] Instruction dump: [ 2.518468] 7c7e1b78 7c9d2378 7cbf2b78 41820054 3d20c076 8089c200 3d20c076 7c84e850 [ 2.526127] 8129c204 7c842e70 7f844840 419c0008 <0fe00000> 2f9e0000 54847022 7c84fa14 [ 2.533960] ---[ end trace bf78d94af73fe3b8 ]--- [ 2.539123] talitos ff020000.crypto: master data transfer error [ 2.544775] talitos ff020000.crypto: TEA error: ISR 0x20000000_00000040 [ 2.551625] alg: skcipher: encryption failed on test 1 for ecb-aes-talitos: ret=22 IV cannot be on stack when CONFIG_VMAP_STACK is selected because the stack cannot be DMA mapped anymore. This patch copies the IV into the extended descriptor. Fixes: 4de9d0b547b9 ("crypto: talitos - Add ablkcipher algorithms") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index f23e38334e98..4388f4e3840c 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1398,12 +1398,15 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, dma_len = 0; alloc_len += icv_stashing ? authsize : 0; } + alloc_len += ivsize; edesc = kmalloc(alloc_len, GFP_DMA | flags); if (!edesc) return ERR_PTR(-ENOMEM); - if (ivsize) + if (ivsize) { + iv = memcpy(((u8 *)edesc) + alloc_len - ivsize, iv, ivsize); iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); + } edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; From cb2fb7b7c4dcbd3399cb3988642ee2d7b32f2b73 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Jan 2019 17:24:31 +0100 Subject: [PATCH 1439/2608] mips: fix n32 compat_ipc_parse_version commit 5a9372f751b5350e0ce3d2ee91832f1feae2c2e5 upstream. While reading through the sysvipc implementation, I noticed that the n32 semctl/shmctl/msgctl system calls behave differently based on whether o32 support is enabled or not: Without o32, the IPC_64 flag passed by user space is rejected but calls without that flag get IPC_64 behavior. As far as I can tell, this was inadvertently changed by a cleanup patch but never noticed by anyone, possibly nobody has tried using sysvipc on n32 after linux-3.19. Change it back to the old behavior now. Fixes: 78aaf956ba3a ("MIPS: Compat: Fix build error if CONFIG_MIPS32_COMPAT but no compat ABI.") Signed-off-by: Arnd Bergmann Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 23e3d3e0ee5b..ae4450e891ab 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3153,6 +3153,7 @@ config MIPS32_O32 config MIPS32_N32 bool "Kernel support for n32 binaries" depends on 64BIT + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select COMPAT select MIPS32_COMPAT select SYSVIPC_COMPAT if SYSVIPC From a06d94d98d2417510ab2dab39f633d6557b0bb68 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 6 Jan 2019 19:44:11 +0100 Subject: [PATCH 1440/2608] MIPS: lantiq: Fix IPI interrupt handling commit 2b4dba55b04b212a7fd1f0395b41d79ee3a9801b upstream. This makes SMP on the vrx200 work again, by removing all the MIPS CPU interrupt specific code and making it fully use the generic MIPS CPU interrupt controller. The mti,cpu-interrupt-controller from irq-mips-cpu.c now handles the CPU interrupts and also the IPI interrupts which are used to communication between the CPUs in a SMP system. The generic interrupt code was already used before but the interrupt vectors were overwritten again when we called set_vi_handler() in the lantiq interrupt driver and we also provided our own plat_irq_dispatch() function which overwrote the weak generic implementation. Now the code uses the generic handler for the MIPS CPU interrupts including the IPI interrupts and registers a handler for the CPU interrupts which are handled by the lantiq ICU with irq_set_chained_handler() which was already called before. Calling the set_c0_status() function is also not needed any more because the generic MIPS CPU interrupt already activates the needed bits. Fixes: 1eed40043579 ("MIPS: smp-mt: Use CPU interrupt controller IPI IRQ domain support") Cc: stable@kernel.org # v4.12 Signed-off-by: Hauke Mehrtens Signed-off-by: Paul Burton Cc: jhogan@kernel.org Cc: ralf@linux-mips.org Cc: john@phrozen.org Cc: linux-mips@linux-mips.org Cc: linux-mips@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/lantiq/irq.c | 68 ++++-------------------------------------- 1 file changed, 5 insertions(+), 63 deletions(-) diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index f0bc3312ed11..c4ef1c31e0c4 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -224,9 +224,11 @@ static struct irq_chip ltq_eiu_type = { .irq_set_type = ltq_eiu_settype, }; -static void ltq_hw_irqdispatch(int module) +static void ltq_hw_irq_handler(struct irq_desc *desc) { + int module = irq_desc_get_irq(desc) - 2; u32 irq; + int hwirq; irq = ltq_icu_r32(module, LTQ_ICU_IM0_IOSR); if (irq == 0) @@ -237,7 +239,8 @@ static void ltq_hw_irqdispatch(int module) * other bits might be bogus */ irq = __fls(irq); - do_IRQ((int)irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module)); + hwirq = irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module); + generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq)); /* if this is a EBU irq, we need to ack it or get a deadlock */ if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0) && LTQ_EBU_PCC_ISTAT) @@ -245,49 +248,6 @@ static void ltq_hw_irqdispatch(int module) LTQ_EBU_PCC_ISTAT); } -#define DEFINE_HWx_IRQDISPATCH(x) \ - static void ltq_hw ## x ## _irqdispatch(void) \ - { \ - ltq_hw_irqdispatch(x); \ - } -DEFINE_HWx_IRQDISPATCH(0) -DEFINE_HWx_IRQDISPATCH(1) -DEFINE_HWx_IRQDISPATCH(2) -DEFINE_HWx_IRQDISPATCH(3) -DEFINE_HWx_IRQDISPATCH(4) - -#if MIPS_CPU_TIMER_IRQ == 7 -static void ltq_hw5_irqdispatch(void) -{ - do_IRQ(MIPS_CPU_TIMER_IRQ); -} -#else -DEFINE_HWx_IRQDISPATCH(5) -#endif - -static void ltq_hw_irq_handler(struct irq_desc *desc) -{ - ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - int irq; - - if (!pending) { - spurious_interrupt(); - return; - } - - pending >>= CAUSEB_IP; - while (pending) { - irq = fls(pending) - 1; - do_IRQ(MIPS_CPU_IRQ_BASE + irq); - pending &= ~BIT(irq); - } -} - static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct irq_chip *chip = <q_irq_type; @@ -343,28 +303,10 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) for (i = 0; i < MAX_IM; i++) irq_set_chained_handler(i + 2, ltq_hw_irq_handler); - if (cpu_has_vint) { - pr_info("Setting up vectored interrupts\n"); - set_vi_handler(2, ltq_hw0_irqdispatch); - set_vi_handler(3, ltq_hw1_irqdispatch); - set_vi_handler(4, ltq_hw2_irqdispatch); - set_vi_handler(5, ltq_hw3_irqdispatch); - set_vi_handler(6, ltq_hw4_irqdispatch); - set_vi_handler(7, ltq_hw5_irqdispatch); - } - ltq_domain = irq_domain_add_linear(node, (MAX_IM * INT_NUM_IM_OFFSET) + MIPS_CPU_IRQ_CASCADE, &irq_domain_ops, 0); -#ifndef CONFIG_MIPS_MT_SMP - set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | - IE_IRQ3 | IE_IRQ4 | IE_IRQ5); -#else - set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 | - IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5); -#endif - /* tell oprofile which irq to use */ ltq_perfcount_irq = irq_create_mapping(ltq_domain, LTQ_PERF_IRQ); From a7c0d6db36380165a4f1586c874dbb09285c933f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Jan 2019 10:44:50 +0100 Subject: [PATCH 1441/2608] OF: properties: add missing of_node_put commit 28b170e88bc0c7509e6724717c15cb4b5686026e upstream. Add an of_node_put when the result of of_graph_get_remote_port_parent is not available. The semantic match that finds this problem is as follows (http://coccinelle.lip6.fr): // @r exists@ local idexpression e; expression x; @@ e = of_graph_get_remote_port_parent(...); ... when != x = e when != true e == NULL when != of_node_put(e) when != of_fwnode_handle(e) ( return e; | *return ...; ) // Signed-off-by: Julia Lawall Cc: stable@vger.kernel.org Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/property.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/property.c b/drivers/of/property.c index 264c355ba1ff..fd9b734fff33 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -810,6 +810,7 @@ struct device_node *of_graph_get_remote_node(const struct device_node *node, if (!of_device_is_available(remote)) { pr_debug("not available for remote node\n"); + of_node_put(remote); return NULL; } From 501f37b68703b09bfaaaa2b5196f58d7a80ca3f9 Mon Sep 17 00:00:00 2001 From: Jonathan Hunter Date: Tue, 13 Nov 2018 08:56:31 +0000 Subject: [PATCH 1442/2608] mfd: tps6586x: Handle interrupts on suspend commit ac4ca4b9f4623ba5e1ea7a582f286567c611e027 upstream. The tps6586x driver creates an irqchip that is used by its various child devices for managing interrupts. The tps6586x-rtc device is one of its children that uses the tps6586x irqchip. When using the tps6586x-rtc as a wake-up device from suspend, the following is seen: PM: Syncing filesystems ... done. Freezing user space processes ... (elapsed 0.001 seconds) done. OOM killer disabled. Freezing remaining freezable tasks ... (elapsed 0.000 seconds) done. Disabling non-boot CPUs ... Entering suspend state LP1 Enabling non-boot CPUs ... CPU1 is up tps6586x 3-0034: failed to read interrupt status tps6586x 3-0034: failed to read interrupt status The reason why the tps6586x interrupt status cannot be read is because the tps6586x interrupt is not masked during suspend and when the tps6586x-rtc interrupt occurs, to wake-up the device, the interrupt is seen before the i2c controller has been resumed in order to read the tps6586x interrupt status. The tps6586x-rtc driver sets it's interrupt as a wake-up source during suspend, which gets propagated to the parent tps6586x interrupt. However, the tps6586x-rtc driver cannot disable it's interrupt during suspend otherwise we would never be woken up and so the tps6586x must disable it's interrupt instead. Prevent the tps6586x interrupt handler from executing on exiting suspend before the i2c controller has been resumed by disabling the tps6586x interrupt on entering suspend and re-enabling it on resuming from suspend. Cc: stable@vger.kernel.org Signed-off-by: Jon Hunter Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/tps6586x.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c index 5628a6b5b19b..c5c320efc7b4 100644 --- a/drivers/mfd/tps6586x.c +++ b/drivers/mfd/tps6586x.c @@ -594,6 +594,29 @@ static int tps6586x_i2c_remove(struct i2c_client *client) return 0; } +static int __maybe_unused tps6586x_i2c_suspend(struct device *dev) +{ + struct tps6586x *tps6586x = dev_get_drvdata(dev); + + if (tps6586x->client->irq) + disable_irq(tps6586x->client->irq); + + return 0; +} + +static int __maybe_unused tps6586x_i2c_resume(struct device *dev) +{ + struct tps6586x *tps6586x = dev_get_drvdata(dev); + + if (tps6586x->client->irq) + enable_irq(tps6586x->client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(tps6586x_pm_ops, tps6586x_i2c_suspend, + tps6586x_i2c_resume); + static const struct i2c_device_id tps6586x_id_table[] = { { "tps6586x", 0 }, { }, @@ -604,6 +627,7 @@ static struct i2c_driver tps6586x_driver = { .driver = { .name = "tps6586x", .of_match_table = of_match_ptr(tps6586x_of_match), + .pm = &tps6586x_pm_ops, }, .probe = tps6586x_i2c_probe, .remove = tps6586x_i2c_remove, From 71a41c7d322f72d6cecc972ae1e112dae8790470 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 10 Jan 2019 09:24:26 -0500 Subject: [PATCH 1443/2608] media: v4l: ioctl: Validate num_planes for debug messages commit 7fe9f01c04c2673bd6662c35b664f0f91888b96f upstream. The num_planes field in struct v4l2_pix_format_mplane is used in a loop before validating it. As the use is printing a debug message in this case, just cap the value to the maximum allowed. Signed-off-by: Sakari Ailus Cc: stable@vger.kernel.org Reviewed-by: Thierry Reding Signed-off-by: Hans Verkuil Cc: # for v4.12 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index d06941cc6a55..f1ef4e97238e 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -249,6 +249,7 @@ static void v4l_print_format(const void *arg, bool write_only) const struct v4l2_window *win; const struct v4l2_sdr_format *sdr; const struct v4l2_meta_format *meta; + u32 planes; unsigned i; pr_cont("type=%s", prt_names(p->type, v4l2_type_names)); @@ -279,7 +280,8 @@ static void v4l_print_format(const void *arg, bool write_only) prt_names(mp->field, v4l2_field_names), mp->colorspace, mp->num_planes, mp->flags, mp->ycbcr_enc, mp->quantization, mp->xfer_func); - for (i = 0; i < mp->num_planes; i++) + planes = min_t(u32, mp->num_planes, VIDEO_MAX_PLANES); + for (i = 0; i < planes; i++) printk(KERN_DEBUG "plane %u: bytesperline=%u sizeimage=%u\n", i, mp->plane_fmt[i].bytesperline, mp->plane_fmt[i].sizeimage); From 96188b18861a78efb40d390931cbe8d938d6a3cb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 20 Jan 2019 14:33:34 -0800 Subject: [PATCH 1444/2608] pstore/ram: Avoid allocation and leak of platform data commit 5631e8576a3caf606cdc375f97425a67983b420c upstream. Yue Hu noticed that when parsing device tree the allocated platform data was never freed. Since it's not used beyond the function scope, this switches to using a stack variable instead. Reported-by: Yue Hu Fixes: 35da60941e44 ("pstore/ram: add Device Tree bindings") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 9f7e546d7050..f371e03cf3bf 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -711,18 +711,15 @@ static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ramoops_platform_data *pdata = dev->platform_data; + struct ramoops_platform_data pdata_local; struct ramoops_context *cxt = &oops_cxt; size_t dump_mem_sz; phys_addr_t paddr; int err = -EINVAL; if (dev_of_node(dev) && !pdata) { - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - pr_err("cannot allocate platform data buffer\n"); - err = -ENOMEM; - goto fail_out; - } + pdata = &pdata_local; + memset(pdata, 0, sizeof(*pdata)); err = ramoops_parse_dt(pdev, pdata); if (err < 0) From ee624c01973b4cbc56eecac96bf1a72790d3addf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 15 Jan 2019 20:47:07 +0100 Subject: [PATCH 1445/2608] arm64: kaslr: ensure randomized quantities are clean to the PoC commit 1598ecda7b239e9232dda032bfddeed9d89fab6c upstream. kaslr_early_init() is called with the kernel mapped at its link time offset, and if it returns with a non-zero offset, the kernel is unmapped and remapped again at the randomized offset. During its execution, kaslr_early_init() also randomizes the base of the module region and of the linear mapping of DRAM, and sets two variables accordingly. However, since these variables are assigned with the caches on, they may get lost during the cache maintenance that occurs when unmapping and remapping the kernel, so ensure that these values are cleaned to the PoC. Acked-by: Catalin Marinas Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Cc: # v4.6+ Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/kaslr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 47080c49cc7e..2bda224e8e71 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -43,7 +44,7 @@ static __init u64 get_kaslr_seed(void *fdt) return ret; } -static __init const u8 *get_cmdline(void *fdt) +static __init const u8 *kaslr_get_cmdline(void *fdt) { static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; @@ -109,7 +110,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * Check if 'nokaslr' appears on the command line, and * return 0 if that is the case. */ - cmdline = get_cmdline(fdt); + cmdline = kaslr_get_cmdline(fdt); str = strstr(cmdline, "nokaslr"); if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) return 0; @@ -180,5 +181,8 @@ u64 __init kaslr_early_init(u64 dt_phys) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; + __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); + __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed)); + return offset; } From 0c79a6a8ed98ba7efc80f6dd6e729750147e2e2e Mon Sep 17 00:00:00 2001 From: YunQiang Su Date: Tue, 8 Jan 2019 13:45:10 +0800 Subject: [PATCH 1446/2608] Disable MSI also when pcie-octeon.pcie_disable on commit a214720cbf50cd8c3f76bbb9c3f5c283910e9d33 upstream. Octeon has an boot-time option to disable pcie. Since MSI depends on PCI-E, we should also disable MSI also with this option is on in order to avoid inadvertently accessing PCIe registers. Signed-off-by: YunQiang Su Signed-off-by: Paul Burton Cc: pburton@wavecomp.com Cc: linux-mips@vger.kernel.org Cc: aaro.koskinen@iki.fi Cc: stable@vger.kernel.org # v3.3+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/pci/msi-octeon.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c index 2a5bb849b10e..288b58b00dc8 100644 --- a/arch/mips/pci/msi-octeon.c +++ b/arch/mips/pci/msi-octeon.c @@ -369,7 +369,9 @@ int __init octeon_msi_initialize(void) int irq; struct irq_chip *msi; - if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) { + if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_INVALID) { + return 0; + } else if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) { msi_rcv_reg[0] = CVMX_PEXP_NPEI_MSI_RCV0; msi_rcv_reg[1] = CVMX_PEXP_NPEI_MSI_RCV1; msi_rcv_reg[2] = CVMX_PEXP_NPEI_MSI_RCV2; From 9171634695140ea811b0030f5d208d8798153c56 Mon Sep 17 00:00:00 2001 From: Vlad Tsyrklevich Date: Fri, 11 Jan 2019 14:34:38 +0100 Subject: [PATCH 1447/2608] omap2fb: Fix stack memory disclosure commit a01421e4484327fe44f8e126793ed5a48a221e24 upstream. Using [1] for static analysis I found that the OMAPFB_QUERY_PLANE, OMAPFB_GET_COLOR_KEY, OMAPFB_GET_DISPLAY_INFO, and OMAPFB_GET_VRAM_INFO cases could all leak uninitialized stack memory--either due to uninitialized padding or 'reserved' fields. Fix them by clearing the shared union used to store copied out data. [1] https://github.com/vlad902/kernel-uninitialized-memory-checker Signed-off-by: Vlad Tsyrklevich Reviewed-by: Kees Cook Fixes: b39a982ddecf ("OMAP: DSS2: omapfb driver") Cc: security@kernel.org [b.zolnierkie: prefix patch subject with "omap2fb: "] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c index a3edb20ea4c3..a846d32ee653 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c @@ -609,6 +609,8 @@ int omapfb_ioctl(struct fb_info *fbi, unsigned int cmd, unsigned long arg) int r = 0; + memset(&p, 0, sizeof(p)); + switch (cmd) { case OMAPFB_SYNC_GFX: DBG("ioctl SYNC_GFX\n"); From 5b25a1cfe95ff15a302975f2b2824387b23b81d1 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 29 Oct 2018 06:15:31 -0400 Subject: [PATCH 1448/2608] media: vivid: fix error handling of kthread_run commit 701f49bc028edb19ffccd101997dd84f0d71e279 upstream. kthread_run returns an error pointer, but elsewhere in the code dev->kthread_vid_cap/out is checked against NULL. If kthread_run returns an error, then set the pointer to NULL. I chose this method over changing all kthread_vid_cap/out tests elsewhere since this is more robust. Signed-off-by: Hans Verkuil Reported-by: syzbot+53d5b2df0d9744411e2e@syzkaller.appspotmail.com Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vivid/vivid-kthread-cap.c | 5 ++++- drivers/media/platform/vivid/vivid-kthread-out.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/vivid/vivid-kthread-cap.c b/drivers/media/platform/vivid/vivid-kthread-cap.c index 6ca71aabb576..d300e5e7eadc 100644 --- a/drivers/media/platform/vivid/vivid-kthread-cap.c +++ b/drivers/media/platform/vivid/vivid-kthread-cap.c @@ -877,8 +877,11 @@ int vivid_start_generating_vid_cap(struct vivid_dev *dev, bool *pstreaming) "%s-vid-cap", dev->v4l2_dev.name); if (IS_ERR(dev->kthread_vid_cap)) { + int err = PTR_ERR(dev->kthread_vid_cap); + + dev->kthread_vid_cap = NULL; v4l2_err(&dev->v4l2_dev, "kernel_thread() failed\n"); - return PTR_ERR(dev->kthread_vid_cap); + return err; } *pstreaming = true; vivid_grab_controls(dev, true); diff --git a/drivers/media/platform/vivid/vivid-kthread-out.c b/drivers/media/platform/vivid/vivid-kthread-out.c index 98eed5889bc1..7c8d75852816 100644 --- a/drivers/media/platform/vivid/vivid-kthread-out.c +++ b/drivers/media/platform/vivid/vivid-kthread-out.c @@ -248,8 +248,11 @@ int vivid_start_generating_vid_out(struct vivid_dev *dev, bool *pstreaming) "%s-vid-out", dev->v4l2_dev.name); if (IS_ERR(dev->kthread_vid_out)) { + int err = PTR_ERR(dev->kthread_vid_out); + + dev->kthread_vid_out = NULL; v4l2_err(&dev->v4l2_dev, "kernel_thread() failed\n"); - return PTR_ERR(dev->kthread_vid_out); + return err; } *pstreaming = true; vivid_grab_controls(dev, true); From 254cb979105da1b59ec3b99dd0156a43e9989194 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 29 Oct 2018 13:32:38 -0400 Subject: [PATCH 1449/2608] media: vivid: set min width/height to a value > 0 commit 9729d6d282a6d7ce88e64c9119cecdf79edf4e88 upstream. The capture DV timings capabilities allowed for a minimum width and height of 0. So passing a timings struct with 0 values is allowed and will later cause a division by zero. Ensure that the width and height must be >= 16 to avoid this. Signed-off-by: Hans Verkuil Reported-by: syzbot+57c3d83d71187054d56f@syzkaller.appspotmail.com Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vivid/vivid-vid-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/vivid/vivid-vid-common.c b/drivers/media/platform/vivid/vivid-vid-common.c index 6f6d4df1e8a8..11b014bbacd8 100644 --- a/drivers/media/platform/vivid/vivid-vid-common.c +++ b/drivers/media/platform/vivid/vivid-vid-common.c @@ -33,7 +33,7 @@ const struct v4l2_dv_timings_cap vivid_dv_timings_cap = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, MAX_WIDTH, 0, MAX_HEIGHT, 14000000, 775000000, + V4L2_INIT_BT_TIMINGS(16, MAX_WIDTH, 16, MAX_HEIGHT, 14000000, 775000000, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_CVT | V4L2_DV_BT_STD_GTF, V4L2_DV_BT_CAP_PROGRESSIVE | V4L2_DV_BT_CAP_INTERLACED) From 993e65a624c038561a57cbf5d28f087005b4df86 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 15 Jan 2019 20:19:22 -0500 Subject: [PATCH 1450/2608] bpf: in __bpf_redirect_no_mac pull mac only if present commit e7c87bd6cc4ec7b0ac1ed0a88a58f8206c577488 upstream. Syzkaller was able to construct a packet of negative length by redirecting from bpf_prog_test_run_skb with BPF_PROG_TYPE_LWT_XMIT: BUG: KASAN: slab-out-of-bounds in memcpy include/linux/string.h:345 [inline] BUG: KASAN: slab-out-of-bounds in skb_copy_from_linear_data include/linux/skbuff.h:3421 [inline] BUG: KASAN: slab-out-of-bounds in __pskb_copy_fclone+0x2dd/0xeb0 net/core/skbuff.c:1395 Read of size 4294967282 at addr ffff8801d798009c by task syz-executor2/12942 kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 memcpy+0x23/0x50 mm/kasan/kasan.c:302 memcpy include/linux/string.h:345 [inline] skb_copy_from_linear_data include/linux/skbuff.h:3421 [inline] __pskb_copy_fclone+0x2dd/0xeb0 net/core/skbuff.c:1395 __pskb_copy include/linux/skbuff.h:1053 [inline] pskb_copy include/linux/skbuff.h:2904 [inline] skb_realloc_headroom+0xe7/0x120 net/core/skbuff.c:1539 ipip6_tunnel_xmit net/ipv6/sit.c:965 [inline] sit_tunnel_xmit+0xe1b/0x30d0 net/ipv6/sit.c:1029 __netdev_start_xmit include/linux/netdevice.h:4325 [inline] netdev_start_xmit include/linux/netdevice.h:4334 [inline] xmit_one net/core/dev.c:3219 [inline] dev_hard_start_xmit+0x295/0xc90 net/core/dev.c:3235 __dev_queue_xmit+0x2f0d/0x3950 net/core/dev.c:3805 dev_queue_xmit+0x17/0x20 net/core/dev.c:3838 __bpf_tx_skb net/core/filter.c:2016 [inline] __bpf_redirect_common net/core/filter.c:2054 [inline] __bpf_redirect+0x5cf/0xb20 net/core/filter.c:2061 ____bpf_clone_redirect net/core/filter.c:2094 [inline] bpf_clone_redirect+0x2f6/0x490 net/core/filter.c:2066 bpf_prog_41f2bcae09cd4ac3+0xb25/0x1000 The generated test constructs a packet with mac header, network header, skb->data pointing to network header and skb->len 0. Redirecting to a sit0 through __bpf_redirect_no_mac pulls the mac length, even though skb->data already is at skb->network_header. bpf_prog_test_run_skb has already pulled it as LWT_XMIT !is_l2. Update the offset calculation to pull only if skb->data differs from skb->network_header, which is not true in this case. The test itself can be run only from commit 1cf1cae963c2 ("bpf: introduce BPF_PROG_TEST_RUN command"), but the same type of packets with skb at network header could already be built from lwt xmit hooks, so this fix is more relevant to that commit. Also set the mac header on redirect from LWT_XMIT, as even after this change to __bpf_redirect_no_mac that field is expected to be set, but is not yet in ip_finish_output2. Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure") Reported-by: syzbot Signed-off-by: Willem de Bruijn Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- net/core/filter.c | 21 +++++++++++---------- net/core/lwt_bpf.c | 1 + 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index d5158a10ac8f..542fd04bc44d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1714,18 +1714,19 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, u32 flags) { - /* skb->mac_len is not set on normal egress */ - unsigned int mlen = skb->network_header - skb->mac_header; + unsigned int mlen = skb_network_offset(skb); - __skb_pull(skb, mlen); + if (mlen) { + __skb_pull(skb, mlen); - /* At ingress, the mac header has already been pulled once. - * At egress, skb_pospull_rcsum has to be done in case that - * the skb is originated from ingress (i.e. a forwarded skb) - * to ensure that rcsum starts at net header. - */ - if (!skb_at_tc_ingress(skb)) - skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + } skb_pop_mac_header(skb); skb_reset_mac_len(skb); return flags & BPF_F_INGRESS ? diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 832d69649cb6..65313c766ab3 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -65,6 +65,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, lwt->name ? : ""); ret = BPF_OK; } else { + skb_reset_mac_header(skb); ret = skb_do_redirect(skb); if (ret == 0) ret = BPF_REDIRECT; From b9f9379336425ebd0c07d454857ddbc6fe750b36 Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 16 Jan 2019 15:41:11 -0800 Subject: [PATCH 1451/2608] LSM: Check for NULL cred-security on free commit a5795fd38ee8194451ba3f281f075301a3696ce2 upstream. From: Casey Schaufler Check that the cred security blob has been set before trying to clean it up. There is a case during credential initialization that could result in this. Signed-off-by: Casey Schaufler Acked-by: John Johansen Signed-off-by: James Morris Reported-by: syzbot+69ca07954461f189e808@syzkaller.appspotmail.com Signed-off-by: Greg Kroah-Hartman --- security/security.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/security/security.c b/security/security.c index 95a1a0f52880..4fbe4e495c02 100644 --- a/security/security.c +++ b/security/security.c @@ -993,6 +993,13 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) void security_cred_free(struct cred *cred) { + /* + * There is a failure case in prepare_creds() that + * may result in a call here with ->security being NULL. + */ + if (unlikely(cred->security == NULL)) + return; + call_void_hook(cred_free, cred); } From eb376a62ac5d0ad2d0b98e9ca6eeb71e2efa53ac Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 13 Nov 2018 09:06:46 -0500 Subject: [PATCH 1452/2608] media: vb2: vb2_mmap: move lock up commit cd26d1c4d1bc947b56ae404998ae2276df7b39b7 upstream. If a filehandle is dup()ped, then it is possible to close it from one fd and call mmap from the other. This creates a race condition in vb2_mmap where it is using queue data that __vb2_queue_free (called from close()) is in the process of releasing. By moving up the mutex_lock(mmap_lock) in vb2_mmap this race is avoided since __vb2_queue_free is called with the same mutex locked. So vb2_mmap now reads consistent buffer data. Signed-off-by: Hans Verkuil Reported-by: syzbot+be93025dd45dccd8923c@syzkaller.appspotmail.com Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 43522a09b11d..ba1bea3a36e6 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1925,9 +1925,13 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) return -EINVAL; } } + + mutex_lock(&q->mmap_lock); + if (vb2_fileio_is_active(q)) { dprintk(1, "mmap: file io in progress\n"); - return -EBUSY; + ret = -EBUSY; + goto unlock; } /* @@ -1935,7 +1939,7 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) - return ret; + goto unlock; vb = q->bufs[buffer]; @@ -1951,8 +1955,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) return -EINVAL; } - mutex_lock(&q->mmap_lock); ret = call_memop(vb, mmap, vb->planes[plane].mem_priv, vma); + +unlock: mutex_unlock(&q->mmap_lock); if (ret) return ret; From f85592f4c0cb6c6b99e6e4fcd1701fa19d79c587 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Dec 2018 10:35:11 -0500 Subject: [PATCH 1453/2608] sunrpc: handle ENOMEM in rpcb_getport_async commit 81c88b18de1f11f70c97f28ced8d642c00bb3955 upstream. If we ignore the error we'll hit a null dereference a little later. Reported-by: syzbot+4b98281f2401ab849f4b@syzkaller.appspotmail.com Signed-off-by: J. Bruce Fields Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/rpcb_clnt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index ea0676f199c8..da21efac80f4 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -771,6 +771,12 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_3: map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); + if (!map->r_addr) { + status = -ENOMEM; + dprintk("RPC: %5u %s: no memory available\n", + task->tk_pid, __func__); + goto bailout_free_args; + } map->r_owner = ""; break; case RPCBVERS_2: @@ -793,6 +799,8 @@ void rpcb_getport_async(struct rpc_task *task) rpc_put_task(child); return; +bailout_free_args: + kfree(map); bailout_release_client: rpc_release_client(rpcb_clnt); bailout_nofree: From 26e6d521e5520d62f72fc5682e87bd0bcdbc5b66 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 2 Jan 2019 19:14:31 -0800 Subject: [PATCH 1454/2608] netfilter: ebtables: account ebt_table_info to kmemcg commit e2c8d550a973bb34fc28bc8d0ec996f84562fb8a upstream. The [ip,ip6,arp]_tables use x_tables_info internally and the underlying memory is already accounted to kmemcg. Do the same for ebtables. The syzbot, by using setsockopt(EBT_SO_SET_ENTRIES), was able to OOM the whole system from a restricted memcg, a potential DoS. By accounting the ebt_table_info, the memory used for ebt_table_info can be contained within the memcg of the allocating process. However the lifetime of ebt_table_info is independent of the allocating process and is tied to the network namespace. So, the oom-killer will not be able to relieve the memory pressure due to ebt_table_info memory. The memory for ebt_table_info is allocated through vmalloc. Currently vmalloc does not handle the oom-killed allocating process correctly and one large allocation can bypass memcg limit enforcement. So, with this patch, at least the small allocations will be contained. For large allocations, we need to fix vmalloc. Reported-by: syzbot+7713f3aa67be76b1552c@syzkaller.appspotmail.com Signed-off-by: Shakeel Butt Reviewed-by: Kirill Tkhai Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 54c7fe68040f..22e4c15a1fc3 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1134,14 +1134,16 @@ static int do_replace(struct net *net, const void __user *user, tmp.name[sizeof(tmp.name) - 1] = 0; countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; - newinfo = vmalloc(sizeof(*newinfo) + countersize); + newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT, + PAGE_KERNEL); if (!newinfo) return -ENOMEM; if (countersize) memset(newinfo->counters, 0, countersize); - newinfo->entries = vmalloc(tmp.entries_size); + newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT, + PAGE_KERNEL); if (!newinfo->entries) { ret = -ENOMEM; goto free_newinfo; From 484636b44424008464636f713424a79bd7be5265 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 9 Jan 2019 10:55:10 -0500 Subject: [PATCH 1455/2608] selinux: fix GPF on invalid policy commit 5b0e7310a2a33c06edc7eb81ffc521af9b2c5610 upstream. levdatum->level can be NULL if we encounter an error while loading the policy during sens_read prior to initializing it. Make sure sens_destroy handles that case correctly. Reported-by: syzbot+6664500f0f18f07a5c0e@syzkaller.appspotmail.com Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/policydb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index ffeb644bfecd..524068d71bc1 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -730,7 +730,8 @@ static int sens_destroy(void *key, void *datum, void *p) kfree(key); if (datum) { levdatum = datum; - ebitmap_destroy(&levdatum->level->cat); + if (levdatum->level) + ebitmap_destroy(&levdatum->level->cat); kfree(levdatum->level); } kfree(datum); From 0fb89795bbaea0a2c69549b78249eeecd28c721e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 14 Jan 2019 09:48:10 +0100 Subject: [PATCH 1456/2608] blockdev: Fix livelocks on loop device commit 04906b2f542c23626b0ef6219b808406f8dddbe9 upstream. bd_set_size() updates also block device's block size. This is somewhat unexpected from its name and at this point, only blkdev_open() uses this functionality. Furthermore, this can result in changing block size under a filesystem mounted on a loop device which leads to livelocks inside __getblk_gfp() like: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 10863 Comm: syz-executor0 Not tainted 4.18.0-rc5+ #151 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__sanitizer_cov_trace_pc+0x3f/0x50 kernel/kcov.c:106 ... Call Trace: init_page_buffers+0x3e2/0x530 fs/buffer.c:904 grow_dev_page fs/buffer.c:947 [inline] grow_buffers fs/buffer.c:1009 [inline] __getblk_slow fs/buffer.c:1036 [inline] __getblk_gfp+0x906/0xb10 fs/buffer.c:1313 __bread_gfp+0x2d/0x310 fs/buffer.c:1347 sb_bread include/linux/buffer_head.h:307 [inline] fat12_ent_bread+0x14e/0x3d0 fs/fat/fatent.c:75 fat_ent_read_block fs/fat/fatent.c:441 [inline] fat_alloc_clusters+0x8ce/0x16e0 fs/fat/fatent.c:489 fat_add_cluster+0x7a/0x150 fs/fat/inode.c:101 __fat_get_block fs/fat/inode.c:148 [inline] ... Trivial reproducer for the problem looks like: truncate -s 1G /tmp/image losetup /dev/loop0 /tmp/image mkfs.ext4 -b 1024 /dev/loop0 mount -t ext4 /dev/loop0 /mnt losetup -c /dev/loop0 l /mnt Fix the problem by moving initialization of a block device block size into a separate function and call it when needed. Thanks to Tetsuo Handa for help with debugging the problem. Reported-by: syzbot+9933e4476f365f5d5a1b@syzkaller.appspotmail.com Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 3323eec5c164..3911c1a80219 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -116,6 +116,20 @@ void invalidate_bdev(struct block_device *bdev) } EXPORT_SYMBOL(invalidate_bdev); +static void set_init_blocksize(struct block_device *bdev) +{ + unsigned bsize = bdev_logical_block_size(bdev); + loff_t size = i_size_read(bdev->bd_inode); + + while (bsize < PAGE_SIZE) { + if (size & bsize) + break; + bsize <<= 1; + } + bdev->bd_block_size = bsize; + bdev->bd_inode->i_blkbits = blksize_bits(bsize); +} + int set_blocksize(struct block_device *bdev, int size) { /* Size must be a power of two, and between 512 and PAGE_SIZE */ @@ -1393,18 +1407,9 @@ EXPORT_SYMBOL(check_disk_change); void bd_set_size(struct block_device *bdev, loff_t size) { - unsigned bsize = bdev_logical_block_size(bdev); - inode_lock(bdev->bd_inode); i_size_write(bdev->bd_inode, size); inode_unlock(bdev->bd_inode); - while (bsize < PAGE_SIZE) { - if (size & bsize) - break; - bsize <<= 1; - } - bdev->bd_block_size = bsize; - bdev->bd_inode->i_blkbits = blksize_bits(bsize); } EXPORT_SYMBOL(bd_set_size); @@ -1482,8 +1487,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } - if (!ret) + if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + set_init_blocksize(bdev); + } /* * If the device is invalidated, rescan partition @@ -1518,6 +1525,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + set_init_blocksize(bdev); } if (bdev->bd_bdi == &noop_backing_dev_info) From 81b2fee6eb16e0f942dc921a1e1382d7c31a6722 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 14 Jan 2019 18:34:02 +0800 Subject: [PATCH 1457/2608] sctp: allocate sctp_sockaddr_entry with kzalloc commit 400b8b9a2a17918f8ce00786f596f530e7f30d50 upstream. The similar issue as fixed in Commit 4a2eb0c37b47 ("sctp: initialize sin6_flowinfo for ipv6 addrs in sctp_inet6addr_event") also exists in sctp_inetaddr_event, as Alexander noticed. To fix it, allocate sctp_sockaddr_entry with kzalloc for both sctp ipv4 and ipv6 addresses, as does in sctp_v4/6_copy_addrlist(). Reported-by: Alexander Potapenko Signed-off-by: Xin Long Reported-by: syzbot+ae0c70c0c2d40c51bb92@syzkaller.appspotmail.com Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 5 +---- net/sctp/protocol.c | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 8002a72aae1a..7eb06fa75730 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -97,11 +97,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; - addr->a.v6.sin6_flowinfo = 0; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; @@ -415,7 +413,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index df22a9c352ad..cbb04d66f564 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -151,7 +151,6 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -782,10 +781,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; spin_lock_bh(&net->sctp.local_addr_lock); From 2ad734a2b3ce31199c34910fa61718d367a6e909 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:25 +0800 Subject: [PATCH 1458/2608] tipc: fix uninit-value in tipc_nl_compat_link_reset_stats commit 8b66fee7f8ee18f9c51260e7a43ab37db5177a05 upstream. syzbot reports following splat: BUG: KMSAN: uninit-value in strlen+0x3b/0xa0 lib/string.c:486 CPU: 1 PID: 11057 Comm: syz-executor0 Not tainted 4.20.0-rc7+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:295 strlen+0x3b/0xa0 lib/string.c:486 nla_put_string include/net/netlink.h:1154 [inline] tipc_nl_compat_link_reset_stats+0x1f0/0x360 net/tipc/netlink_compat.c:760 __tipc_nl_compat_doit net/tipc/netlink_compat.c:311 [inline] tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:344 tipc_nl_compat_handle net/tipc/netlink_compat.c:1107 [inline] tipc_nl_compat_recv+0x14d7/0x2760 net/tipc/netlink_compat.c:1210 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x444/0x640 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf40/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x457ec9 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2557338c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457ec9 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f25573396d4 R13: 00000000004cb478 R14: 00000000004d86c8 R15: 00000000ffffffff Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:204 [inline] kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:158 kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176 kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2759 [inline] __kmalloc_node_track_caller+0xe18/0x1030 mm/slub.c:4383 __kmalloc_reserve net/core/skbuff.c:137 [inline] __alloc_skb+0x309/0xa20 net/core/skbuff.c:205 alloc_skb include/linux/skbuff.h:998 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 The uninitialised access happened in tipc_nl_compat_link_reset_stats: nla_put_string(skb, TIPC_NLA_LINK_NAME, name) This is because name string is not validated before it's used. Reported-by: syzbot+e01d94b5a4c266be6e4c@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index e48f0b2c01b9..b4747b5cb5ea 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -87,6 +87,11 @@ static int tipc_skb_tailroom(struct sk_buff *skb) return limit; } +static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv) +{ + return TLV_GET_LEN(tlv) - TLV_SPACE(0); +} + static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) { struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); @@ -166,6 +171,11 @@ static struct sk_buff *tipc_get_err_tlv(char *str) return buf; } +static inline bool string_is_valid(char *s, int len) +{ + return memchr(s, '\0', len) ? true : false; +} + static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg, struct sk_buff *arg) @@ -741,6 +751,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *link; + int len; name = (char *)TLV_DATA(msg->req); @@ -748,6 +759,10 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) return -EMSGSIZE; From 6129b69a0a9db48a82cb207f96cde1f0f1b39dec Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:26 +0800 Subject: [PATCH 1459/2608] tipc: fix uninit-value in tipc_nl_compat_bearer_enable commit 0762216c0ad2a2fccd63890648eca491f2c83d9a upstream. syzbot reported: BUG: KMSAN: uninit-value in strlen+0x3b/0xa0 lib/string.c:484 CPU: 1 PID: 6371 Comm: syz-executor652 Not tainted 4.19.0-rc8+ #70 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x306/0x460 lib/dump_stack.c:113 kmsan_report+0x1a2/0x2e0 mm/kmsan/kmsan.c:917 __msan_warning+0x7c/0xe0 mm/kmsan/kmsan_instr.c:500 strlen+0x3b/0xa0 lib/string.c:484 nla_put_string include/net/netlink.h:1011 [inline] tipc_nl_compat_bearer_enable+0x238/0x7b0 net/tipc/netlink_compat.c:389 __tipc_nl_compat_doit net/tipc/netlink_compat.c:311 [inline] tipc_nl_compat_doit+0x39f/0xae0 net/tipc/netlink_compat.c:344 tipc_nl_compat_recv+0x147c/0x2760 net/tipc/netlink_compat.c:1107 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185c/0x1a20 net/netlink/genetlink.c:626 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2454 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x166d/0x1720 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x1391/0x1420 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe47/0x1200 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x307/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbe/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440179 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fffef7beee8 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440179 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8 R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000401a00 R13: 0000000000401a90 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:255 [inline] kmsan_internal_poison_shadow+0xc8/0x1d0 mm/kmsan/kmsan.c:180 kmsan_kmalloc+0xa4/0x120 mm/kmsan/kmsan_hooks.c:104 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:113 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2727 [inline] __kmalloc_node_track_caller+0xb43/0x1400 mm/slub.c:4360 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x422/0xe90 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:996 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] netlink_sendmsg+0xcaf/0x1420 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe47/0x1200 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x307/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbe/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 The root cause is that we don't validate whether bear name is a valid string in tipc_nl_compat_bearer_enable(). Meanwhile, we also fix the same issue in the following functions: tipc_nl_compat_bearer_disable() tipc_nl_compat_link_stat_dump() tipc_nl_compat_media_set() tipc_nl_compat_bearer_set() Reported-by: syzbot+b33d5cae0efd35dbfe77@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index b4747b5cb5ea..84187ec221b9 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -380,6 +380,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, struct nlattr *prop; struct nlattr *bearer; struct tipc_bearer_config *b; + int len; b = (struct tipc_bearer_config *)TLV_DATA(msg->req); @@ -387,6 +388,10 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(b->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) return -EMSGSIZE; @@ -412,6 +417,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *bearer; + int len; name = (char *)TLV_DATA(msg->req); @@ -419,6 +425,10 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) return -EMSGSIZE; @@ -479,6 +489,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; int err; + int len; if (!attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -505,6 +516,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return err; name = (char *)TLV_DATA(msg->req); + + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) return 0; @@ -645,6 +661,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *media; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -652,6 +669,10 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, if (!media) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) return -EMSGSIZE; @@ -672,6 +693,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *bearer; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -679,6 +701,10 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) return -EMSGSIZE; From 8123f1b363e44da4607aafa4de92e86a71ca1f82 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:27 +0800 Subject: [PATCH 1460/2608] tipc: fix uninit-value in tipc_nl_compat_link_set commit edf5ff04a45750ac8ce2435974f001dc9cfbf055 upstream. syzbot reports following splat: BUG: KMSAN: uninit-value in strlen+0x3b/0xa0 lib/string.c:486 CPU: 1 PID: 9306 Comm: syz-executor172 Not tainted 4.20.0-rc7+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 strlen+0x3b/0xa0 lib/string.c:486 nla_put_string include/net/netlink.h:1154 [inline] __tipc_nl_compat_link_set net/tipc/netlink_compat.c:708 [inline] tipc_nl_compat_link_set+0x929/0x1220 net/tipc/netlink_compat.c:744 __tipc_nl_compat_doit net/tipc/netlink_compat.c:311 [inline] tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:344 tipc_nl_compat_handle net/tipc/netlink_compat.c:1107 [inline] tipc_nl_compat_recv+0x14d7/0x2760 net/tipc/netlink_compat.c:1210 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x444/0x640 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf40/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 The uninitialised access happened in nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name) This is because lc->name string is not validated before it's used. Reported-by: syzbot+d78b8a29241a195aefb8@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 84187ec221b9..7765887bff5c 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -753,9 +753,14 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, struct tipc_link_config *lc; struct tipc_bearer *bearer; struct tipc_media *media; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + media = tipc_media_find(lc->name); if (media) { cmd->doit = &tipc_nl_media_set; From 2aae1723dea1235ffef183daf0694805297424f6 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:28 +0800 Subject: [PATCH 1461/2608] tipc: fix uninit-value in tipc_nl_compat_name_table_dump commit 974cb0e3e7c963ced06c4e32c5b2884173fa5e01 upstream. syzbot reported: BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline] BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x4a8/0xba0 net/tipc/netlink_compat.c:826 CPU: 0 PID: 6290 Comm: syz-executor848 Not tainted 4.19.0-rc8+ #70 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x306/0x460 lib/dump_stack.c:113 kmsan_report+0x1a2/0x2e0 mm/kmsan/kmsan.c:917 __msan_warning+0x7c/0xe0 mm/kmsan/kmsan_instr.c:500 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] __fswab32 include/uapi/linux/swab.h:59 [inline] tipc_nl_compat_name_table_dump+0x4a8/0xba0 net/tipc/netlink_compat.c:826 __tipc_nl_compat_dumpit+0x59e/0xdb0 net/tipc/netlink_compat.c:205 tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:270 tipc_nl_compat_handle net/tipc/netlink_compat.c:1151 [inline] tipc_nl_compat_recv+0x1402/0x2760 net/tipc/netlink_compat.c:1210 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185c/0x1a20 net/netlink/genetlink.c:626 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2454 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x166d/0x1720 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x1391/0x1420 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe47/0x1200 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x307/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbe/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440179 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffecec49318 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440179 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8 R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000401a00 R13: 0000000000401a90 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:255 [inline] kmsan_internal_poison_shadow+0xc8/0x1d0 mm/kmsan/kmsan.c:180 kmsan_kmalloc+0xa4/0x120 mm/kmsan/kmsan_hooks.c:104 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:113 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2727 [inline] __kmalloc_node_track_caller+0xb43/0x1400 mm/slub.c:4360 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x422/0xe90 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:996 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] netlink_sendmsg+0xcaf/0x1420 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe47/0x1200 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x307/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbe/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 We cannot take for granted the thing that the length of data contained in TLV is longer than the size of struct tipc_name_table_query in tipc_nl_compat_name_table_dump(). Reported-by: syzbot+06e771a754829716a327@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 7765887bff5c..d9a56bfeb869 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -815,6 +815,8 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + return -EINVAL; depth = ntohl(ntq->depth); From 1f3dd37ef84bd0a07d14d260ec16e166a470c065 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:29 +0800 Subject: [PATCH 1462/2608] tipc: fix uninit-value in tipc_nl_compat_doit commit 2753ca5d9009c180dbfd4c802c80983b4b6108d1 upstream. BUG: KMSAN: uninit-value in tipc_nl_compat_doit+0x404/0xa10 net/tipc/netlink_compat.c:335 CPU: 0 PID: 4514 Comm: syz-executor485 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 tipc_nl_compat_doit+0x404/0xa10 net/tipc/netlink_compat.c:335 tipc_nl_compat_recv+0x164b/0x2700 net/tipc/netlink_compat.c:1153 genl_family_rcv_msg net/netlink/genetlink.c:599 [inline] genl_rcv_msg+0x1686/0x1810 net/netlink/genetlink.c:624 netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2447 genl_rcv+0x63/0x80 net/netlink/genetlink.c:635 netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline] netlink_unicast+0x166b/0x1740 net/netlink/af_netlink.c:1337 netlink_sendmsg+0x1048/0x1310 net/netlink/af_netlink.c:1900 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x43fda9 RSP: 002b:00007ffd0c184ba8 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fda9 RDX: 0000000000000000 RSI: 0000000020023000 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 00000000004016d0 R13: 0000000000401760 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1183 [inline] netlink_sendmsg+0x9a6/0x1310 net/netlink/af_netlink.c:1875 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 In tipc_nl_compat_recv(), when the len variable returned by nlmsg_attrlen() is 0, the message is still treated as a valid one, which is obviously unresonable. When len is zero, it means the message not only doesn't contain any valid TLV payload, but also TLV header is not included. Under this stituation, tlv_type field in TLV header is still accessed in tipc_nl_compat_dumpit() or tipc_nl_compat_doit(), but the field space is obviously illegal. Of course, it is not initialized. Reported-by: syzbot+bca0dc46634781f08b38@syzkaller.appspotmail.com Reported-by: syzbot+6bdb590321a7ae40c1a6@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index d9a56bfeb869..73895daf8943 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1240,7 +1240,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) } len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (len && !TLV_OK(msg.req, len)) { + if (!len || !TLV_OK(msg.req, len)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; From 06ee6e217586a1944cb9d50b3a2141cb060b7128 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 8 Nov 2018 14:01:01 +0100 Subject: [PATCH 1463/2608] block/loop: Don't grab "struct file" for vfs_getattr() operation. commit b1ab5fa309e6c49e4e06270ec67dd7b3e9971d04 upstream. vfs_getattr() needs "struct path" rather than "struct file". Let's use path_get()/path_put() rather than get_file()/fput(). Signed-off-by: Tetsuo Handa Reviewed-by: Jan Kara Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 6d61633a7f89..58164bc29da2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1175,7 +1175,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) static int loop_get_status(struct loop_device *lo, struct loop_info64 *info) { - struct file *file; + struct path path; struct kstat stat; int ret; @@ -1200,16 +1200,16 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) } /* Drop lo_ctl_mutex while we call into the filesystem. */ - file = get_file(lo->lo_backing_file); + path = lo->lo_backing_file->f_path; + path_get(&path); mutex_unlock(&lo->lo_ctl_mutex); - ret = vfs_getattr(&file->f_path, &stat, STATX_INO, - AT_STATX_SYNC_AS_STAT); + ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); if (!ret) { info->lo_device = huge_encode_dev(stat.dev); info->lo_inode = stat.ino; info->lo_rdevice = huge_encode_dev(stat.rdev); } - fput(file); + path_put(&path); return ret; } From 57da9a9742200f391d1cf93fea389f7ddc25ec9a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 8 Nov 2018 14:01:02 +0100 Subject: [PATCH 1464/2608] block/loop: Use global lock for ioctl() operation. commit 310ca162d779efee8a2dc3731439680f3e9c1e86 upstream. syzbot is reporting NULL pointer dereference [1] which is caused by race condition between ioctl(loop_fd, LOOP_CLR_FD, 0) versus ioctl(other_loop_fd, LOOP_SET_FD, loop_fd) due to traversing other loop devices at loop_validate_file() without holding corresponding lo->lo_ctl_mutex locks. Since ioctl() request on loop devices is not frequent operation, we don't need fine grained locking. Let's use global lock in order to allow safe traversal at loop_validate_file(). Note that syzbot is also reporting circular locking dependency between bdev->bd_mutex and lo->lo_ctl_mutex [2] which is caused by calling blkdev_reread_part() with lock held. This patch does not address it. [1] https://syzkaller.appspot.com/bug?id=f3cfe26e785d85f9ee259f385515291d21bd80a3 [2] https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d15889 Signed-off-by: Tetsuo Handa Reported-by: syzbot Reviewed-by: Jan Kara Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 58 ++++++++++++++++++++++---------------------- drivers/block/loop.h | 1 - 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 58164bc29da2..aa9e9b2fce02 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -82,6 +82,7 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_index_mutex); +static DEFINE_MUTEX(loop_ctl_mutex); static int max_part; static int part_shift; @@ -1018,7 +1019,7 @@ static int loop_clr_fd(struct loop_device *lo) */ if (atomic_read(&lo->lo_refcnt) > 1) { lo->lo_flags |= LO_FLAGS_AUTOCLEAR; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return 0; } @@ -1070,12 +1071,12 @@ static int loop_clr_fd(struct loop_device *lo) if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; loop_unprepare_queue(lo); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); /* - * Need not hold lo_ctl_mutex to fput backing file. - * Calling fput holding lo_ctl_mutex triggers a circular + * Need not hold loop_ctl_mutex to fput backing file. + * Calling fput holding loop_ctl_mutex triggers a circular * lock dependency possibility warning as fput can take - * bd_mutex which is usually taken before lo_ctl_mutex. + * bd_mutex which is usually taken before loop_ctl_mutex. */ fput(filp); return 0; @@ -1180,7 +1181,7 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) int ret; if (lo->lo_state != Lo_bound) { - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return -ENXIO; } @@ -1199,10 +1200,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) lo->lo_encrypt_key_size); } - /* Drop lo_ctl_mutex while we call into the filesystem. */ + /* Drop loop_ctl_mutex while we call into the filesystem. */ path = lo->lo_backing_file->f_path; path_get(&path); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); if (!ret) { info->lo_device = huge_encode_dev(stat.dev); @@ -1294,7 +1295,7 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { int err; if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return -EINVAL; } err = loop_get_status(lo, &info64); @@ -1312,7 +1313,7 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { int err; if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return -EINVAL; } err = loop_get_status(lo, &info64); @@ -1370,7 +1371,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, struct loop_device *lo = bdev->bd_disk->private_data; int err; - mutex_lock_nested(&lo->lo_ctl_mutex, 1); + mutex_lock_nested(&loop_ctl_mutex, 1); switch (cmd) { case LOOP_SET_FD: err = loop_set_fd(lo, mode, bdev, arg); @@ -1379,7 +1380,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, err = loop_change_fd(lo, bdev, arg); break; case LOOP_CLR_FD: - /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ + /* loop_clr_fd would have unlocked loop_ctl_mutex on success */ err = loop_clr_fd(lo); if (!err) goto out_unlocked; @@ -1392,7 +1393,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, break; case LOOP_GET_STATUS: err = loop_get_status_old(lo, (struct loop_info __user *) arg); - /* loop_get_status() unlocks lo_ctl_mutex */ + /* loop_get_status() unlocks loop_ctl_mutex */ goto out_unlocked; case LOOP_SET_STATUS64: err = -EPERM; @@ -1402,7 +1403,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, break; case LOOP_GET_STATUS64: err = loop_get_status64(lo, (struct loop_info64 __user *) arg); - /* loop_get_status() unlocks lo_ctl_mutex */ + /* loop_get_status() unlocks loop_ctl_mutex */ goto out_unlocked; case LOOP_SET_CAPACITY: err = -EPERM; @@ -1422,7 +1423,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, default: err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); out_unlocked: return err; @@ -1539,7 +1540,7 @@ loop_get_status_compat(struct loop_device *lo, int err; if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return -EINVAL; } err = loop_get_status(lo, &info64); @@ -1556,16 +1557,16 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, switch(cmd) { case LOOP_SET_STATUS: - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); err = loop_set_status_compat( lo, (const struct compat_loop_info __user *) arg); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); break; case LOOP_GET_STATUS: - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); err = loop_get_status_compat( lo, (struct compat_loop_info __user *) arg); - /* loop_get_status() unlocks lo_ctl_mutex */ + /* loop_get_status() unlocks loop_ctl_mutex */ break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: @@ -1609,7 +1610,7 @@ static void __lo_release(struct loop_device *lo) if (atomic_dec_return(&lo->lo_refcnt)) return; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1627,7 +1628,7 @@ static void __lo_release(struct loop_device *lo) blk_mq_unfreeze_queue(lo->lo_queue); } - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); } static void lo_release(struct gendisk *disk, fmode_t mode) @@ -1673,10 +1674,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data) struct loop_device *lo = ptr; struct loop_func_table *xfer = data; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); if (lo->lo_encryption == xfer) loop_release_xfer(lo); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return 0; } @@ -1849,7 +1850,6 @@ static int loop_add(struct loop_device **l, int i) if (!part_shift) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; - mutex_init(&lo->lo_ctl_mutex); atomic_set(&lo->lo_refcnt, 0); lo->lo_number = i; spin_lock_init(&lo->lo_lock); @@ -1962,19 +1962,19 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); if (lo->lo_state != Lo_unbound) { ret = -EBUSY; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); break; } if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); break; } lo->lo_disk->private_data = NULL; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); break; diff --git a/drivers/block/loop.h b/drivers/block/loop.h index dfc54ceba410..b2251752452b 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -54,7 +54,6 @@ struct loop_device { spinlock_t lo_lock; int lo_state; - struct mutex lo_ctl_mutex; struct kthread_worker worker; struct task_struct *worker_task; bool use_dio; From f1e81ba8a3fa56dcc48828869b392b29559a0ac3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Nov 2018 14:01:03 +0100 Subject: [PATCH 1465/2608] loop: Fold __loop_release into loop_release commit 967d1dc144b50ad005e5eecdfadfbcfb399ffff6 upstream. __loop_release() has a single call site. Fold it there. This is currently not a huge win but it will make following replacement of loop_index_mutex more obvious. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index aa9e9b2fce02..cb13430082ef 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1603,12 +1603,15 @@ out: return err; } -static void __lo_release(struct loop_device *lo) +static void lo_release(struct gendisk *disk, fmode_t mode) { + struct loop_device *lo; int err; + mutex_lock(&loop_index_mutex); + lo = disk->private_data; if (atomic_dec_return(&lo->lo_refcnt)) - return; + goto unlock_index; mutex_lock(&loop_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { @@ -1618,7 +1621,7 @@ static void __lo_release(struct loop_device *lo) */ err = loop_clr_fd(lo); if (!err) - return; + goto unlock_index; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, @@ -1629,12 +1632,7 @@ static void __lo_release(struct loop_device *lo) } mutex_unlock(&loop_ctl_mutex); -} - -static void lo_release(struct gendisk *disk, fmode_t mode) -{ - mutex_lock(&loop_index_mutex); - __lo_release(disk->private_data); +unlock_index: mutex_unlock(&loop_index_mutex); } From c1e63df4f30c3918476ac9bc594355b0e9629893 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Nov 2018 14:01:04 +0100 Subject: [PATCH 1466/2608] loop: Get rid of loop_index_mutex commit 0a42e99b58a208839626465af194cfe640ef9493 upstream. Now that loop_ctl_mutex is global, just get rid of loop_index_mutex as there is no good reason to keep these two separate and it just complicates the locking. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cb13430082ef..0b8b274eb682 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -81,7 +81,6 @@ #include static DEFINE_IDR(loop_index_idr); -static DEFINE_MUTEX(loop_index_mutex); static DEFINE_MUTEX(loop_ctl_mutex); static int max_part; @@ -1588,9 +1587,11 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { struct loop_device *lo; - int err = 0; + int err; - mutex_lock(&loop_index_mutex); + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; lo = bdev->bd_disk->private_data; if (!lo) { err = -ENXIO; @@ -1599,7 +1600,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode) atomic_inc(&lo->lo_refcnt); out: - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); return err; } @@ -1608,12 +1609,11 @@ static void lo_release(struct gendisk *disk, fmode_t mode) struct loop_device *lo; int err; - mutex_lock(&loop_index_mutex); + mutex_lock(&loop_ctl_mutex); lo = disk->private_data; if (atomic_dec_return(&lo->lo_refcnt)) - goto unlock_index; + goto out_unlock; - mutex_lock(&loop_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1621,7 +1621,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) */ err = loop_clr_fd(lo); if (!err) - goto unlock_index; + return; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, @@ -1631,9 +1631,8 @@ static void lo_release(struct gendisk *disk, fmode_t mode) blk_mq_unfreeze_queue(lo->lo_queue); } +out_unlock: mutex_unlock(&loop_ctl_mutex); -unlock_index: - mutex_unlock(&loop_index_mutex); } static const struct block_device_operations lo_fops = { @@ -1926,7 +1925,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) struct kobject *kobj; int err; - mutex_lock(&loop_index_mutex); + mutex_lock(&loop_ctl_mutex); err = loop_lookup(&lo, MINOR(dev) >> part_shift); if (err < 0) err = loop_add(&lo, MINOR(dev) >> part_shift); @@ -1934,7 +1933,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) kobj = NULL; else kobj = get_disk(lo->lo_disk); - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); *part = 0; return kobj; @@ -1944,9 +1943,13 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { struct loop_device *lo; - int ret = -ENOSYS; + int ret; - mutex_lock(&loop_index_mutex); + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; + + ret = -ENOSYS; switch (cmd) { case LOOP_CTL_ADD: ret = loop_lookup(&lo, parm); @@ -1960,7 +1963,6 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; - mutex_lock(&loop_ctl_mutex); if (lo->lo_state != Lo_unbound) { ret = -EBUSY; mutex_unlock(&loop_ctl_mutex); @@ -1972,7 +1974,6 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; } lo->lo_disk->private_data = NULL; - mutex_unlock(&loop_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); break; @@ -1982,7 +1983,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; ret = loop_add(&lo, -1); } - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); return ret; } @@ -2066,10 +2067,10 @@ static int __init loop_init(void) THIS_MODULE, loop_probe, NULL, NULL); /* pre-create number of devices given by config or max_loop */ - mutex_lock(&loop_index_mutex); + mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) loop_add(&lo, i); - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); printk(KERN_INFO "loop: module loaded\n"); return 0; From d2762edcb6af99fc9322bab0b1d4e71a427760e8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 12 Nov 2018 08:42:14 -0700 Subject: [PATCH 1467/2608] loop: Fix double mutex_unlock(&loop_ctl_mutex) in loop_control_ioctl() commit 628bd85947091830a8c4872adfd5ed1d515a9cf2 upstream. Commit 0a42e99b58a20883 ("loop: Get rid of loop_index_mutex") forgot to remove mutex_unlock(&loop_ctl_mutex) from loop_control_ioctl() when replacing loop_index_mutex with loop_ctl_mutex. Fixes: 0a42e99b58a20883 ("loop: Get rid of loop_index_mutex") Reported-by: syzbot Reviewed-by: Ming Lei Reviewed-by: Jan Kara Signed-off-by: Tetsuo Handa Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 0b8b274eb682..11da9707e1ce 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1965,12 +1965,10 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; if (lo->lo_state != Lo_unbound) { ret = -EBUSY; - mutex_unlock(&loop_ctl_mutex); break; } if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; - mutex_unlock(&loop_ctl_mutex); break; } lo->lo_disk->private_data = NULL; From 45662e4b717c7579e49a5e5c5086c543d15af0c4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 9 Jan 2019 19:17:14 -0800 Subject: [PATCH 1468/2608] loop: drop caches if offset or block_size are changed commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 upstream. If we don't drop caches used in old offset or block_size, we can get old data from new offset/block_size, which gives unexpected data to user. For example, Martijn found a loopback bug in the below scenario. 1) LOOP_SET_FD loads first two pages on loop file 2) LOOP_SET_STATUS64 changes the offset on the loop file 3) mount is failed due to the cached pages having wrong superblock Cc: Jens Axboe Cc: linux-block@vger.kernel.org Reported-by: Martijn Coenen Reviewed-by: Bart Van Assche Signed-off-by: Jaegeuk Kim Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 11da9707e1ce..7910dd8b1d3a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1097,6 +1097,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; + if (lo->lo_offset != info->lo_offset || + lo->lo_sizelimit != info->lo_sizelimit) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1125,6 +1131,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { + /* kill_bdev should have truncated all the pages */ + if (lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto exit; + } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto exit; @@ -1346,22 +1360,39 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + int err = 0; + if (lo->lo_state != Lo_bound) return -ENXIO; if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; + if (lo->lo_queue->limits.logical_block_size != arg) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + blk_mq_freeze_queue(lo->lo_queue); + /* kill_bdev should have truncated all the pages */ + if (lo->lo_queue->limits.logical_block_size != arg && + lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } + blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo); - +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - return 0; + return err; } static int lo_ioctl(struct block_device *bdev, fmode_t mode, From 787d30991a505a5c837389704f8be5e12547e652 Mon Sep 17 00:00:00 2001 From: Ivan Mironov Date: Tue, 8 Jan 2019 12:23:53 +0500 Subject: [PATCH 1469/2608] drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock commit 66a8d5bfb518f9f12d47e1d2dce1732279f9451e upstream. Strict requirement of pixclock to be zero breaks support of SDL 1.2 which contains hardcoded table of supported video modes with non-zero pixclock values[1]. To better understand which pixclock values are considered valid and how driver should handle these values, I briefly examined few existing fbdev drivers and documentation in Documentation/fb/. And it looks like there are no strict rules on that and actual behaviour varies: * some drivers treat (pixclock == 0) as "use defaults" (uvesafb.c); * some treat (pixclock == 0) as invalid value which leads to -EINVAL (clps711x-fb.c); * some pass converted pixclock value to hardware (uvesafb.c); * some are trying to find nearest value from predefined table (vga16fb.c, video_gx.c). Given this, I believe that it should be safe to just ignore this value if changing is not supported. It seems that any portable fbdev application which was not written only for one specific device working under one specific kernel version should not rely on any particular behaviour of pixclock anyway. However, while enabling SDL1 applications to work out of the box when there is no /etc/fb.modes with valid settings, this change affects the video mode choosing logic in SDL. Depending on current screen resolution, contents of /etc/fb.modes and resolution requested by application, this may lead to user-visible difference (not always): image will be displayed in a right way, but it will be aligned to the left instead of center. There is no "right behaviour" here as well, as emulated fbdev, opposing to old fbdev drivers, simply ignores any requsts of video mode changes with resolutions smaller than current. The easiest way to reproduce this problem is to install sdl-sopwith[2], remove /etc/fb.modes file if it exists, and then try to run sopwith from console without X. At least in Fedora 29, sopwith may be simply installed from standard repositories. [1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c, vesa_timings [2] http://sdl-sopwith.sourceforge.net/ Signed-off-by: Ivan Mironov Cc: stable@vger.kernel.org Fixes: 79e539453b34e ("DRM: i915: add mode setting support") Fixes: 771fe6b912fca ("drm/radeon: introduce kernel modesetting for radeon hardware") Fixes: 785b93ef8c309 ("drm/kms: move driver specific fb common code to helper functions (v2)") Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190108072353.28078-3-mironov.ivan@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fb_helper.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ad6812baa611..f1259a0c2883 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1578,9 +1578,14 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; - if (var->pixclock != 0 || in_dbg_master()) + if (in_dbg_master()) return -EINVAL; + if (var->pixclock != 0) { + DRM_DEBUG("fbdev emulation doesn't support changing the pixel clock, value of pixclock is ignored\n"); + var->pixclock = 0; + } + /* * Changes struct fb_var_screeninfo are currently not pushed back * to KMS, hence fail if different settings are requested. From 4381a9484b4772589021c61e682177e1602b32fb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Nov 2018 07:05:58 -0500 Subject: [PATCH 1470/2608] media: vb2: be sure to unlock mutex on errors commit c06ef2e9acef4cda1feee2ce055b8086e33d251a upstream. As reported by smatch: drivers/media/common/videobuf2/videobuf2-core.c: drivers/media/common/videobuf2/videobuf2-core.c:2159 vb2_mmap() warn: inconsistent returns 'mutex:&q->mmap_lock'. Locked on: line 2148 Unlocked on: line 2100 line 2108 line 2113 line 2118 line 2156 line 2159 There is one error condition that doesn't unlock a mutex. Fixes: cd26d1c4d1bc ("media: vb2: vb2_mmap: move lock up") Reviewed-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index ba1bea3a36e6..f1725da2a90d 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1952,7 +1952,8 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) if (length < (vma->vm_end - vma->vm_start)) { dprintk(1, "MMAP invalid, as it would overflow buffer length\n"); - return -EINVAL; + ret = -EINVAL; + goto unlock; } ret = call_memop(vb, mmap, vb->planes[plane].mem_priv, vma); From c2912ca3f893a14fd24a6cad165acf61f4d7bc01 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 14 Jan 2019 09:48:09 +0100 Subject: [PATCH 1471/2608] nbd: Use set_blocksize() to set device blocksize commit c8a83a6b54d0ca078de036aafb3f6af58c1dc5eb upstream. NBD can update block device block size implicitely through bd_set_size(). Make it explicitely set blocksize with set_blocksize() as this behavior of bd_set_size() is going away. CC: Josef Bacik Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index fe1414df0f33..d32cd943dff2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -275,9 +275,10 @@ static void nbd_size_update(struct nbd_device *nbd) blk_queue_physical_block_size(nbd->disk->queue, config->blksize); set_capacity(nbd->disk, config->bytesize >> 9); if (bdev) { - if (bdev->bd_disk) + if (bdev->bd_disk) { bd_set_size(bdev, config->bytesize); - else + set_blocksize(bdev, config->blksize); + } else bdev->bd_invalidated = 1; bdput(bdev); } From 3b68e5cf57f08ad1a9dd7f8ca48ae1326ac98824 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Jan 2019 08:09:52 +0100 Subject: [PATCH 1472/2608] Linux 4.14.95 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e9a138dd964a..70cc37cb3e99 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 94 +SUBLEVEL = 95 EXTRAVERSION = NAME = Petit Gorille From a2fc92be9f39dea7cff71a6e8b076dc3e24a5c1a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 4 Jan 2019 16:58:15 -0800 Subject: [PATCH 1473/2608] ipv6: Consider sk_bound_dev_if when binding a socket to a v4 mapped address [ Upstream commit ec90ad334986fa5856d11dd272f7f22fa86c55c4 ] Similar to c5ee066333eb ("ipv6: Consider sk_bound_dev_if when binding a socket to an address"), binding a socket to v4 mapped addresses needs to consider if the socket is bound to a device. This problem also exists from the beginning of git history. Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/af_inet6.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9ccbf74deb99..5a97f53da8c4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -317,6 +317,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { + struct net_device *dev = NULL; int chk_addr_ret; /* Binding to v4-mapped address on a v6-only socket @@ -327,9 +328,17 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } + if (sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + err = -ENODEV; + goto out; + } + } + /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; - chk_addr_ret = inet_addr_type(net, v4addr); + chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr); if (!net->ipv4.sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && v4addr != htonl(INADDR_ANY) && From da689d5e066bf3966856f117dd0f29009e2fac4e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 8 Jan 2019 16:48:05 +0000 Subject: [PATCH 1474/2608] mlxsw: spectrum: Disable lag port TX before removing it [ Upstream commit 8adbe212a159d9c78a90fca1d854f6e63452426b ] Make sure that lag port TX is disabled before mlxsw_sp_port_lag_leave() is called and prevent from possible EMAD error. Fixes: 0d65fc13042f ("mlxsw: spectrum: Implement LAG port join/leave") Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 84864fdcb0e8..cf65b2ee8b95 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4276,12 +4276,15 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, lower_dev, upper_dev); } else if (netif_is_lag_master(upper_dev)) { - if (info->linking) + if (info->linking) { err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); - else + } else { + mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, + false); mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); + } } else if (netif_is_ovs_master(upper_dev)) { if (info->linking) err = mlxsw_sp_port_ovs_join(mlxsw_sp_port); From 28c8d59cf35fce118c59eedfbe1cca94c93f0458 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Jan 2019 16:48:13 +0000 Subject: [PATCH 1475/2608] mlxsw: spectrum_switchdev: Set PVID correctly during VLAN deletion [ Upstream commit 674bed5df4cab8f96d04f7b99608883a48f9226b ] When a VLAN is deleted from a bridge port we should not change the PVID unless the deleted VLAN is the PVID. Fixes: fe9ccc785de5 ("mlxsw: spectrum_switchdev: Don't batch VLAN operations") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 21611613f44c..9052e93e1925 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1424,7 +1424,7 @@ static void mlxsw_sp_bridge_port_vlan_del(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port, u16 vid) { - u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : vid; + u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : mlxsw_sp_port->pvid; struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); From 30ce30211933abf1be12a15b871c0d9188e7bbbb Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 2 Jan 2019 13:01:43 -0800 Subject: [PATCH 1476/2608] net, skbuff: do not prefer skb allocation fails early [ Upstream commit f8c468e8537925e0c4607263f498a1b7c0c8982e ] Commit dcda9b04713c ("mm, tree wide: replace __GFP_REPEAT by __GFP_RETRY_MAYFAIL with more useful semantic") replaced __GFP_REPEAT in alloc_skb_with_frags() with __GFP_RETRY_MAYFAIL when the allocation may directly reclaim. The previous behavior would require reclaim up to 1 << order pages for skb aligned header_len of order > PAGE_ALLOC_COSTLY_ORDER before failing, otherwise the allocations in alloc_skb() would loop in the page allocator looking for memory. __GFP_RETRY_MAYFAIL makes both allocations failable under memory pressure, including for the HEAD allocation. This can cause, among many other things, write() to fail with ENOTCONN during RPC when under memory pressure. These allocations should succeed as they did previous to dcda9b04713c even if it requires calling the oom killer and additional looping in the page allocator to find memory. There is no way to specify the previous behavior of __GFP_REPEAT, but it's unlikely to be necessary since the previous behavior only guaranteed that 1 << order pages would be reclaimed before failing for order > PAGE_ALLOC_COSTLY_ORDER. That reclaim is not guaranteed to be contiguous memory, so repeating for such large orders is usually not beneficial. Removing the setting of __GFP_RETRY_MAYFAIL to restore the previous behavior, specifically not allowing alloc_skb() to fail for small orders and oom kill if necessary rather than allowing RPCs to fail. Fixes: dcda9b04713c ("mm, tree wide: replace __GFP_REPEAT by __GFP_RETRY_MAYFAIL with more useful semantic") Signed-off-by: David Rientjes Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4067fa3fcbb2..873032d1a083 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5154,7 +5154,6 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long chunk; struct sk_buff *skb; struct page *page; - gfp_t gfp_head; int i; *errcode = -EMSGSIZE; @@ -5164,12 +5163,8 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, if (npages > MAX_SKB_FRAGS) return NULL; - gfp_head = gfp_mask; - if (gfp_head & __GFP_DIRECT_RECLAIM) - gfp_head |= __GFP_RETRY_MAYFAIL; - *errcode = -ENOBUFS; - skb = alloc_skb(header_len, gfp_head); + skb = alloc_skb(header_len, gfp_mask); if (!skb) return NULL; From 7cd4cd3226a747a9d4ed15eefe3a284e6a010f6e Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 4 Jan 2019 13:26:10 +0100 Subject: [PATCH 1477/2608] qmi_wwan: add MTU default to qmap network interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f87118d5760f00af7228033fbe783c7f380d2866 ] This patch adds MTU default value to qmap network interface in order to avoid "RTNETLINK answers: No buffer space available" error when setting an ipv6 address. Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 891f8f975b43..25204d2c9e89 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -123,6 +123,7 @@ static void qmimux_setup(struct net_device *dev) dev->addr_len = 0; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; dev->netdev_ops = &qmimux_netdev_ops; + dev->mtu = 1500; dev->needs_free_netdev = true; } From 054aff556424ce9b1c8232e4d0ce44f76e78270d Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 2 Jan 2019 14:45:07 +0800 Subject: [PATCH 1478/2608] r8169: Add support for new Realtek Ethernet [ Upstream commit 36352991835ce99e46b4441dd0eb6980f9a83e8f ] There are two new Realtek Ethernet devices which are re-branded r8168h. Add the IDs to to support them. Signed-off-by: Kai-Heng Feng Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 1b61ce310132..c7364d9496e3 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -324,6 +324,8 @@ enum cfg_version { }; static const struct pci_device_id rtl8169_pci_tbl[] = { + { PCI_VDEVICE(REALTEK, 0x2502), RTL_CFG_1 }, + { PCI_VDEVICE(REALTEK, 0x2600), RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, From f1ce6ee14adf3790d023932d1daa377c68bbd13e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 5 Jan 2019 07:35:04 -0800 Subject: [PATCH 1479/2608] ipv6: Take rcu_read_lock in __inet6_bind for mapped addresses [ Upstream commit d4a7e9bb74b5aaf07b89f6531c080b1130bdf019 ] I realized the last patch calls dev_get_by_index_rcu in a branch not holding the rcu lock. Add the calls to rcu_read_lock and rcu_read_unlock. Fixes: ec90ad334986 ("ipv6: Consider sk_bound_dev_if when binding a socket to a v4 mapped address") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/af_inet6.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5a97f53da8c4..b1ed9254a4b6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -328,17 +328,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } + rcu_read_lock(); if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; - goto out; + goto out_unlock; } } /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr); + rcu_read_unlock(); + if (!net->ipv4.sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && v4addr != htonl(INADDR_ANY) && From ffd2e8a3f6b66691154c0b0220e1cf02381a3180 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 9 Jan 2019 00:24:03 +0100 Subject: [PATCH 1480/2608] net: dsa: mv88x6xxx: mv88e6390 errata [ Upstream commit ea89098ef9a574bceca00d3b5df14aaf0b3f9ccf ] The 6390 copper ports have an errata which require poking magic values into undocumented magic registers and then performing a software reset. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/chip.c | 113 +++++++++++++++++++++++++++++++ drivers/net/dsa/mv88e6xxx/chip.h | 5 ++ drivers/net/dsa/mv88e6xxx/port.h | 10 +++ 3 files changed, 128 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index eebda5ec9676..34998ecd9cc9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1979,6 +1979,107 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) return 0; } +/* The mv88e6390 has some hidden registers used for debug and + * development. The errata also makes use of them. + */ +static int mv88e6390_hidden_write(struct mv88e6xxx_chip *chip, int port, + int reg, u16 val) +{ + u16 ctrl; + int err; + + err = mv88e6xxx_port_write(chip, PORT_RESERVED_1A_DATA_PORT, + PORT_RESERVED_1A, val); + if (err) + return err; + + ctrl = PORT_RESERVED_1A_BUSY | PORT_RESERVED_1A_WRITE | + PORT_RESERVED_1A_BLOCK | port << PORT_RESERVED_1A_PORT_SHIFT | + reg; + + return mv88e6xxx_port_write(chip, PORT_RESERVED_1A_CTRL_PORT, + PORT_RESERVED_1A, ctrl); +} + +static int mv88e6390_hidden_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_wait(chip, PORT_RESERVED_1A_CTRL_PORT, + PORT_RESERVED_1A, PORT_RESERVED_1A_BUSY); +} + + +static int mv88e6390_hidden_read(struct mv88e6xxx_chip *chip, int port, + int reg, u16 *val) +{ + u16 ctrl; + int err; + + ctrl = PORT_RESERVED_1A_BUSY | PORT_RESERVED_1A_READ | + PORT_RESERVED_1A_BLOCK | port << PORT_RESERVED_1A_PORT_SHIFT | + reg; + + err = mv88e6xxx_port_write(chip, PORT_RESERVED_1A_CTRL_PORT, + PORT_RESERVED_1A, ctrl); + if (err) + return err; + + err = mv88e6390_hidden_wait(chip); + if (err) + return err; + + return mv88e6xxx_port_read(chip, PORT_RESERVED_1A_DATA_PORT, + PORT_RESERVED_1A, val); +} + +/* Check if the errata has already been applied. */ +static bool mv88e6390_setup_errata_applied(struct mv88e6xxx_chip *chip) +{ + int port; + int err; + u16 val; + + for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { + err = mv88e6390_hidden_read(chip, port, 0, &val); + if (err) { + dev_err(chip->dev, + "Error reading hidden register: %d\n", err); + return false; + } + if (val != 0x01c0) + return false; + } + + return true; +} + +/* The 6390 copper ports have an errata which require poking magic + * values into undocumented hidden registers and then performing a + * software reset. + */ +static int mv88e6390_setup_errata(struct mv88e6xxx_chip *chip) +{ + int port; + int err; + + if (mv88e6390_setup_errata_applied(chip)) + return 0; + + /* Set the ports into blocking mode */ + for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { + err = mv88e6xxx_port_set_state(chip, port, BR_STATE_DISABLED); + if (err) + return err; + } + + for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { + err = mv88e6390_hidden_write(chip, port, 0, 0x01c0); + if (err) + return err; + } + + return mv88e6xxx_software_reset(chip); +} + static int mv88e6xxx_setup(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; @@ -1990,6 +2091,12 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) mutex_lock(&chip->reg_lock); + if (chip->info->ops->setup_errata) { + err = chip->info->ops->setup_errata(chip); + if (err) + goto unlock; + } + /* Setup Switch Port Registers */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { err = mv88e6xxx_setup_port(chip, i); @@ -2652,6 +2759,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { static const struct mv88e6xxx_ops mv88e6190_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -2687,6 +2795,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { static const struct mv88e6xxx_ops mv88e6190x_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -2722,6 +2831,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { static const struct mv88e6xxx_ops mv88e6191_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -2793,6 +2903,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { static const struct mv88e6xxx_ops mv88e6290_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -3030,6 +3141,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { static const struct mv88e6xxx_ops mv88e6390_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, @@ -3068,6 +3180,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { static const struct mv88e6xxx_ops mv88e6390x_ops = { /* MV88E6XXX_FAMILY_6390 */ + .setup_errata = mv88e6390_setup_errata, .irl_init_all = mv88e6390_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 334f6f7544ba..0913eeca53b3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -222,6 +222,11 @@ struct mv88e6xxx_mdio_bus { }; struct mv88e6xxx_ops { + /* Switch Setup Errata, called early in the switch setup to + * allow any errata actions to be performed + */ + int (*setup_errata)(struct mv88e6xxx_chip *chip); + /* Ingress Rate Limit unit (IRL) operations */ int (*irl_init_all)(struct mv88e6xxx_chip *chip, int port); diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index b16d5f0e6e9c..ccdc67fe9079 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -236,6 +236,16 @@ /* Offset 0x19: Port IEEE Priority Remapping Registers (4-7) */ #define MV88E6095_PORT_IEEE_PRIO_REMAP_4567 0x19 +/* Offset 0x1a: Magic undocumented errata register */ +#define PORT_RESERVED_1A 0x1a +#define PORT_RESERVED_1A_BUSY BIT(15) +#define PORT_RESERVED_1A_WRITE BIT(14) +#define PORT_RESERVED_1A_READ 0 +#define PORT_RESERVED_1A_PORT_SHIFT 5 +#define PORT_RESERVED_1A_BLOCK (0xf << 10) +#define PORT_RESERVED_1A_CTRL_PORT 4 +#define PORT_RESERVED_1A_DATA_PORT 5 + int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, u16 *val); int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, From 96040804c9a8e7af58b28cffdfb3313021da35ec Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 24 Oct 2018 22:59:15 +0530 Subject: [PATCH 1481/2608] gpio: pl061: Move irq_chip definition inside struct pl061 [ Upstream commit ed8dce4c6f726b7f3c6bf40859b92a9e32f189c1 ] Keeping the irq_chip definition static will make it shared with multiple giochips in the system. This practice is considered to be bad and now we will get the below warning from gpiolib core: "detected irqchip that is shared with multiple gpiochips: please fix the driver." Hence, move the irq_chip definition from static to `struct pl061` for using a unique irq_chip for each gpiochip. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-pl061.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index 6aaaab79c205..f6e1e0e306a3 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -54,6 +54,7 @@ struct pl061 { void __iomem *base; struct gpio_chip gc; + struct irq_chip irq_chip; int parent_irq; #ifdef CONFIG_PM @@ -281,15 +282,6 @@ static int pl061_irq_set_wake(struct irq_data *d, unsigned int state) return irq_set_irq_wake(pl061->parent_irq, state); } -static struct irq_chip pl061_irqchip = { - .name = "pl061", - .irq_ack = pl061_irq_ack, - .irq_mask = pl061_irq_mask, - .irq_unmask = pl061_irq_unmask, - .irq_set_type = pl061_irq_type, - .irq_set_wake = pl061_irq_set_wake, -}; - static int pl061_probe(struct amba_device *adev, const struct amba_id *id) { struct device *dev = &adev->dev; @@ -328,6 +320,13 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id) /* * irq_chip support */ + pl061->irq_chip.name = dev_name(dev); + pl061->irq_chip.irq_ack = pl061_irq_ack; + pl061->irq_chip.irq_mask = pl061_irq_mask; + pl061->irq_chip.irq_unmask = pl061_irq_unmask; + pl061->irq_chip.irq_set_type = pl061_irq_type; + pl061->irq_chip.irq_set_wake = pl061_irq_set_wake; + writeb(0, pl061->base + GPIOIE); /* disable irqs */ irq = adev->irq[0]; if (irq < 0) { @@ -336,14 +335,14 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id) } pl061->parent_irq = irq; - ret = gpiochip_irqchip_add(&pl061->gc, &pl061_irqchip, + ret = gpiochip_irqchip_add(&pl061->gc, &pl061->irq_chip, 0, handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_info(&adev->dev, "could not add irqchip\n"); return ret; } - gpiochip_set_chained_irqchip(&pl061->gc, &pl061_irqchip, + gpiochip_set_chained_irqchip(&pl061->gc, &pl061->irq_chip, irq, pl061_irq_handler); amba_set_drvdata(adev, pl061); From d1dd718179b245e9126527f0d1f653dd843d79f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Wed, 31 Oct 2018 17:21:26 -0700 Subject: [PATCH 1482/2608] platform/x86: asus-wmi: Tell the EC the OS will handle the display off hotkey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 78f3ac76d9e5219589718b9e4733bee21627b3f5 ] In the past, Asus firmwares would change the panel backlight directly through the EC when the display off hotkey (Fn+F7) was pressed, and only notify the OS of such change, with 0x33 when the LCD was ON and 0x34 when the LCD was OFF. These are currently mapped to KEY_DISPLAYTOGGLE and KEY_DISPLAY_OFF, respectively. Most recently the EC on Asus most machines lost ability to toggle the LCD backlight directly, but unless the OS informs the firmware it is going to handle the display toggle hotkey events, the firmware still tries change the brightness through the EC, to no effect. The end result is a long list (at Endless we counted 11) of Asus laptop models where the display toggle hotkey does not perform any action. Our firmware engineers contacts at Asus were surprised that there were still machines out there with the old behavior. Calling WMNB(ASUS_WMI_DEVID_BACKLIGHT==0x00050011, 2) on the _WDG device tells the firmware that it should let the OS handle the display toggle event, in which case it will simply notify the OS of a key press with 0x35, as shown by the DSDT excerpts bellow. Scope (_SB) { (...) Device (ATKD) { (...) Name (_WDG, Buffer (0x28) { /* 0000 */ 0xD0, 0x5E, 0x84, 0x97, 0x6D, 0x4E, 0xDE, 0x11, /* 0008 */ 0x8A, 0x39, 0x08, 0x00, 0x20, 0x0C, 0x9A, 0x66, /* 0010 */ 0x4E, 0x42, 0x01, 0x02, 0x35, 0xBB, 0x3C, 0x0B, /* 0018 */ 0xC2, 0xE3, 0xED, 0x45, 0x91, 0xC2, 0x4C, 0x5A, /* 0020 */ 0x6D, 0x19, 0x5D, 0x1C, 0xFF, 0x00, 0x01, 0x08 }) Method (WMNB, 3, Serialized) { CreateDWordField (Arg2, Zero, IIA0) CreateDWordField (Arg2, 0x04, IIA1) Local0 = (Arg1 & 0xFFFFFFFF) (...) If ((Local0 == 0x53564544)) { (...) If ((IIA0 == 0x00050011)) { If ((IIA1 == 0x02)) { ^^PCI0.SBRG.EC0.SPIN (0x72, One) ^^PCI0.SBRG.EC0.BLCT = One } Return (One) } } (...) } (...) } (...) } (...) Scope (_SB.PCI0.SBRG.EC0) { (...) Name (BLCT, Zero) (...) Method (_Q10, 0, NotSerialized) // _Qxx: EC Query { If ((BLCT == Zero)) { Local0 = One Local0 = RPIN (0x72) Local0 ^= One SPIN (0x72, Local0) If (ATKP) { Local0 = (0x34 - Local0) ^^^^ATKD.IANE (Local0) } } ElseIf ((BLCT == One)) { If (ATKP) { ^^^^ATKD.IANE (0x35) } } } (...) } Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/asus-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 7440f650e81a..3f662cd774d7 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2147,7 +2147,8 @@ static int asus_wmi_add(struct platform_device *pdev) err = asus_wmi_backlight_init(asus); if (err && err != -ENODEV) goto fail_backlight; - } + } else + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL); status = wmi_install_notify_handler(asus->driver->event_guid, asus_wmi_notify, asus); From 18947188046cf1dbe85d4e4f27edb55a025b8a92 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Tue, 23 Oct 2018 14:37:39 +0200 Subject: [PATCH 1483/2608] e1000e: allow non-monotonic SYSTIM readings [ Upstream commit e1f65b0d70e9e5c80e15105cd96fa00174d7c436 ] It seems with some NICs supported by the e1000e driver a SYSTIM reading may occasionally be few microseconds before the previous reading and if enabled also pass e1000e_sanitize_systim() without reaching the maximum number of rereads, even if the function is modified to check three consecutive readings (i.e. it doesn't look like a double read error). This causes an underflow in the timecounter and the PHC time jumps hours ahead. This was observed on 82574, I217 and I219. The fastest way to reproduce it is to run a program that continuously calls the PTP_SYS_OFFSET ioctl on the PHC. Modify e1000e_phc_gettime() to use timecounter_cyc2time() instead of timecounter_read() in order to allow non-monotonic SYSTIM readings and prevent the PHC from jumping. Cc: Richard Cochran Signed-off-by: Miroslav Lichvar Acked-by: Jacob Keller Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000e/ptp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index b366885487a8..cd16b70a4e70 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -191,10 +191,14 @@ static int e1000e_phc_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, ptp_clock_info); unsigned long flags; - u64 ns; + u64 cycles, ns; spin_lock_irqsave(&adapter->systim_lock, flags); - ns = timecounter_read(&adapter->tc); + + /* Use timecounter_cyc2time() to allow non-monotonic SYSTIM readings */ + cycles = adapter->cc.read(&adapter->cc); + ns = timecounter_cyc2time(&adapter->tc, cycles); + spin_unlock_irqrestore(&adapter->systim_lock, flags); *ts = ns_to_timespec64(ns); @@ -250,9 +254,12 @@ static void e1000e_systim_overflow_work(struct work_struct *work) systim_overflow_work.work); struct e1000_hw *hw = &adapter->hw; struct timespec64 ts; + u64 ns; - adapter->ptp_clock_info.gettime64(&adapter->ptp_clock_info, &ts); + /* Update the timecounter */ + ns = timecounter_read(&adapter->tc); + ts = ns_to_timespec64(ns); e_dbg("SYSTIM overflow check at %lld.%09lu\n", (long long) ts.tv_sec, ts.tv_nsec); From b7bf54a9cd61766bfc54d5c22d191ff2160f5ea9 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 30 Oct 2018 12:35:45 +0100 Subject: [PATCH 1484/2608] writeback: don't decrement wb->refcnt if !wb->bdi [ Upstream commit 347a28b586802d09604a149c1a1f6de5dccbe6fa ] This happened while running in qemu-system-aarch64, the AMBA PL011 UART driver when enabling CONFIG_DEBUG_TEST_DRIVER_REMOVE. arch_initcall(pl011_init) came before subsys_initcall(default_bdi_init), devtmpfs' handle_remove() crashes because the reference count is a NULL pointer only because wb->bdi hasn't been initialized yet. Rework so that wb_put have an extra check if wb->bdi before decrement wb->refcnt and also add a WARN_ON_ONCE to get a warning if it happens again in other drivers. Fixes: 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- include/linux/backing-dev-defs.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 3c1beffc861a..19240379637f 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -233,6 +233,14 @@ static inline void wb_get(struct bdi_writeback *wb) */ static inline void wb_put(struct bdi_writeback *wb) { + if (WARN_ON_ONCE(!wb->bdi)) { + /* + * A driver bug might cause a file to be removed before bdi was + * initialized. + */ + return; + } + if (wb != &wb->bdi->wb) percpu_ref_put(&wb->refcnt); } From 3811d33a7638fff675d1985971133ddf9a792e4a Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 30 Oct 2018 12:35:44 +0100 Subject: [PATCH 1485/2608] serial: set suppress_bind_attrs flag only if builtin [ Upstream commit 646097940ad35aa2c1f2012af932d55976a9f255 ] When the test 'CONFIG_DEBUG_TEST_DRIVER_REMOVE=y' is enabled, arch_initcall(pl011_init) came before subsys_initcall(default_bdi_init). devtmpfs gets killed because we try to remove a file and decrement the wb reference count before the noop_backing_device_info gets initialized. [ 0.332075] Serial: AMBA PL011 UART driver [ 0.485276] 9000000.pl011: ttyAMA0 at MMIO 0x9000000 (irq = 39, base_baud = 0) is a PL011 rev1 [ 0.502382] console [ttyAMA0] enabled [ 0.515710] Unable to handle kernel paging request at virtual address 0000800074c12000 [ 0.516053] Mem abort info: [ 0.516222] ESR = 0x96000004 [ 0.516417] Exception class = DABT (current EL), IL = 32 bits [ 0.516641] SET = 0, FnV = 0 [ 0.516826] EA = 0, S1PTW = 0 [ 0.516984] Data abort info: [ 0.517149] ISV = 0, ISS = 0x00000004 [ 0.517339] CM = 0, WnR = 0 [ 0.517553] [0000800074c12000] user address but active_mm is swapper [ 0.517928] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 0.518305] Modules linked in: [ 0.518839] CPU: 0 PID: 13 Comm: kdevtmpfs Not tainted 4.19.0-rc5-next-20180928-00002-g2ba39ab0cd01-dirty #82 [ 0.519307] Hardware name: linux,dummy-virt (DT) [ 0.519681] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 0.519959] pc : __destroy_inode+0x94/0x2a8 [ 0.520212] lr : __destroy_inode+0x78/0x2a8 [ 0.520401] sp : ffff0000098c3b20 [ 0.520590] x29: ffff0000098c3b20 x28: 00000000087a3714 [ 0.520904] x27: 0000000000002000 x26: 0000000000002000 [ 0.521179] x25: ffff000009583000 x24: 0000000000000000 [ 0.521467] x23: ffff80007bb52000 x22: ffff80007bbaa7c0 [ 0.521737] x21: ffff0000093f9338 x20: 0000000000000000 [ 0.522033] x19: ffff80007bbb05d8 x18: 0000000000000400 [ 0.522376] x17: 0000000000000000 x16: 0000000000000000 [ 0.522727] x15: 0000000000000400 x14: 0000000000000400 [ 0.523068] x13: 0000000000000001 x12: 0000000000000001 [ 0.523421] x11: 0000000000000000 x10: 0000000000000970 [ 0.523749] x9 : ffff0000098c3a60 x8 : ffff80007bbab190 [ 0.524017] x7 : ffff80007bbaa880 x6 : 0000000000000c88 [ 0.524305] x5 : ffff0000093d96c8 x4 : 61c8864680b583eb [ 0.524567] x3 : ffff0000093d6180 x2 : ffffffffffffffff [ 0.524872] x1 : 0000800074c12000 x0 : 0000800074c12000 [ 0.525207] Process kdevtmpfs (pid: 13, stack limit = 0x(____ptrval____)) [ 0.525529] Call trace: [ 0.525806] __destroy_inode+0x94/0x2a8 [ 0.526108] destroy_inode+0x34/0x88 [ 0.526370] evict+0x144/0x1c8 [ 0.526636] iput+0x184/0x230 [ 0.526871] dentry_unlink_inode+0x118/0x130 [ 0.527152] d_delete+0xd8/0xe0 [ 0.527420] vfs_unlink+0x240/0x270 [ 0.527665] handle_remove+0x1d8/0x330 [ 0.527875] devtmpfsd+0x138/0x1c8 [ 0.528085] kthread+0x14c/0x158 [ 0.528291] ret_from_fork+0x10/0x18 [ 0.528720] Code: 92800002 aa1403e0 d538d081 8b010000 (c85f7c04) [ 0.529367] ---[ end trace 5a3dee47727f877c ]--- Rework to set suppress_bind_attrs flag to avoid removing the device when CONFIG_DEBUG_TEST_DRIVER_REMOVE=y. This applies for pic32_uart and xilinx_uartps as well. Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/amba-pl011.c | 2 ++ drivers/tty/serial/pic32_uart.c | 1 + drivers/tty/serial/xilinx_uartps.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index c9f701aca677..4a4a9f33715c 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2800,6 +2800,7 @@ static struct platform_driver arm_sbsa_uart_platform_driver = { .name = "sbsa-uart", .of_match_table = of_match_ptr(sbsa_uart_of_match), .acpi_match_table = ACPI_PTR(sbsa_uart_acpi_match), + .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), }, }; @@ -2828,6 +2829,7 @@ static struct amba_driver pl011_driver = { .drv = { .name = "uart-pl011", .pm = &pl011_dev_pm_ops, + .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), }, .id_table = pl011_ids, .probe = pl011_probe, diff --git a/drivers/tty/serial/pic32_uart.c b/drivers/tty/serial/pic32_uart.c index 00a33eb859d3..e3d7d9d6c599 100644 --- a/drivers/tty/serial/pic32_uart.c +++ b/drivers/tty/serial/pic32_uart.c @@ -920,6 +920,7 @@ static struct platform_driver pic32_uart_platform_driver = { .driver = { .name = PIC32_DEV_NAME, .of_match_table = of_match_ptr(pic32_serial_dt_ids), + .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_PIC32), }, }; diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 897b1c515d00..217686cb4cd3 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1644,6 +1644,7 @@ static struct platform_driver cdns_uart_platform_driver = { .name = CDNS_UART_NAME, .of_match_table = cdns_uart_of_match, .pm = &cdns_uart_dev_pm_ops, + .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_XILINX_PS_UART), }, }; From a72a4f82f90a5d32d8fd6b00776d2c7d756cbb02 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 13 Nov 2018 12:01:30 +0900 Subject: [PATCH 1486/2608] ALSA: oxfw: add support for APOGEE duet FireWire [ Upstream commit fba43f454cdf9caa3185219d116bd2a6e6354552 ] This commit adds support for APOGEE duet FireWire, launched 2007, already discontinued. This model uses Oxford Semiconductor FW971 as its communication engine. Below is information on Configuration ROM of this unit. The unit supports some AV/C commands defined by Audio subunit specification and vendor dependent commands. $ ./hinawa-config-rom-printer /dev/fw1 { 'bus-info': { 'adj': False, 'bmc': False, 'chip_ID': 42949742248, 'cmc': False, 'cyc_clk_acc': 255, 'generation': 0, 'imc': False, 'isc': True, 'link_spd': 3, 'max_ROM': 0, 'max_rec': 64, 'name': '1394', 'node_vendor_ID': 987, 'pmc': False}, 'root-directory': [ ['VENDOR', 987], ['DESCRIPTOR', 'Apogee Electronics'], ['MODEL', 122333], ['DESCRIPTOR', 'Duet'], [ 'NODE_CAPABILITIES', { 'addressing': {'64': True, 'fix': True, 'prv': False}, 'misc': {'int': False, 'ms': False, 'spt': True}, 'state': { 'atn': False, 'ded': False, 'drq': True, 'elo': False, 'init': False, 'lst': True, 'off': False}, 'testing': {'bas': False, 'ext': False}}], [ 'UNIT', [ ['SPECIFIER_ID', 41005], ['VERSION', 65537], ['MODEL', 122333], ['DESCRIPTOR', 'Duet']]]]} Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/firewire/Kconfig | 1 + sound/firewire/oxfw/oxfw.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig index 529d9f405fa9..0cb65d0864cc 100644 --- a/sound/firewire/Kconfig +++ b/sound/firewire/Kconfig @@ -41,6 +41,7 @@ config SND_OXFW * Mackie(Loud) U.420/U.420d * TASCAM FireOne * Stanton Controllers & Systems 1 Deck/Mixer + * APOGEE duet FireWire To compile this driver as a module, choose M here: the module will be called snd-oxfw. diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index a315d5b6b86b..1554dc98e092 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -20,6 +20,7 @@ #define VENDOR_LACIE 0x00d04b #define VENDOR_TASCAM 0x00022e #define OUI_STANTON 0x001260 +#define OUI_APOGEE 0x0003db #define MODEL_SATELLITE 0x00200f @@ -442,6 +443,13 @@ static const struct ieee1394_device_id oxfw_id_table[] = { .vendor_id = OUI_STANTON, .model_id = 0x002000, }, + // APOGEE, duet FireWire + { + .match_flags = IEEE1394_MATCH_VENDOR_ID | + IEEE1394_MATCH_MODEL_ID, + .vendor_id = OUI_APOGEE, + .model_id = 0x01dddd, + }, { } }; MODULE_DEVICE_TABLE(ieee1394, oxfw_id_table); From ff3c3ca3b8f5352ae7f8e037e583202bd7438278 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 9 Nov 2018 23:13:13 +0100 Subject: [PATCH 1487/2608] x86/mce: Fix -Wmissing-prototypes warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 68b5e4326e4b8ac9080835005d8254fed0fb3c56 ] Add the proper includes and make smca_get_name() static. Fix an actual bug too which the warning triggered: arch/x86/kernel/cpu/mcheck/therm_throt.c:395:39: error: conflicting \ types for ‘smp_thermal_interrupt’ asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *r) ^~~~~~~~~~~~~~~~~~~~~ In file included from arch/x86/kernel/cpu/mcheck/therm_throt.c:29: ./arch/x86/include/asm/traps.h:107:17: note: previous declaration of \ ‘smp_thermal_interrupt’ was here asmlinkage void smp_thermal_interrupt(void); Signed-off-by: Borislav Petkov Cc: Yi Wang Cc: Michael Matz Cc: x86@kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811081633160.1549@nanos.tec.linutronix.de Signed-off-by: Sasha Levin --- arch/x86/include/asm/traps.h | 6 +++--- arch/x86/kernel/cpu/mcheck/mce_amd.c | 5 +++-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 ++- arch/x86/kernel/cpu/mcheck/threshold.c | 3 ++- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 3de69330e6c5..afbc87206886 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -104,9 +104,9 @@ extern int panic_on_unrecovered_nmi; void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 -asmlinkage void smp_thermal_interrupt(void); -asmlinkage void smp_threshold_interrupt(void); -asmlinkage void smp_deferred_error_interrupt(void); +asmlinkage void smp_thermal_interrupt(struct pt_regs *regs); +asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); +asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); #endif extern void ist_enter(struct pt_regs *regs); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index beec0daecbc5..4fa97a44e73f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -99,7 +100,7 @@ static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 } }; -const char *smca_get_name(enum smca_bank_types t) +static const char *smca_get_name(enum smca_bank_types t) { if (t >= N_SMCA_BANK_TYPES) return NULL; @@ -823,7 +824,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) mce_log(&m); } -asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs) { entering_irq(); trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 2da67b70ba98..ee229ceee745 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -390,7 +391,7 @@ static void unexpected_thermal_interrupt(void) static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; -asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *r) +asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *regs) { entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 2b584b319eff..c21e0a1efd0f 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -18,7 +19,7 @@ static void default_threshold_interrupt(void) void (*mce_threshold_vector)(void) = default_threshold_interrupt; -asmlinkage __visible void __irq_entry smp_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_threshold_interrupt(struct pt_regs *regs) { entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); From c85acbf72786a5901a2170b2145761ed7cf06429 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 13 Nov 2018 22:42:44 +0000 Subject: [PATCH 1488/2608] MIPS: SiByte: Enable swiotlb for SWARM, LittleSur and BigSur [ Upstream commit e4849aff1e169b86c561738daf8ff020e9de1011 ] The Broadcom SiByte BCM1250, BCM1125, and BCM1125H SOCs have an onchip DRAM controller that supports memory amounts of up to 16GiB, and due to how the address decoder has been wired in the SOC any memory beyond 1GiB is actually mapped starting from 4GiB physical up, that is beyond the 32-bit addressable limit[1]. Consequently if the maximum amount of memory has been installed, then it will span up to 19GiB. Many of the evaluation boards we support that are based on one of these SOCs have their memory soldered and the amount present fits in the 32-bit address range. The BCM91250A SWARM board however has actual DIMM slots and accepts, depending on the peripherals revision of the SOC, up to 4GiB or 8GiB of memory in commercially available JEDEC modules[2]. I believe this is also the case with the BCM91250C2 LittleSur board. This means that up to either 3GiB or 7GiB of memory requires 64-bit addressing to access. I believe the BCM91480B BigSur board, which has the BCM1480 SOC instead, accepts at least as much memory, although I have no documentation or actual hardware available to verify that. Both systems have PCI slots installed for use by any PCI option boards, including ones that only support 32-bit addressing (additionally the 32-bit PCI host bridge of the BCM1250, BCM1125, and BCM1125H SOCs limits addressing to 32-bits), and there is no IOMMU available. Therefore for PCI DMA to work in the presence of memory beyond enable swiotlb for the affected systems. All the other SOC onchip DMA devices use 40-bit addressing and therefore can address the whole memory, so only enable swiotlb if PCI support and support for DMA beyond 4GiB have been both enabled in the configuration of the kernel. This shows up as follows: Broadcom SiByte BCM1250 B2 @ 800 MHz (SB1 rev 2) Board type: SiByte BCM91250A (SWARM) Determined physical RAM map: memory: 000000000fe7fe00 @ 0000000000000000 (usable) memory: 000000001ffffe00 @ 0000000080000000 (usable) memory: 000000000ffffe00 @ 00000000c0000000 (usable) memory: 0000000087fffe00 @ 0000000100000000 (usable) software IO TLB: mapped [mem 0xcbffc000-0xcfffc000] (64MB) in the bootstrap log and removes failures like these: defxx 0000:02:00.0: dma_direct_map_page: overflow 0x0000000185bc6080+4608 of device mask ffffffff bus mask 0 fddi0: Receive buffer allocation failed fddi0: Adapter open failed! IP-Config: Failed to open fddi0 defxx 0000:09:08.0: dma_direct_map_page: overflow 0x0000000185bc6080+4608 of device mask ffffffff bus mask 0 fddi1: Receive buffer allocation failed fddi1: Adapter open failed! IP-Config: Failed to open fddi1 when memory beyond 4GiB is handed out to devices that can only do 32-bit addressing. This updates commit cce335ae47e2 ("[MIPS] 64-bit Sibyte kernels need DMA32."). References: [1] "BCM1250/BCM1125/BCM1125H User Manual", Revision 1250_1125-UM100-R, Broadcom Corporation, 21 Oct 2002, Section 3: "System Overview", "Memory Map", pp. 34-38 [2] "BCM91250A User Manual", Revision 91250A-UM100-R, Broadcom Corporation, 18 May 2004, Section 3: "Physical Description", "Supported DRAM", p. 23 Signed-off-by: Maciej W. Rozycki [paul.burton@mips.com: Remove GPL text from dma.c; SPDX tag covers it] Signed-off-by: Paul Burton Reviewed-by: Christoph Hellwig Patchwork: https://patchwork.linux-mips.org/patch/21108/ References: cce335ae47e2 ("[MIPS] 64-bit Sibyte kernels need DMA32.") Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/Kconfig | 3 +++ arch/mips/sibyte/common/Makefile | 1 + arch/mips/sibyte/common/dma.c | 14 ++++++++++++++ 3 files changed, 18 insertions(+) create mode 100644 arch/mips/sibyte/common/dma.c diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ae4450e891ab..8d4470f44b74 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -800,6 +800,7 @@ config SIBYTE_SWARM select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SIBYTE_LITTLESUR bool "Sibyte BCM91250C2-LittleSur" @@ -822,6 +823,7 @@ config SIBYTE_SENTOSA select SYS_HAS_CPU_SB1 select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SIBYTE_BIGSUR bool "Sibyte BCM91480B-BigSur" @@ -835,6 +837,7 @@ config SIBYTE_BIGSUR select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SNI_RM bool "SNI RM200/300/400" diff --git a/arch/mips/sibyte/common/Makefile b/arch/mips/sibyte/common/Makefile index b3d6bf23a662..3ef3fb658136 100644 --- a/arch/mips/sibyte/common/Makefile +++ b/arch/mips/sibyte/common/Makefile @@ -1,4 +1,5 @@ obj-y := cfe.o +obj-$(CONFIG_SWIOTLB) += dma.o obj-$(CONFIG_SIBYTE_BUS_WATCHER) += bus_watcher.o obj-$(CONFIG_SIBYTE_CFE_CONSOLE) += cfe_console.o obj-$(CONFIG_SIBYTE_TBPROF) += sb_tbprof.o diff --git a/arch/mips/sibyte/common/dma.c b/arch/mips/sibyte/common/dma.c new file mode 100644 index 000000000000..eb47a94f3583 --- /dev/null +++ b/arch/mips/sibyte/common/dma.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * DMA support for Broadcom SiByte platforms. + * + * Copyright (c) 2018 Maciej W. Rozycki + */ + +#include +#include + +void __init plat_swiotlb_setup(void) +{ + swiotlb_init(1); +} From 85a3e68205d5b7b4c2dce7369964c2721d026d39 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 17 Oct 2018 17:26:22 +0200 Subject: [PATCH 1489/2608] arm64: perf: set suppress_bind_attrs flag to true [ Upstream commit 81e9fa8bab381f8b6eb04df7cdf0f71994099bd4 ] The armv8_pmuv3 driver doesn't have a remove function, and when the test 'CONFIG_DEBUG_TEST_DRIVER_REMOVE=y' is enabled, the following Call trace can be seen. [ 1.424287] Failed to register pmu: armv8_pmuv3, reason -17 [ 1.424870] WARNING: CPU: 0 PID: 1 at ../kernel/events/core.c:11771 perf_event_sysfs_init+0x98/0xdc [ 1.425220] Modules linked in: [ 1.425531] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 4.19.0-rc7-next-20181012-00003-ge7a97b1ad77b-dirty #35 [ 1.425951] Hardware name: linux,dummy-virt (DT) [ 1.426212] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 1.426458] pc : perf_event_sysfs_init+0x98/0xdc [ 1.426720] lr : perf_event_sysfs_init+0x98/0xdc [ 1.426908] sp : ffff00000804bd50 [ 1.427077] x29: ffff00000804bd50 x28: ffff00000934e078 [ 1.427429] x27: ffff000009546000 x26: 0000000000000007 [ 1.427757] x25: ffff000009280710 x24: 00000000ffffffef [ 1.428086] x23: ffff000009408000 x22: 0000000000000000 [ 1.428415] x21: ffff000009136008 x20: ffff000009408730 [ 1.428744] x19: ffff80007b20b400 x18: 000000000000000a [ 1.429075] x17: 0000000000000000 x16: 0000000000000000 [ 1.429418] x15: 0000000000000400 x14: 2e79726f74636572 [ 1.429748] x13: 696420656d617320 x12: 656874206e692065 [ 1.430060] x11: 6d616e20656d6173 x10: 2065687420687469 [ 1.430335] x9 : ffff00000804bd50 x8 : 206e6f7361657220 [ 1.430610] x7 : 2c3376756d705f38 x6 : ffff00000954d7ce [ 1.430880] x5 : 0000000000000000 x4 : 0000000000000000 [ 1.431226] x3 : 0000000000000000 x2 : ffffffffffffffff [ 1.431554] x1 : 4d151327adc50b00 x0 : 0000000000000000 [ 1.431868] Call trace: [ 1.432102] perf_event_sysfs_init+0x98/0xdc [ 1.432382] do_one_initcall+0x6c/0x1a8 [ 1.432637] kernel_init_freeable+0x1bc/0x280 [ 1.432905] kernel_init+0x18/0x160 [ 1.433115] ret_from_fork+0x10/0x18 [ 1.433297] ---[ end trace 27fd415390eb9883 ]--- Rework to set suppress_bind_attrs flag to avoid removing the device when CONFIG_DEBUG_TEST_DRIVER_REMOVE=y, since there's no real reason to remove the armv8_pmuv3 driver. Cc: Arnd Bergmann Co-developed-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/perf_event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 86249a24592d..05fdae70e9f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1130,6 +1130,7 @@ static struct platform_driver armv8_pmu_driver = { .driver = { .name = ARMV8_PMU_PDEV_NAME, .of_match_table = armv8_pmu_of_device_ids, + .suppress_bind_attrs = true, }, .probe = armv8_pmu_device_probe, }; From 8f472434b860321ffecdda32e30b429ff9e3892c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 9 Nov 2018 20:44:36 +0900 Subject: [PATCH 1490/2608] usb: gadget: udc: renesas_usb3: add a safety connection way for forced_b_device [ Upstream commit ceb94bc52c437463f0903e61060a94a2226fb672 ] This patch adds a safety connection way for "forced_b_device" with "workaround_for_vbus" like below: < Example for R-Car E3 Ebisu > # modprobe # echo 1 > /sys/kernel/debug/ee020000.usb/b_device (connect a usb cable to host side.) # echo 2 > /sys/kernel/debug/ee020000.usb/b_device Previous code should have connected a usb cable before the "b_device" is set to 1 on the Ebisu board. However, if xHCI driver on the board is probed, it causes some troubles: - Conflicts USB VBUS/signals between the board and another host. - "Cannot enable. Maybe the USB cable is bad?" might happen on both the board and another host with a usb hub. - Cannot enumerate a usb gadget correctly because an interruption of VBUS change happens unexpectedly. Reported-by: Kazuya Mizuguchi Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/renesas_usb3.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index ade0723787e5..e5355ede2c46 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -352,6 +352,7 @@ struct renesas_usb3 { bool extcon_host; /* check id and set EXTCON_USB_HOST */ bool extcon_usb; /* check vbus and set EXTCON_USB */ bool forced_b_device; + bool start_to_connect; }; #define gadget_to_renesas_usb3(_gadget) \ @@ -470,7 +471,8 @@ static void usb3_init_axi_bridge(struct renesas_usb3 *usb3) static void usb3_init_epc_registers(struct renesas_usb3 *usb3) { usb3_write(usb3, ~0, USB3_USB_INT_STA_1); - usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG); + if (!usb3->workaround_for_vbus) + usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG); } static bool usb3_wakeup_usb2_phy(struct renesas_usb3 *usb3) @@ -676,8 +678,7 @@ static void usb3_mode_config(struct renesas_usb3 *usb3, bool host, bool a_dev) usb3_set_mode(usb3, host); usb3_vbus_out(usb3, a_dev); /* for A-Peripheral or forced B-device mode */ - if ((!host && a_dev) || - (usb3->workaround_for_vbus && usb3->forced_b_device)) + if ((!host && a_dev) || usb3->start_to_connect) usb3_connect(usb3); spin_unlock_irqrestore(&usb3->lock, flags); } @@ -2369,7 +2370,11 @@ static ssize_t renesas_usb3_b_device_write(struct file *file, if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT; - if (!strncmp(buf, "1", 1)) + usb3->start_to_connect = false; + if (usb3->workaround_for_vbus && usb3->forced_b_device && + !strncmp(buf, "2", 1)) + usb3->start_to_connect = true; + else if (!strncmp(buf, "1", 1)) usb3->forced_b_device = true; else usb3->forced_b_device = false; @@ -2377,7 +2382,7 @@ static ssize_t renesas_usb3_b_device_write(struct file *file, if (usb3->workaround_for_vbus) usb3_disconnect(usb3); - /* Let this driver call usb3_connect() anyway */ + /* Let this driver call usb3_connect() if needed */ usb3_check_id(usb3); return count; From fbbfb5c6bf28e8058df01b3c51b5f4eca9af1046 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 16 Nov 2018 14:12:02 +0100 Subject: [PATCH 1491/2608] selinux: always allow mounting submounts [ Upstream commit 2cbdcb882f97a45f7475c67ac6257bbc16277dfe ] If a superblock has the MS_SUBMOUNT flag set, we should always allow mounting it. These mounts are done automatically by the kernel either as part of mounting some parent mount (e.g. debugfs always mounts tracefs under "tracing" for compatibility) or they are mounted automatically as needed on subdirectory accesses (e.g. NFS crossmnt mounts). Since such automounts are either an implicit consequence of the parent mount (which is already checked) or they can happen during regular accesses (where it doesn't make sense to check against the current task's context), the mount permission check should be skipped for them. Without this patch, attempts to access contents of an automounted directory can cause unexpected SELinux denials. In the current kernel tree, the MS_SUBMOUNT flag is set only via vfs_submount(), which is called only from the following places: - AFS, when automounting special "symlinks" referencing other cells - CIFS, when automounting "referrals" - NFS, when automounting subtrees - debugfs, when automounting tracefs In all cases the submounts are meant to be transparent to the user and it makes sense that if mounting the master is allowed, then so should be the automounts. Note that CAP_SYS_ADMIN capability checking is already skipped for (SB_KERNMOUNT|SB_SUBMOUNT) in: - sget_userns() in fs/super.c: if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !(type->fs_flags & FS_USERNS_MOUNT) && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - sget() in fs/super.c: /* Ensure the requestor has permissions over the target filesystem */ if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); Verified internally on patched RHEL 7.6 with a reproducer using NFS+httpd and selinux-tesuite. Fixes: 93faccbbfa95 ("fs: Better permission checking for submounts") Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f5d304736852..d6b9ed34ceae 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2820,7 +2820,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) return rc; /* Allow all mounts performed by the kernel */ - if (flags & MS_KERNMOUNT) + if (flags & (MS_KERNMOUNT | MS_SUBMOUNT)) return 0; ad.type = LSM_AUDIT_DATA_DENTRY; From ce91ad1a6f6f1f5b0901c21420cb5b0938909c32 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 25 Nov 2018 17:13:08 -0500 Subject: [PATCH 1492/2608] rxe: IB_WR_REG_MR does not capture MR's iova field [ Upstream commit b024dd0eba6e6d568f69d63c5e3153aba94c23e3 ] FRWR memory registration is done with a series of calls and WRs. 1. ULP invokes ib_dma_map_sg() 2. ULP invokes ib_map_mr_sg() 3. ULP posts an IB_WR_REG_MR on the Send queue Step 2 generates an iova. It is permissible for ULPs to change this iova (with certain restrictions) between steps 2 and 3. rxe_map_mr_sg captures the MR's iova but later when rxe processes the REG_MR WR, it ignores the MR's iova field. If a ULP alters the MR's iova after step 2 but before step 3, rxe never captures that change. When the remote sends an RDMA Read targeting that MR, rxe looks up the R_key, but the altered iova does not match the iova stored in the MR, causing the RDMA Read request to fail. Reported-by: Anna Schumaker Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_req.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index de853bcc2384..08ae4f3a6a37 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -640,6 +640,7 @@ next_wqe: rmr->access = wqe->wr.wr.reg.access; rmr->lkey = wqe->wr.wr.reg.key; rmr->rkey = wqe->wr.wr.reg.key; + rmr->iova = wqe->wr.wr.reg.mr->iova; wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; } else { From f63e5b19854654296ac3033c489db9542c02f36f Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Fri, 19 Oct 2018 03:30:20 -0500 Subject: [PATCH 1493/2608] jffs2: Fix use of uninitialized delayed_work, lockdep breakage [ Upstream commit a788c5272769ddbcdbab297cf386413eeac04463 ] jffs2_sync_fs makes the assumption that if CONFIG_JFFS2_FS_WRITEBUFFER is defined then a write buffer is available and has been initialized. However, this does is not the case when the mtd device has no out-of-band buffer: int jffs2_nand_flash_setup(struct jffs2_sb_info *c) { if (!c->mtd->oobsize) return 0; ... The resulting call to cancel_delayed_work_sync passing a uninitialized (but zeroed) delayed_work struct forces lockdep to become disabled. [ 90.050639] overlayfs: upper fs does not support tmpfile. [ 90.652264] INFO: trying to register non-static key. [ 90.662171] the code is fine but needs lockdep annotation. [ 90.673090] turning off the locking correctness validator. [ 90.684021] CPU: 0 PID: 1762 Comm: mount_root Not tainted 4.14.63 #0 [ 90.696672] Stack : 00000000 00000000 80d8f6a2 00000038 805f0000 80444600 8fe364f4 805dfbe7 [ 90.713349] 80563a30 000006e2 8068370c 00000001 00000000 00000001 8e2fdc48 ffffffff [ 90.730020] 00000000 00000000 80d90000 00000000 00000106 00000000 6465746e 312e3420 [ 90.746690] 6b636f6c 03bf0000 f8000000 20676e69 00000000 80000000 00000000 8e2c2a90 [ 90.763362] 80d90000 00000001 00000000 8e2c2a90 00000003 80260dc0 08052098 80680000 [ 90.780033] ... [ 90.784902] Call Trace: [ 90.789793] [<8000f0d8>] show_stack+0xb8/0x148 [ 90.798659] [<8005a000>] register_lock_class+0x270/0x55c [ 90.809247] [<8005cb64>] __lock_acquire+0x13c/0xf7c [ 90.818964] [<8005e314>] lock_acquire+0x194/0x1dc [ 90.828345] [<8003f27c>] flush_work+0x200/0x24c [ 90.837374] [<80041dfc>] __cancel_work_timer+0x158/0x210 [ 90.847958] [<801a8770>] jffs2_sync_fs+0x20/0x54 [ 90.857173] [<80125cf4>] iterate_supers+0xf4/0x120 [ 90.866729] [<80158fc4>] sys_sync+0x44/0x9c [ 90.875067] [<80014424>] syscall_common+0x34/0x58 Signed-off-by: Daniel Santos Reviewed-by: Hou Tao Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- fs/jffs2/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index bc00cc385b77..83340496645b 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -101,7 +101,8 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); #ifdef CONFIG_JFFS2_FS_WRITEBUFFER - cancel_delayed_work_sync(&c->wbuf_dwork); + if (jffs2_is_writebuffered(c)) + cancel_delayed_work_sync(&c->wbuf_dwork); #endif mutex_lock(&c->alloc_sem); From bf34ede3a11d58e3bcf7007bf14f6a94562bc8f1 Mon Sep 17 00:00:00 2001 From: "A.s. Dong" Date: Wed, 14 Nov 2018 13:02:00 +0000 Subject: [PATCH 1494/2608] clk: imx: make mux parent strings const [ Upstream commit 9e5ef7a57ca75a1b9411c46caeeb6881124284a3 ] As the commit 2893c379461a ("clk: make strings in parent name arrays const"), let's make the parent strings const, otherwise we may meet the following warning when compiling: drivers/clk/imx/clk-imx7ulp.c: In function 'imx7ulp_clocks_init': drivers/clk/imx/clk-imx7ulp.c:73:35: warning: passing argument 5 of 'imx_clk_mux_flags' discards 'const' qualifier from pointer target type clks[IMX7ULP_CLK_APLL_PRE_SEL] = imx_clk_mux_flags("apll_pre_sel", base + 0x508, 0, 1, pll_pre_sels, ARRAY_SIZE(pll_pre_sels), CLK_SET_PARENT_GATE); ^ In file included from drivers/clk/imx/clk-imx7ulp.c:23:0: drivers/clk/imx/clk.h:200:27: note: expected 'const char **' but argument is of type 'const char * const*' ... Cc: Stephen Boyd Cc: Michael Turquette Cc: Shawn Guo Signed-off-by: Dong Aisheng Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-busy.c | 2 +- drivers/clk/imx/clk-fixup-mux.c | 2 +- drivers/clk/imx/clk.h | 18 +++++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/clk/imx/clk-busy.c b/drivers/clk/imx/clk-busy.c index 5cc99590f9a3..097625c5715c 100644 --- a/drivers/clk/imx/clk-busy.c +++ b/drivers/clk/imx/clk-busy.c @@ -154,7 +154,7 @@ static struct clk_ops clk_busy_mux_ops = { struct clk *imx_clk_busy_mux(const char *name, void __iomem *reg, u8 shift, u8 width, void __iomem *busy_reg, u8 busy_shift, - const char **parent_names, int num_parents) + const char * const *parent_names, int num_parents) { struct clk_busy_mux *busy; struct clk *clk; diff --git a/drivers/clk/imx/clk-fixup-mux.c b/drivers/clk/imx/clk-fixup-mux.c index c9b327e0a8dd..44817c1b0b88 100644 --- a/drivers/clk/imx/clk-fixup-mux.c +++ b/drivers/clk/imx/clk-fixup-mux.c @@ -70,7 +70,7 @@ static const struct clk_ops clk_fixup_mux_ops = { }; struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, + u8 shift, u8 width, const char * const *parents, int num_parents, void (*fixup)(u32 *val)) { struct clk_fixup_mux *fixup_mux; diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h index d69c4bbf3597..b03fbd502528 100644 --- a/drivers/clk/imx/clk.h +++ b/drivers/clk/imx/clk.h @@ -63,14 +63,14 @@ struct clk *imx_clk_busy_divider(const char *name, const char *parent_name, struct clk *imx_clk_busy_mux(const char *name, void __iomem *reg, u8 shift, u8 width, void __iomem *busy_reg, u8 busy_shift, - const char **parent_names, int num_parents); + const char * const *parent_names, int num_parents); struct clk *imx_clk_fixup_divider(const char *name, const char *parent, void __iomem *reg, u8 shift, u8 width, void (*fixup)(u32 *val)); struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, + u8 shift, u8 width, const char * const *parents, int num_parents, void (*fixup)(u32 *val)); static inline struct clk *imx_clk_fixed(const char *name, int rate) @@ -79,7 +79,8 @@ static inline struct clk *imx_clk_fixed(const char *name, int rate) } static inline struct clk *imx_clk_mux_ldb(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, int num_parents) + u8 shift, u8 width, const char * const *parents, + int num_parents) { return clk_register_mux(NULL, name, parents, num_parents, CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT, reg, @@ -178,7 +179,8 @@ static inline struct clk *imx_clk_gate4(const char *name, const char *parent, } static inline struct clk *imx_clk_mux(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, int num_parents) + u8 shift, u8 width, const char * const *parents, + int num_parents) { return clk_register_mux(NULL, name, parents, num_parents, CLK_SET_RATE_NO_REPARENT, reg, shift, @@ -186,7 +188,8 @@ static inline struct clk *imx_clk_mux(const char *name, void __iomem *reg, } static inline struct clk *imx_clk_mux2(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, int num_parents) + u8 shift, u8 width, const char * const *parents, + int num_parents) { return clk_register_mux(NULL, name, parents, num_parents, CLK_SET_RATE_NO_REPARENT | CLK_OPS_PARENT_ENABLE, @@ -194,8 +197,9 @@ static inline struct clk *imx_clk_mux2(const char *name, void __iomem *reg, } static inline struct clk *imx_clk_mux_flags(const char *name, - void __iomem *reg, u8 shift, u8 width, const char **parents, - int num_parents, unsigned long flags) + void __iomem *reg, u8 shift, u8 width, + const char * const *parents, int num_parents, + unsigned long flags) { return clk_register_mux(NULL, name, parents, num_parents, flags | CLK_SET_RATE_NO_REPARENT, reg, shift, width, 0, From e1e3a46706bd4037e8b7407dc660ae6e05b8ac56 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 3 Nov 2018 16:38:18 -0700 Subject: [PATCH 1495/2608] pstore/ram: Do not treat empty buffers as valid [ Upstream commit 30696378f68a9e3dad6bfe55938b112e72af00c2 ] The ramoops backend currently calls persistent_ram_save_old() even if a buffer is empty. While this appears to work, it is does not seem like the right thing to do and could lead to future bugs so lets avoid that. It also prevents misleading prints in the logs which claim the buffer is valid. I got something like: found existing buffer, size 0, start 0 When I was expecting: no valid data in buffer (sig = ...) This bails out early (and reports with pr_debug()), since it's an acceptable state. Signed-off-by: Joel Fernandes (Google) Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- fs/pstore/ram_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index ecdb3baa1283..11e558efd61e 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -488,6 +488,11 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, sig ^= PERSISTENT_RAM_SIG; if (prz->buffer->sig == sig) { + if (buffer_size(prz) == 0) { + pr_debug("found existing empty buffer\n"); + return 0; + } + if (buffer_size(prz) > prz->buffer_size || buffer_start(prz) > buffer_size(prz)) pr_info("found existing invalid buffer, size %zu, start %zu\n", From e37cb09e8eacaff118c6073b0cd436e3011ac237 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Nov 2018 15:12:42 -0200 Subject: [PATCH 1496/2608] powerpc/xmon: Fix invocation inside lock region [ Upstream commit 8d4a862276a9c30a269d368d324fb56529e6d5fd ] Currently xmon needs to get devtree_lock (through rtas_token()) during its invocation (at crash time). If there is a crash while devtree_lock is being held, then xmon tries to get the lock but spins forever and never get into the interactive debugger, as in the following case: int *ptr = NULL; raw_spin_lock_irqsave(&devtree_lock, flags); *ptr = 0xdeadbeef; This patch avoids calling rtas_token(), thus trying to get the same lock, at crash time. This new mechanism proposes getting the token at initialization time (xmon_init()) and just consuming it at crash time. This would allow xmon to be possible invoked independent of devtree_lock being held or not. Signed-off-by: Breno Leitao Reviewed-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/xmon/xmon.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index a5938fadd031..f752f771f29d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -78,6 +78,9 @@ static int xmon_gate; #define xmon_owner 0 #endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_PSERIES +static int set_indicator_token = RTAS_UNKNOWN_SERVICE; +#endif static unsigned long in_xmon __read_mostly = 0; static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT); @@ -357,7 +360,6 @@ static inline void disable_surveillance(void) #ifdef CONFIG_PPC_PSERIES /* Since this can't be a module, args should end up below 4GB. */ static struct rtas_args args; - int token; /* * At this point we have got all the cpus we can into @@ -366,11 +368,11 @@ static inline void disable_surveillance(void) * If we did try to take rtas.lock there would be a * real possibility of deadlock. */ - token = rtas_token("set-indicator"); - if (token == RTAS_UNKNOWN_SERVICE) + if (set_indicator_token == RTAS_UNKNOWN_SERVICE) return; - rtas_call_unlocked(&args, token, 3, 1, NULL, SURVEILLANCE_TOKEN, 0, 0); + rtas_call_unlocked(&args, set_indicator_token, 3, 1, NULL, + SURVEILLANCE_TOKEN, 0, 0); #endif /* CONFIG_PPC_PSERIES */ } @@ -3472,6 +3474,14 @@ static void xmon_init(int enable) __debugger_iabr_match = xmon_iabr_match; __debugger_break_match = xmon_break_match; __debugger_fault_handler = xmon_fault_handler; + +#ifdef CONFIG_PPC_PSERIES + /* + * Get the token here to avoid trying to get a lock + * during the crash, causing a deadlock. + */ + set_indicator_token = rtas_token("set-indicator"); +#endif } else { __debugger = NULL; __debugger_ipi = NULL; From a692fe3bd16dc28dda5dc3b2774256fb040e84d7 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 23 Nov 2018 14:30:11 -0200 Subject: [PATCH 1497/2608] powerpc/pseries/cpuidle: Fix preempt warning [ Upstream commit 2b038cbc5fcf12a7ee1cc9bfd5da1e46dacdee87 ] When booting a pseries kernel with PREEMPT enabled, it dumps the following warning: BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 caller is pseries_processor_idle_init+0x5c/0x22c CPU: 13 PID: 1 Comm: swapper/0 Not tainted 4.20.0-rc3-00090-g12201a0128bc-dirty #828 Call Trace: [c000000429437ab0] [c0000000009c8878] dump_stack+0xec/0x164 (unreliable) [c000000429437b00] [c0000000005f2f24] check_preemption_disabled+0x154/0x160 [c000000429437b90] [c000000000cab8e8] pseries_processor_idle_init+0x5c/0x22c [c000000429437c10] [c000000000010ed4] do_one_initcall+0x64/0x300 [c000000429437ce0] [c000000000c54500] kernel_init_freeable+0x3f0/0x500 [c000000429437db0] [c0000000000112dc] kernel_init+0x2c/0x160 [c000000429437e20] [c00000000000c1d0] ret_from_kernel_thread+0x5c/0x6c This happens because the code calls get_lppaca() which calls get_paca() and it checks if preemption is disabled through check_preemption_disabled(). Preemption should be disabled because the per CPU variable may make no sense if there is a preemption (and a CPU switch) after it reads the per CPU data and when it is used. In this device driver specifically, it is not a problem, because this code just needs to have access to one lppaca struct, and it does not matter if it is the current per CPU lppaca struct or not (i.e. when there is a preemption and a CPU migration). That said, the most appropriate fix seems to be related to avoiding the debug_smp_processor_id() call at get_paca(), instead of calling preempt_disable() before get_paca(). Signed-off-by: Breno Leitao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- drivers/cpuidle/cpuidle-pseries.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index a187a39fb866..7f21c6a57178 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -239,7 +239,13 @@ static int pseries_idle_probe(void) return -ENODEV; if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - if (lppaca_shared_proc(get_lppaca())) { + /* + * Use local_paca instead of get_lppaca() since + * preemption is not disabled, and it is not required in + * fact, since lppaca_ptr does not need to be the value + * associated to the current CPU, it can be from any CPU. + */ + if (lppaca_shared_proc(local_paca->lppaca_ptr)) { cpuidle_state_table = shared_states; max_idle_state = ARRAY_SIZE(shared_states); } else { From b851aa7ff1bc11b1e6a1f3d1d8de503f8bbcf81b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 18 Oct 2018 16:03:06 -0400 Subject: [PATCH 1498/2608] media: firewire: Fix app_info parameter type in avc_ca{,_app}_info [ Upstream commit b2e9a4eda11fd2cb1e6714e9ad3f455c402568ff ] Clang warns: drivers/media/firewire/firedtv-avc.c:999:45: warning: implicit conversion from 'int' to 'char' changes value from 159 to -97 [-Wconstant-conversion] app_info[0] = (EN50221_TAG_APP_INFO >> 16) & 0xff; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ drivers/media/firewire/firedtv-avc.c:1000:45: warning: implicit conversion from 'int' to 'char' changes value from 128 to -128 [-Wconstant-conversion] app_info[1] = (EN50221_TAG_APP_INFO >> 8) & 0xff; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ drivers/media/firewire/firedtv-avc.c:1040:44: warning: implicit conversion from 'int' to 'char' changes value from 159 to -97 [-Wconstant-conversion] app_info[0] = (EN50221_TAG_CA_INFO >> 16) & 0xff; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ drivers/media/firewire/firedtv-avc.c:1041:44: warning: implicit conversion from 'int' to 'char' changes value from 128 to -128 [-Wconstant-conversion] app_info[1] = (EN50221_TAG_CA_INFO >> 8) & 0xff; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ 4 warnings generated. Change app_info's type to unsigned char to match the type of the member msg in struct ca_msg, which is the only thing passed into the app_info parameter in this function. Link: https://github.com/ClangBuiltLinux/linux/issues/105 Signed-off-by: Nathan Chancellor Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/firewire/firedtv-avc.c | 6 ++++-- drivers/media/firewire/firedtv.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/media/firewire/firedtv-avc.c b/drivers/media/firewire/firedtv-avc.c index 5bde6c209cd7..b243e4a52f10 100644 --- a/drivers/media/firewire/firedtv-avc.c +++ b/drivers/media/firewire/firedtv-avc.c @@ -968,7 +968,8 @@ static int get_ca_object_length(struct avc_response_frame *r) return r->operand[7]; } -int avc_ca_app_info(struct firedtv *fdtv, char *app_info, unsigned int *len) +int avc_ca_app_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len) { struct avc_command_frame *c = (void *)fdtv->avc_data; struct avc_response_frame *r = (void *)fdtv->avc_data; @@ -1009,7 +1010,8 @@ out: return ret; } -int avc_ca_info(struct firedtv *fdtv, char *app_info, unsigned int *len) +int avc_ca_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len) { struct avc_command_frame *c = (void *)fdtv->avc_data; struct avc_response_frame *r = (void *)fdtv->avc_data; diff --git a/drivers/media/firewire/firedtv.h b/drivers/media/firewire/firedtv.h index 345d1eda8c05..5b18a08c6285 100644 --- a/drivers/media/firewire/firedtv.h +++ b/drivers/media/firewire/firedtv.h @@ -124,8 +124,10 @@ int avc_lnb_control(struct firedtv *fdtv, char voltage, char burst, struct dvb_diseqc_master_cmd *diseqcmd); void avc_remote_ctrl_work(struct work_struct *work); int avc_register_remote_control(struct firedtv *fdtv); -int avc_ca_app_info(struct firedtv *fdtv, char *app_info, unsigned int *len); -int avc_ca_info(struct firedtv *fdtv, char *app_info, unsigned int *len); +int avc_ca_app_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len); +int avc_ca_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len); int avc_ca_reset(struct firedtv *fdtv); int avc_ca_pmt(struct firedtv *fdtv, char *app_info, int length); int avc_ca_get_time_date(struct firedtv *fdtv, int *interval); From ec957dab1fe63792d81ede9bcfdd8ba978b452f4 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Wed, 5 Dec 2018 03:31:51 -0500 Subject: [PATCH 1499/2608] media: venus: core: Set dma maximum segment size [ Upstream commit de2563bce7a157f5296bab94f3843d7d64fb14b4 ] Turning on CONFIG_DMA_API_DEBUG_SG results in the following error: [ 460.308650] ------------[ cut here ]------------ [ 460.313490] qcom-venus aa00000.video-codec: DMA-API: mapping sg segment longer than device claims to support [len=4194304] [max=65536] [ 460.326017] WARNING: CPU: 3 PID: 3555 at src/kernel/dma/debug.c:1301 debug_dma_map_sg+0x174/0x254 [ 460.338888] Modules linked in: venus_dec venus_enc videobuf2_dma_sg videobuf2_memops hci_uart btqca bluetooth venus_core v4l2_mem2mem videobuf2_v4l2 videobuf2_common ath10k_snoc ath10k_core ath lzo lzo_compress zramjoydev [ 460.375811] CPU: 3 PID: 3555 Comm: V4L2DecoderThre Tainted: G W 4.19.1 #82 [ 460.384223] Hardware name: Google Cheza (rev1) (DT) [ 460.389251] pstate: 60400009 (nZCv daif +PAN -UAO) [ 460.394191] pc : debug_dma_map_sg+0x174/0x254 [ 460.398680] lr : debug_dma_map_sg+0x174/0x254 [ 460.403162] sp : ffffff80200c37d0 [ 460.406583] x29: ffffff80200c3830 x28: 0000000000010000 [ 460.412056] x27: 00000000ffffffff x26: ffffffc0f785ea80 [ 460.417532] x25: 0000000000000000 x24: ffffffc0f4ea1290 [ 460.423001] x23: ffffffc09e700300 x22: ffffffc0f4ea1290 [ 460.428470] x21: ffffff8009037000 x20: 0000000000000001 [ 460.433936] x19: ffffff80091b0000 x18: 0000000000000000 [ 460.439411] x17: 0000000000000000 x16: 000000000000f251 [ 460.444885] x15: 0000000000000006 x14: 0720072007200720 [ 460.450354] x13: ffffff800af536e0 x12: 0000000000000000 [ 460.455822] x11: 0000000000000000 x10: 0000000000000000 [ 460.461288] x9 : 537944d9c6c48d00 x8 : 537944d9c6c48d00 [ 460.466758] x7 : 0000000000000000 x6 : ffffffc0f8d98f80 [ 460.472230] x5 : 0000000000000000 x4 : 0000000000000000 [ 460.477703] x3 : 000000000000008a x2 : ffffffc0fdb13948 [ 460.483170] x1 : ffffffc0fdb0b0b0 x0 : 000000000000007a [ 460.488640] Call trace: [ 460.491165] debug_dma_map_sg+0x174/0x254 [ 460.495307] vb2_dma_sg_alloc+0x260/0x2dc [videobuf2_dma_sg] [ 460.501150] __vb2_queue_alloc+0x164/0x374 [videobuf2_common] [ 460.507076] vb2_core_reqbufs+0xfc/0x23c [videobuf2_common] [ 460.512815] vb2_reqbufs+0x44/0x5c [videobuf2_v4l2] [ 460.517853] v4l2_m2m_reqbufs+0x44/0x78 [v4l2_mem2mem] [ 460.523144] v4l2_m2m_ioctl_reqbufs+0x1c/0x28 [v4l2_mem2mem] [ 460.528976] v4l_reqbufs+0x30/0x40 [ 460.532480] __video_do_ioctl+0x36c/0x454 [ 460.536610] video_usercopy+0x25c/0x51c [ 460.540572] video_ioctl2+0x38/0x48 [ 460.544176] v4l2_ioctl+0x60/0x74 [ 460.547602] do_video_ioctl+0x948/0x3520 [ 460.551648] v4l2_compat_ioctl32+0x60/0x98 [ 460.555872] __arm64_compat_sys_ioctl+0x134/0x20c [ 460.560718] el0_svc_common+0x9c/0xe4 [ 460.564498] el0_svc_compat_handler+0x2c/0x38 [ 460.568982] el0_svc_compat+0x8/0x18 [ 460.572672] ---[ end trace ce209b87b2f3af88 ]--- >From above warning one would deduce that the sg segment will overflow the device's capacity. In reality, the hardware can accommodate larger sg segments. So, initialize the max segment size properly to weed out this warning. Based on a similar patch sent by Sean Paul for mdss: https://patchwork.kernel.org/patch/10671457/ Signed-off-by: Vivek Gautam Acked-by: Stanimir Varbanov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/qcom/venus/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index 41eef376eb2d..769e9e68562d 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -187,6 +187,14 @@ static int venus_probe(struct platform_device *pdev) if (ret) return ret; + if (!dev->dma_parms) { + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), + GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + } + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + INIT_LIST_HEAD(&core->instances); mutex_init(&core->lock); INIT_DELAYED_WORK(&core->work, venus_sys_error_handler); From e1bca2f43db4c1b25351d8bd69bc01a704beffcd Mon Sep 17 00:00:00 2001 From: yupeng Date: Wed, 5 Dec 2018 18:56:28 -0800 Subject: [PATCH 1500/2608] net: call sk_dst_reset when set SO_DONTROUTE [ Upstream commit 0fbe82e628c817e292ff588cd5847fc935e025f2 ] after set SO_DONTROUTE to 1, the IP layer should not route packets if the dest IP address is not in link scope. But if the socket has cached the dst_entry, such packets would be routed until the sk_dst_cache expires. So we should clean the sk_dst_cache when a user set SO_DONTROUTE option. Below are server/client python scripts which could reprodue this issue: server side code: ========================================================================== import socket import struct import time s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind(('0.0.0.0', 9000)) s.listen(1) sock, addr = s.accept() sock.setsockopt(socket.SOL_SOCKET, socket.SO_DONTROUTE, struct.pack('i', 1)) while True: sock.send(b'foo') time.sleep(1) ========================================================================== client side code: ========================================================================== import socket import time s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect(('server_address', 9000)) while True: data = s.recv(1024) print(data) ========================================================================== Signed-off-by: yupeng Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index 01cae48d6eef..a88579589946 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -735,6 +735,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_DONTROUTE: sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); + sk_dst_reset(sk); break; case SO_BROADCAST: sock_valbool_flag(sk, SOCK_BROADCAST, valbool); From ff00df41a1086b7e00f7f23bc1b1bef3b94c902f Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 5 Dec 2018 13:18:34 +0100 Subject: [PATCH 1501/2608] scsi: target: use consistent left-aligned ASCII INQUIRY data [ Upstream commit 0de263577de5d5e052be5f4f93334e63cc8a7f0b ] spc5r17.pdf specifies: 4.3.1 ASCII data field requirements ASCII data fields shall contain only ASCII printable characters (i.e., code values 20h to 7Eh) and may be terminated with one or more ASCII null (00h) characters. ASCII data fields described as being left-aligned shall have any unused bytes at the end of the field (i.e., highest offset) and the unused bytes shall be filled with ASCII space characters (20h). LIO currently space-pads the T10 VENDOR IDENTIFICATION and PRODUCT IDENTIFICATION fields in the standard INQUIRY data. However, the PRODUCT REVISION LEVEL field in the standard INQUIRY data as well as the T10 VENDOR IDENTIFICATION field in the INQUIRY Device Identification VPD Page are zero-terminated/zero-padded. Fix this inconsistency by using space-padding for all of the above fields. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Reviewed-by: Bryant G. Ly Reviewed-by: Lee Duncan Reviewed-by: Hannes Reinecke Reviewed-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_spc.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index cb0461a10808..93424db5f002 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -108,12 +108,17 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) buf[7] = 0x2; /* CmdQue=1 */ - memcpy(&buf[8], "LIO-ORG ", 8); - memset(&buf[16], 0x20, 16); + /* + * ASCII data fields described as being left-aligned shall have any + * unused bytes at the end of the field (i.e., highest offset) and the + * unused bytes shall be filled with ASCII space characters (20h). + */ + memset(&buf[8], 0x20, 8 + 16 + 4); + memcpy(&buf[8], "LIO-ORG", sizeof("LIO-ORG") - 1); memcpy(&buf[16], dev->t10_wwn.model, - min_t(size_t, strlen(dev->t10_wwn.model), 16)); + strnlen(dev->t10_wwn.model, 16)); memcpy(&buf[32], dev->t10_wwn.revision, - min_t(size_t, strlen(dev->t10_wwn.revision), 4)); + strnlen(dev->t10_wwn.revision, 4)); buf[4] = 31; /* Set additional length to 31 */ return 0; @@ -251,7 +256,9 @@ check_t10_vend_desc: buf[off] = 0x2; /* ASCII */ buf[off+1] = 0x1; /* T10 Vendor ID */ buf[off+2] = 0x0; - memcpy(&buf[off+4], "LIO-ORG", 8); + /* left align Vendor ID and pad with spaces */ + memset(&buf[off+4], 0x20, 8); + memcpy(&buf[off+4], "LIO-ORG", sizeof("LIO-ORG") - 1); /* Extra Byte for NULL Terminator */ id_len++; /* Identifier Length */ From eef9287108b27063391a52a47ed2b4cc369af472 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 10 Dec 2018 02:00:47 +0300 Subject: [PATCH 1502/2608] selftests: do not macro-expand failed assertion expressions [ Upstream commit b708a3cc9600390ccaa2b68a88087dd265154b2b ] I've stumbled over the current macro-expand behaviour of the test harness: $ gcc -Wall -xc - <<'__EOF__' TEST(macro) { int status = 0; ASSERT_TRUE(WIFSIGNALED(status)); } TEST_HARNESS_MAIN __EOF__ $ ./a.out [==========] Running 1 tests from 1 test cases. [ RUN ] global.macro :4:global.macro:Expected 0 (0) != (((signed char) (((status) & 0x7f) + 1) >> 1) > 0) (0) global.macro: Test terminated by assertion [ FAIL ] global.macro [==========] 0 / 1 tests passed. [ FAILED ] With this change the output of the same test looks much more comprehensible: [==========] Running 1 tests from 1 test cases. [ RUN ] global.macro :4:global.macro:Expected 0 (0) != WIFSIGNALED(status) (0) global.macro: Test terminated by assertion [ FAIL ] global.macro [==========] 0 / 1 tests passed. [ FAILED ] The issue is very similar to the bug fixed in glibc assert(3) three years ago: https://sourceware.org/bugzilla/show_bug.cgi?id=18604 Cc: Shuah Khan Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: linux-kselftest@vger.kernel.org Signed-off-by: Dmitry V. Levin Acked-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/kselftest_harness.h | 42 ++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index e81bd28bdd89..8f32d699d6c5 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -330,7 +330,7 @@ * ASSERT_EQ(expected, measured): expected == measured */ #define ASSERT_EQ(expected, seen) \ - __EXPECT(expected, seen, ==, 1) + __EXPECT(expected, #expected, seen, #seen, ==, 1) /** * ASSERT_NE(expected, seen) @@ -341,7 +341,7 @@ * ASSERT_NE(expected, measured): expected != measured */ #define ASSERT_NE(expected, seen) \ - __EXPECT(expected, seen, !=, 1) + __EXPECT(expected, #expected, seen, #seen, !=, 1) /** * ASSERT_LT(expected, seen) @@ -352,7 +352,7 @@ * ASSERT_LT(expected, measured): expected < measured */ #define ASSERT_LT(expected, seen) \ - __EXPECT(expected, seen, <, 1) + __EXPECT(expected, #expected, seen, #seen, <, 1) /** * ASSERT_LE(expected, seen) @@ -363,7 +363,7 @@ * ASSERT_LE(expected, measured): expected <= measured */ #define ASSERT_LE(expected, seen) \ - __EXPECT(expected, seen, <=, 1) + __EXPECT(expected, #expected, seen, #seen, <=, 1) /** * ASSERT_GT(expected, seen) @@ -374,7 +374,7 @@ * ASSERT_GT(expected, measured): expected > measured */ #define ASSERT_GT(expected, seen) \ - __EXPECT(expected, seen, >, 1) + __EXPECT(expected, #expected, seen, #seen, >, 1) /** * ASSERT_GE(expected, seen) @@ -385,7 +385,7 @@ * ASSERT_GE(expected, measured): expected >= measured */ #define ASSERT_GE(expected, seen) \ - __EXPECT(expected, seen, >=, 1) + __EXPECT(expected, #expected, seen, #seen, >=, 1) /** * ASSERT_NULL(seen) @@ -395,7 +395,7 @@ * ASSERT_NULL(measured): NULL == measured */ #define ASSERT_NULL(seen) \ - __EXPECT(NULL, seen, ==, 1) + __EXPECT(NULL, "NULL", seen, #seen, ==, 1) /** * ASSERT_TRUE(seen) @@ -405,7 +405,7 @@ * ASSERT_TRUE(measured): measured != 0 */ #define ASSERT_TRUE(seen) \ - ASSERT_NE(0, seen) + __EXPECT(0, "0", seen, #seen, !=, 1) /** * ASSERT_FALSE(seen) @@ -415,7 +415,7 @@ * ASSERT_FALSE(measured): measured == 0 */ #define ASSERT_FALSE(seen) \ - ASSERT_EQ(0, seen) + __EXPECT(0, "0", seen, #seen, ==, 1) /** * ASSERT_STREQ(expected, seen) @@ -448,7 +448,7 @@ * EXPECT_EQ(expected, measured): expected == measured */ #define EXPECT_EQ(expected, seen) \ - __EXPECT(expected, seen, ==, 0) + __EXPECT(expected, #expected, seen, #seen, ==, 0) /** * EXPECT_NE(expected, seen) @@ -459,7 +459,7 @@ * EXPECT_NE(expected, measured): expected != measured */ #define EXPECT_NE(expected, seen) \ - __EXPECT(expected, seen, !=, 0) + __EXPECT(expected, #expected, seen, #seen, !=, 0) /** * EXPECT_LT(expected, seen) @@ -470,7 +470,7 @@ * EXPECT_LT(expected, measured): expected < measured */ #define EXPECT_LT(expected, seen) \ - __EXPECT(expected, seen, <, 0) + __EXPECT(expected, #expected, seen, #seen, <, 0) /** * EXPECT_LE(expected, seen) @@ -481,7 +481,7 @@ * EXPECT_LE(expected, measured): expected <= measured */ #define EXPECT_LE(expected, seen) \ - __EXPECT(expected, seen, <=, 0) + __EXPECT(expected, #expected, seen, #seen, <=, 0) /** * EXPECT_GT(expected, seen) @@ -492,7 +492,7 @@ * EXPECT_GT(expected, measured): expected > measured */ #define EXPECT_GT(expected, seen) \ - __EXPECT(expected, seen, >, 0) + __EXPECT(expected, #expected, seen, #seen, >, 0) /** * EXPECT_GE(expected, seen) @@ -503,7 +503,7 @@ * EXPECT_GE(expected, measured): expected >= measured */ #define EXPECT_GE(expected, seen) \ - __EXPECT(expected, seen, >=, 0) + __EXPECT(expected, #expected, seen, #seen, >=, 0) /** * EXPECT_NULL(seen) @@ -513,7 +513,7 @@ * EXPECT_NULL(measured): NULL == measured */ #define EXPECT_NULL(seen) \ - __EXPECT(NULL, seen, ==, 0) + __EXPECT(NULL, "NULL", seen, #seen, ==, 0) /** * EXPECT_TRUE(seen) @@ -523,7 +523,7 @@ * EXPECT_TRUE(measured): 0 != measured */ #define EXPECT_TRUE(seen) \ - EXPECT_NE(0, seen) + __EXPECT(0, "0", seen, #seen, !=, 0) /** * EXPECT_FALSE(seen) @@ -533,7 +533,7 @@ * EXPECT_FALSE(measured): 0 == measured */ #define EXPECT_FALSE(seen) \ - EXPECT_EQ(0, seen) + __EXPECT(0, "0", seen, #seen, ==, 0) /** * EXPECT_STREQ(expected, seen) @@ -573,7 +573,7 @@ if (_metadata->passed && _metadata->step < 255) \ _metadata->step++; -#define __EXPECT(_expected, _seen, _t, _assert) do { \ +#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ @@ -582,8 +582,8 @@ unsigned long long __exp_print = (uintptr_t)__exp; \ unsigned long long __seen_print = (uintptr_t)__seen; \ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ - #_expected, __exp_print, #_t, \ - #_seen, __seen_print); \ + _expected_str, __exp_print, #_t, \ + _seen_str, __seen_print); \ _metadata->passed = 0; \ /* Ensure the optional handler is triggered */ \ _metadata->trigger = 1; \ From e107641b0a1273636b0ebe601eaa573001ed0f1d Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 15 Nov 2018 15:30:26 +0100 Subject: [PATCH 1503/2608] clk: imx6q: reset exclusive gates on init [ Upstream commit f7542d817733f461258fd3a47d77da35b2d9fc81 ] The exclusive gates may be set up in the wrong way by software running before the clock driver comes up. In that case the exclusive setup is locked in its initial state, as the complementary function can't be activated without disabling the initial setup first. To avoid this lock situation, reset the exclusive gates to the off state and allow the kernel to provide the proper setup. Signed-off-by: Lucas Stach Reviewed-by: Dong Aisheng Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx6q.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c index 8d518ad5dc13..8eb93eb2f857 100644 --- a/drivers/clk/imx/clk-imx6q.c +++ b/drivers/clk/imx/clk-imx6q.c @@ -515,8 +515,12 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) * lvds1_gate and lvds2_gate are pseudo-gates. Both can be * independently configured as clock inputs or outputs. We treat * the "output_enable" bit as a gate, even though it's really just - * enabling clock output. + * enabling clock output. Initially the gate bits are cleared, as + * otherwise the exclusive configuration gets locked in the setup done + * by software running before the clock driver, with no way to change + * it. */ + writel(readl(base + 0x160) & ~0x3c00, base + 0x160); clk[IMX6QDL_CLK_LVDS1_GATE] = imx_clk_gate_exclusive("lvds1_gate", "lvds1_sel", base + 0x160, 10, BIT(12)); clk[IMX6QDL_CLK_LVDS2_GATE] = imx_clk_gate_exclusive("lvds2_gate", "lvds2_sel", base + 0x160, 11, BIT(13)); From ca5664c3aee9a2e177ebf6e12e476a22b5863400 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Dec 2018 13:39:48 +0000 Subject: [PATCH 1504/2608] arm64: Fix minor issues with the dcache_by_line_op macro [ Upstream commit 33309ecda0070506c49182530abe7728850ebe78 ] The dcache_by_line_op macro suffers from a couple of small problems: First, the GAS directives that are currently being used rely on assembler behavior that is not documented, and probably not guaranteed to produce the correct behavior going forward. As a result, we end up with some undefined symbols in cache.o: $ nm arch/arm64/mm/cache.o ... U civac ... U cvac U cvap U cvau This is due to the fact that the comparisons used to select the operation type in the dcache_by_line_op macro are comparing symbols not strings, and even though it seems that GAS is doing the right thing here (undefined symbols by the same name are equal to each other), it seems unwise to rely on this. Second, when patching in a DC CVAP instruction on CPUs that support it, the fallback path consists of a DC CVAU instruction which may be affected by CPU errata that require ARM64_WORKAROUND_CLEAN_CACHE. Solve these issues by unrolling the various maintenance routines and using the conditional directives that are documented as operating on strings. To avoid the complexity of nested alternatives, we move the DC CVAP patching to __clean_dcache_area_pop, falling back to a branch to __clean_dcache_area_poc if DCPOP is not supported by the CPU. Reported-by: Ard Biesheuvel Suggested-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/assembler.h | 30 ++++++++++++++++++------------ arch/arm64/mm/cache.S | 3 +++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 66aea4aa455d..02d73d83f0de 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -373,27 +373,33 @@ alternative_endif * size: size of the region * Corrupts: kaddr, size, tmp1, tmp2 */ + .macro __dcache_op_workaround_clean_cache, op, kaddr +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .endm + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 dcache_line_size \tmp1, \tmp2 add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 9998: - .if (\op == cvau || \op == cvac) -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \kaddr -alternative_else - dc civac, \kaddr -alternative_endif - .elseif (\op == cvap) -alternative_if ARM64_HAS_DCPOP - sys 3, c7, c12, 1, \kaddr // dc cvap -alternative_else - dc cvac, \kaddr -alternative_endif + .ifc \op, cvau + __dcache_op_workaround_clean_cache \op, \kaddr + .else + .ifc \op, cvac + __dcache_op_workaround_clean_cache \op, \kaddr + .else + .ifc \op, cvap + sys 3, c7, c12, 1, \kaddr // dc cvap .else dc \op, \kaddr .endif + .endif + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 91464e7f77cc..c1e8f3c6ffd5 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -181,6 +181,9 @@ ENDPROC(__dma_clean_area) * - size - size in question */ ENTRY(__clean_dcache_area_pop) + alternative_if_not ARM64_HAS_DCPOP + b __clean_dcache_area_poc + alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret ENDPIPROC(__clean_dcache_area_pop) From 407148e8c01d5060c451a9b5ee3f65f63a7318b6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 11 Dec 2018 20:00:44 +0900 Subject: [PATCH 1505/2608] kconfig: fix file name and line number of warn_ignored_character() [ Upstream commit 77c1c0fa8b1477c5799bdad65026ea5ff676da44 ] Currently, warn_ignore_character() displays invalid file name and line number. The lexer should use current_file->name and yylineno, while the parser should use zconf_curname() and zconf_lineno(). This difference comes from that the lexer is always going ahead of the parser. The parser needs to look ahead one token to make a shift/reduce decision, so the lexer is requested to scan more text from the input file. This commit fixes the warning message from warn_ignored_character(). [Test Code] ----(Kconfig begin)---- / -----(Kconfig end)----- [Output] Before the fix: :0:warning: ignoring unsupported character '/' After the fix: Kconfig:1:warning: ignoring unsupported character '/' Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/zconf.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index c410d257da06..6534dc5ac803 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -71,7 +71,7 @@ static void warn_ignored_character(char chr) { fprintf(stderr, "%s:%d:warning: ignoring unsupported character '%c'\n", - zconf_curname(), zconf_lineno(), chr); + current_file->name, yylineno, chr); } %} From 3f96ff44bb40567467ccd7c238b3349bd166deb4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 11 Dec 2018 20:00:45 +0900 Subject: [PATCH 1506/2608] kconfig: fix memory leak when EOF is encountered in quotation [ Upstream commit fbac5977d81cb2b2b7e37b11c459055d9585273c ] An unterminated string literal followed by new line is passed to the parser (with "multi-line strings not supported" warning shown), then handled properly there. On the other hand, an unterminated string literal at end of file is never passed to the parser, then results in memory leak. [Test Code] ----------(Kconfig begin)---------- source "Kconfig.inc" config A bool "a" -----------(Kconfig end)----------- --------(Kconfig.inc begin)-------- config B bool "b\No new line at end of file ---------(Kconfig.inc end)--------- [Summary from Valgrind] Before the fix: LEAK SUMMARY: definitely lost: 16 bytes in 1 blocks ... After the fix: LEAK SUMMARY: definitely lost: 0 bytes in 0 blocks ... Eliminate the memory leak path by handling this case. Of course, such a Kconfig file is wrong already, so I will add an error message later. Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/zconf.l | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 6534dc5ac803..0c7800112ff5 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -191,6 +191,8 @@ n [A-Za-z0-9_-] } <> { BEGIN(INITIAL); + yylval.string = text; + return T_WORD_QUOTE; } } From 7b83b9a2e6e9fa5816aee27c00ec0e94a0c0b77d Mon Sep 17 00:00:00 2001 From: Jonas Danielsson Date: Fri, 19 Oct 2018 16:40:05 +0200 Subject: [PATCH 1507/2608] mmc: atmel-mci: do not assume idle after atmci_request_end [ Upstream commit ae460c115b7aa50c9a36cf78fced07b27962c9d0 ] On our AT91SAM9260 board we use the same sdio bus for wifi and for the sd card slot. This caused the atmel-mci to give the following splat on the serial console: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 538 at drivers/mmc/host/atmel-mci.c:859 atmci_send_command+0x24/0x44 Modules linked in: CPU: 0 PID: 538 Comm: mmcqd/0 Not tainted 4.14.76 #14 Hardware name: Atmel AT91SAM9 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__warn+0xd8/0xf4) [] (__warn) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (atmci_send_command+0x24/0x44) [] (atmci_send_command) from [] (atmci_start_request+0x1f4/0x2dc) [] (atmci_start_request) from [] (atmci_request+0xf0/0x164) [] (atmci_request) from [] (mmc_start_request+0x280/0x2d0) [] (mmc_start_request) from [] (mmc_start_areq+0x230/0x330) [] (mmc_start_areq) from [] (mmc_blk_issue_rw_rq+0xc4/0x310) [] (mmc_blk_issue_rw_rq) from [] (mmc_blk_issue_rq+0x118/0x5ac) [] (mmc_blk_issue_rq) from [] (mmc_queue_thread+0xc4/0x118) [] (mmc_queue_thread) from [] (kthread+0x100/0x118) [] (kthread) from [] (ret_from_fork+0x14/0x34) ---[ end trace 594371ddfa284bd6 ]--- This is: WARN_ON(host->cmd); This was fixed on our board by letting atmci_request_end determine what state we are in. Instead of unconditionally setting it to STATE_IDLE on STATE_END_REQUEST. Signed-off-by: Jonas Danielsson Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/atmel-mci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 0a0ebf3a096d..c8a591d8a3d9 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1954,13 +1954,14 @@ static void atmci_tasklet_func(unsigned long priv) } atmci_request_end(host, host->mrq); - state = STATE_IDLE; + goto unlock; /* atmci_request_end() sets host->state */ break; } } while (state != prev_state); host->state = state; +unlock: spin_unlock(&host->lock); } From 6b337c5918639adddd7fda40dab7ebe2506783d2 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 12 Dec 2018 15:14:17 +0100 Subject: [PATCH 1508/2608] btrfs: improve error handling of btrfs_add_link [ Upstream commit 1690dd41e0cb1dade80850ed8a3eb0121b96d22f ] In the error handling block, err holds the return value of either btrfs_del_root_ref() or btrfs_del_inode_ref() but it hasn't been checked since it's introduction with commit fe66a05a0679 (Btrfs: improve error handling for btrfs_insert_dir_item callers) in 2012. If the error handling in the error handling fails, there's not much left to do and the abort either happened earlier in the callees or is necessary here. So if one of btrfs_del_root_ref() or btrfs_del_inode_ref() failed, abort the transaction, but still return the original code of the failure stored in 'ret' as this will be reported to the user. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 909f7ea92e0b..7e288510fd2c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6594,14 +6594,19 @@ fail_dir_item: err = btrfs_del_root_ref(trans, fs_info, key.objectid, root->root_key.objectid, parent_ino, &local_index, name, name_len); - + if (err) + btrfs_abort_transaction(trans, err); } else if (add_backref) { u64 local_index; int err; err = btrfs_del_inode_ref(trans, root, name, name_len, ino, parent_ino, &local_index); + if (err) + btrfs_abort_transaction(trans, err); } + + /* Return the original error code */ return ret; } From 035438452f85412438be0f6b4dae88b32353cfac Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 13 Dec 2018 13:58:39 +0900 Subject: [PATCH 1509/2608] tty/serial: do not free trasnmit buffer page under port lock [ Upstream commit d72402145ace0697a6a9e8e75a3de5bf3375f78d ] LKP has hit yet another circular locking dependency between uart console drivers and debugobjects [1]: CPU0 CPU1 rhltable_init() __init_work() debug_object_init uart_shutdown() /* db->lock */ /* uart_port->lock */ debug_print_object() free_page() printk() call_console_drivers() debug_check_no_obj_freed() /* uart_port->lock */ /* db->lock */ debug_print_object() So there are two dependency chains: uart_port->lock -> db->lock And db->lock -> uart_port->lock This particular circular locking dependency can be addressed in several ways: a) One way would be to move debug_print_object() out of db->lock scope and, thus, break the db->lock -> uart_port->lock chain. b) Another one would be to free() transmit buffer page out of db->lock in UART code; which is what this patch does. It makes sense to apply a) and b) independently: there are too many things going on behind free(), none of which depend on uart_port->lock. The patch fixes transmit buffer page free() in uart_shutdown() and, additionally, in uart_port_startup() (as was suggested by Dmitry Safonov). [1] https://lore.kernel.org/lkml/20181211091154.GL23332@shao2-debian/T/#u Signed-off-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Acked-by: Peter Zijlstra (Intel) Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Andrew Morton Cc: Waiman Long Cc: Dmitry Safonov Cc: Steven Rostedt Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/serial_core.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 6db8844ef3ec..543d0f95f094 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -218,10 +218,15 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state, if (!state->xmit.buf) { state->xmit.buf = (unsigned char *) page; uart_circ_clear(&state->xmit); + uart_port_unlock(uport, flags); } else { + uart_port_unlock(uport, flags); + /* + * Do not free() the page under the port lock, see + * uart_shutdown(). + */ free_page(page); } - uart_port_unlock(uport, flags); retval = uport->ops->startup(uport); if (retval == 0) { @@ -281,6 +286,7 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) struct uart_port *uport = uart_port_check(state); struct tty_port *port = &state->port; unsigned long flags = 0; + char *xmit_buf = NULL; /* * Set the TTY IO error marker @@ -311,14 +317,18 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) tty_port_set_suspended(port, 0); /* - * Free the transmit buffer page. + * Do not free() the transmit buffer page under the port lock since + * this can create various circular locking scenarios. For instance, + * console driver may need to allocate/free a debug object, which + * can endup in printk() recursion. */ uart_port_lock(state, flags); - if (state->xmit.buf) { - free_page((unsigned long)state->xmit.buf); - state->xmit.buf = NULL; - } + xmit_buf = state->xmit.buf; + state->xmit.buf = NULL; uart_port_unlock(uport, flags); + + if (xmit_buf) + free_page((unsigned long)xmit_buf); } /** From e5ae88fabeab15a87ea4663d7a9ca98a9b593b74 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 26 Nov 2018 14:12:52 +0200 Subject: [PATCH 1510/2608] perf intel-pt: Fix error with config term "pt=0" [ Upstream commit 1c6f709b9f96366cc47af23c05ecec9b8c0c392d ] Users should never use 'pt=0', but if they do it may give a meaningless error: $ perf record -e intel_pt/pt=0/u uname Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (intel_pt/pt=0/u). Fix that by forcing 'pt=1'. Committer testing: # perf record -e intel_pt/pt=0/u uname Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (intel_pt/pt=0/u). /bin/dmesg | grep -i perf may provide additional information. # perf record -e intel_pt/pt=0/u uname pt=0 doesn't make sense, forcing pt=1 Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data ] # Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/b7c5b4e5-9497-10e5-fd43-5f3e4a0fe51d@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/arch/x86/util/intel-pt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index db0ba8caf5a2..ba8ecaf52200 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -524,10 +524,21 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, struct perf_evsel *evsel) { int err; + char c; if (!evsel) return 0; + /* + * If supported, force pass-through config term (pt=1) even if user + * sets pt=0, which avoids senseless kernel errors. + */ + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + !(evsel->attr.config & 1)) { + pr_warning("pt=0 doesn't make sense, forcing pt=1\n"); + evsel->attr.config |= 1; + } + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", "cyc_thresh", "caps/psb_cyc", evsel->attr.config); From 9010bb9eaa30a52d3155a18eb0907dc9c5c77fb2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Dec 2018 11:29:48 -0300 Subject: [PATCH 1511/2608] perf svghelper: Fix unchecked usage of strncpy() [ Upstream commit 2f5302533f306d5ee87bd375aef9ca35b91762cb ] The strncpy() function may leave the destination string buffer unterminated, better use strlcpy() that we have a __weak fallback implementation for systems without it. In this specific case this would only happen if fgets() was buggy, as its man page states that it should read one less byte than the size of the destination buffer, so that it can put the nul byte at the end of it, so it would never copy 255 non-nul chars, as fgets reads into the orig buffer at most 254 non-nul chars and terminates it. But lets just switch to strlcpy to keep the original intent and silence the gcc 8.2 warning. This fixes this warning on an Alpine Linux Edge system with gcc 8.2: In function 'cpu_model', inlined from 'svg_cpu_box' at util/svghelper.c:378:2: util/svghelper.c:337:5: error: 'strncpy' output may be truncated copying 255 bytes from a string of length 255 [-Werror=stringop-truncation] strncpy(cpu_m, &buf[13], 255); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arjan van de Ven Fixes: f48d55ce7871 ("perf: Add a SVG helper library file") Link: https://lkml.kernel.org/n/tip-xzkoo0gyr56gej39ltivuh9g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/svghelper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 1cbada2dc6be..f735ee038713 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -334,7 +334,7 @@ static char *cpu_model(void) if (file) { while (fgets(buf, 255, file)) { if (strstr(buf, "model name")) { - strncpy(cpu_m, &buf[13], 255); + strlcpy(cpu_m, &buf[13], 255); break; } } From 05f94c60f3cbf7355f7092fe74e52c0f0bb4f48a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Dec 2018 13:52:13 -0300 Subject: [PATCH 1512/2608] perf parse-events: Fix unchecked usage of strncpy() [ Upstream commit bd8d57fb7e25e9fcf67a9eef5fa13aabe2016e07 ] The strncpy() function may leave the destination string buffer unterminated, better use strlcpy() that we have a __weak fallback implementation for systems without it. This fixes this warning on an Alpine Linux Edge system with gcc 8.2: util/parse-events.c: In function 'print_symbol_events': util/parse-events.c:2465:4: error: 'strncpy' specified bound 100 equals destination size [-Werror=stringop-truncation] strncpy(name, syms->symbol, MAX_NAME_LEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'print_symbol_events.constprop', inlined from 'print_events' at util/parse-events.c:2508:2: util/parse-events.c:2465:4: error: 'strncpy' specified bound 100 equals destination size [-Werror=stringop-truncation] strncpy(name, syms->symbol, MAX_NAME_LEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'print_symbol_events.constprop', inlined from 'print_events' at util/parse-events.c:2511:2: util/parse-events.c:2465:4: error: 'strncpy' specified bound 100 equals destination size [-Werror=stringop-truncation] strncpy(name, syms->symbol, MAX_NAME_LEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Fixes: 947b4ad1d198 ("perf list: Fix max event string size") Link: https://lkml.kernel.org/n/tip-b663e33bm6x8hrkie4uxh7u2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 53f620472151..d0b92d374ba9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2300,7 +2300,7 @@ restart: if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else - strncpy(name, syms->symbol, MAX_NAME_LEN); + strlcpy(name, syms->symbol, MAX_NAME_LEN); evt_list[evt_i] = strdup(name); if (evt_list[evt_i] == NULL) From e6b1503c4d6fe1b45b12fe0fbe5ba125bcc22757 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Nov 2018 18:23:25 +0900 Subject: [PATCH 1513/2608] netfilter: ipt_CLUSTERIP: check MAC address when duplicate config is set [ Upstream commit 06aa151ad1fc74a49b45336672515774a678d78d ] If same destination IP address config is already existing, that config is just used. MAC address also should be same. However, there is no MAC address checking routine. So that MAC address checking routine is added. test commands: %iptables -A INPUT -p tcp -i lo -d 192.168.0.5 --dport 80 \ -j CLUSTERIP --new --hashmode sourceip \ --clustermac 01:00:5e:00:00:20 --total-nodes 2 --local-node 1 %iptables -A INPUT -p tcp -i lo -d 192.168.0.5 --dport 80 \ -j CLUSTERIP --new --hashmode sourceip \ --clustermac 01:00:5e:00:00:21 --total-nodes 2 --local-node 1 After this patch, above commands are disallowed. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index cc7c9d67ac19..45f21489f515 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -492,7 +492,8 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + return -EINVAL; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { From 288a8efcc95094905a9782fccb4d2b4f2fabfff5 Mon Sep 17 00:00:00 2001 From: AliOS system security Date: Mon, 5 Nov 2018 15:31:42 +0800 Subject: [PATCH 1514/2608] dm crypt: use u64 instead of sector_t to store iv_offset [ Upstream commit 8d683dcd65c037efc9fb38c696ec9b65b306e573 ] The iv_offset in the mapping table of crypt target is a 64bit number when IV algorithm is plain64, plain64be, essiv or benbi. It will be assigned to iv_offset of struct crypt_config, cc_sector of struct convert_context and iv_sector of struct dm_crypt_request. These structures members are defined as a sector_t. But sector_t is 32bit when CONFIG_LBDAF is not set in 32bit kernel. In this situation sector_t is not big enough to store the 64bit iv_offset. Here is a reproducer. Prepare test image and device (loop is automatically allocated by cryptsetup): # dd if=/dev/zero of=tst.img bs=1M count=1 # echo "tst"|cryptsetup open --type plain -c aes-xts-plain64 \ --skip 500000000000000000 tst.img test On 32bit system (use IV offset value that overflows to 64bit; CONFIG_LBDAF if off) and device checksum is wrong: # dmsetup table test --showkeys 0 2048 crypt aes-xts-plain64 dfa7cfe3c481f2239155739c42e539ae8f2d38f304dcc89d20b26f69daaf0933 3551657984 7:0 0 # sha256sum /dev/mapper/test 533e25c09176632b3794f35303488c4a8f3f965dffffa6ec2df347c168cb6c19 /dev/mapper/test On 64bit system (and on 32bit system with the patch), table and checksum is now correct: # dmsetup table test --showkeys 0 2048 crypt aes-xts-plain64 dfa7cfe3c481f2239155739c42e539ae8f2d38f304dcc89d20b26f69daaf0933 500000000000000000 7:0 0 # sha256sum /dev/mapper/test 5d16160f9d5f8c33d8051e65fdb4f003cc31cd652b5abb08f03aa6fce0df75fc /dev/mapper/test Signed-off-by: AliOS system security Tested-and-Reviewed-by: Milan Broz Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-crypt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index c60d29d09687..2652ef68d58d 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -49,7 +49,7 @@ struct convert_context { struct bio *bio_out; struct bvec_iter iter_in; struct bvec_iter iter_out; - sector_t cc_sector; + u64 cc_sector; atomic_t cc_pending; union { struct skcipher_request *req; @@ -81,7 +81,7 @@ struct dm_crypt_request { struct convert_context *ctx; struct scatterlist sg_in[4]; struct scatterlist sg_out[4]; - sector_t iv_sector; + u64 iv_sector; }; struct crypt_config; @@ -172,7 +172,7 @@ struct crypt_config { struct iv_lmk_private lmk; struct iv_tcw_private tcw; } iv_gen_private; - sector_t iv_offset; + u64 iv_offset; unsigned int iv_size; unsigned short int sector_size; unsigned char sector_shift; From 8561f28c81dc0fa8469fd2c87a7888d919100d12 Mon Sep 17 00:00:00 2001 From: Nikos Tsironis Date: Wed, 31 Oct 2018 17:53:09 -0400 Subject: [PATCH 1515/2608] dm kcopyd: Fix bug causing workqueue stalls [ Upstream commit d7e6b8dfc7bcb3f4f3a18313581f67486a725b52 ] When using kcopyd to run callbacks through dm_kcopyd_do_callback() or submitting copy jobs with a source size of 0, the jobs are pushed directly to the complete_jobs list, which could be under processing by the kcopyd thread. As a result, the kcopyd thread can continue running completed jobs indefinitely, without releasing the CPU, as long as someone keeps submitting new completed jobs through the aforementioned paths. Processing of work items, queued for execution on the same CPU as the currently running kcopyd thread, is thus stalled for excessive amounts of time, hurting performance. Running the following test, from the device mapper test suite [1], dmtest run --suite snapshot -n parallel_io_to_many_snaps_N , with 8 active snapshots, we get, in dmesg, messages like the following: [68899.948523] BUG: workqueue lockup - pool cpus=0 node=0 flags=0x0 nice=0 stuck for 95s! [68899.949282] Showing busy workqueues and worker pools: [68899.949288] workqueue events: flags=0x0 [68899.949295] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256 [68899.949306] pending: vmstat_shepherd, cache_reap [68899.949331] workqueue mm_percpu_wq: flags=0x8 [68899.949337] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949345] pending: vmstat_update [68899.949387] workqueue dm_bufio_cache: flags=0x8 [68899.949392] pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=1/256 [68899.949400] pending: work_fn [dm_bufio] [68899.949423] workqueue kcopyd: flags=0x8 [68899.949429] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949437] pending: do_work [dm_mod] [68899.949452] workqueue kcopyd: flags=0x8 [68899.949458] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256 [68899.949466] in-flight: 13:do_work [dm_mod] [68899.949474] pending: do_work [dm_mod] [68899.949487] workqueue kcopyd: flags=0x8 [68899.949493] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949501] pending: do_work [dm_mod] [68899.949515] workqueue kcopyd: flags=0x8 [68899.949521] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949529] pending: do_work [dm_mod] [68899.949541] workqueue kcopyd: flags=0x8 [68899.949547] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949555] pending: do_work [dm_mod] [68899.949568] pool 0: cpus=0 node=0 flags=0x0 nice=0 hung=95s workers=4 idle: 27130 27223 1084 Fix this by splitting the complete_jobs list into two parts: A user facing part, named callback_jobs, and one used internally by kcopyd, retaining the name complete_jobs. dm_kcopyd_do_callback() and dispatch_job() now push their jobs to the callback_jobs list, which is spliced to the complete_jobs list once, every time the kcopyd thread wakes up. This prevents kcopyd from hogging the CPU indefinitely and causing workqueue stalls. Re-running the aforementioned test: * Workqueue stalls are eliminated * The maximum writing time among all targets is reduced from 09m37.10s to 06m04.85s and the total run time of the test is reduced from 10m43.591s to 7m19.199s [1] https://github.com/jthornber/device-mapper-test-suite Signed-off-by: Nikos Tsironis Signed-off-by: Ilias Tsitsimpis Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-kcopyd.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index d4b326914f06..b9d1897bcf5b 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -55,15 +55,17 @@ struct dm_kcopyd_client { struct dm_kcopyd_throttle *throttle; /* - * We maintain three lists of jobs: + * We maintain four lists of jobs: * * i) jobs waiting for pages * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. + * iii) jobs that don't need to do any IO and just run a callback + * iv) jobs that have completed. * - * All three of these are protected by job_lock. + * All four of these are protected by job_lock. */ spinlock_t job_lock; + struct list_head callback_jobs; struct list_head complete_jobs; struct list_head io_jobs; struct list_head pages_jobs; @@ -622,6 +624,7 @@ static void do_work(struct work_struct *work) struct dm_kcopyd_client *kc = container_of(work, struct dm_kcopyd_client, kcopyd_work); struct blk_plug plug; + unsigned long flags; /* * The order that these are called is *very* important. @@ -630,6 +633,10 @@ static void do_work(struct work_struct *work) * list. io jobs call wake when they complete and it all * starts again. */ + spin_lock_irqsave(&kc->job_lock, flags); + list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs); + spin_unlock_irqrestore(&kc->job_lock, flags); + blk_start_plug(&plug); process_jobs(&kc->complete_jobs, kc, run_complete_job); process_jobs(&kc->pages_jobs, kc, run_pages_job); @@ -647,7 +654,7 @@ static void dispatch_job(struct kcopyd_job *job) struct dm_kcopyd_client *kc = job->kc; atomic_inc(&kc->nr_jobs); if (unlikely(!job->source.count)) - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); else if (job->pages == &zero_page_list) push(&kc->io_jobs, job); else @@ -857,7 +864,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) job->read_err = read_err; job->write_err = write_err; - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); wake(kc); } EXPORT_SYMBOL(dm_kcopyd_do_callback); @@ -887,6 +894,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro return ERR_PTR(-ENOMEM); spin_lock_init(&kc->job_lock); + INIT_LIST_HEAD(&kc->callback_jobs); INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); @@ -936,6 +944,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) /* Wait for completion of all jobs submitted by this client. */ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); + BUG_ON(!list_empty(&kc->callback_jobs)); BUG_ON(!list_empty(&kc->complete_jobs)); BUG_ON(!list_empty(&kc->io_jobs)); BUG_ON(!list_empty(&kc->pages_jobs)); From 55f603dcc121acca50066dde8ac1b68f86d7ef19 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Dec 2018 15:00:52 -0300 Subject: [PATCH 1516/2608] tools lib subcmd: Don't add the kernel sources to the include path [ Upstream commit ece9804985b57e1ccd83b1fb6288520955a29d51 ] At some point we decided not to directly include kernel sources files when building tools/perf/, but when tools/lib/subcmd/ was forked from tools/perf it somehow ended up adding it via these two lines in its Makefile: CFLAGS += -I$(srctree)/include/uapi CFLAGS += -I$(srctree)/include As $(srctree) points to the kernel sources. Removing those lines and keeping just: CFLAGS += -I$(srctree)/tools/include/ Is enough to build tools/perf and tools/objtool. This fixes the build when building from the sources in environments such as the Android NDK crossbuilding from a fedora:26 system: subcmd-util.h:11:15: error: expected ',' or ';' before 'void' static inline void report(const char *prefix, const char *err, va_list params) ^ In file included from /git/perf/include/uapi/linux/stddef.h:2:0, from /git/perf/include/uapi/linux/posix_types.h:5, from /opt/android-ndk-r12b/platforms/android-24/arch-arm/usr/include/sys/types.h:36, from /opt/android-ndk-r12b/platforms/android-24/arch-arm/usr/include/unistd.h:33, from run-command.c:2: subcmd-util.h:18:17: error: '__no_instrument_function__' attribute applies only to functions The /opt/android-ndk-r12b/platforms/android-24/arch-arm/usr/include/sys/types.h file that includes linux/posix_types.h ends up getting the one in the kernel sources causing the breakage. Fix it. Test built tools/objtool/ too. Reported-by: Jiri Olsa Tested-by: Jiri Olsa Cc: Adrian Hunter Cc: Josh Poimboeuf Cc: Namhyung Kim Fixes: 4b6ab94eabe4 ("perf subcmd: Create subcmd library") Link: https://lkml.kernel.org/n/tip-5lhaoecrj12t0bqwvpiu14sm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/subcmd/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 95563b8e1ad7..ed61fb3a46c0 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -36,8 +36,6 @@ endif CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE CFLAGS += -I$(srctree)/tools/include/ -CFLAGS += -I$(srctree)/include/uapi -CFLAGS += -I$(srctree)/include SUBCMD_IN := $(OUTPUT)libsubcmd-in.o From 8189ee4798783fa79f54b9ad2e3e05cb35faec58 Mon Sep 17 00:00:00 2001 From: Nikos Tsironis Date: Wed, 31 Oct 2018 17:53:08 -0400 Subject: [PATCH 1517/2608] dm snapshot: Fix excessive memory usage and workqueue stalls [ Upstream commit 721b1d98fb517ae99ab3b757021cf81db41e67be ] kcopyd has no upper limit to the number of jobs one can allocate and issue. Under certain workloads this can lead to excessive memory usage and workqueue stalls. For example, when creating multiple dm-snapshot targets with a 4K chunk size and then writing to the origin through the page cache. Syncing the page cache causes a large number of BIOs to be issued to the dm-snapshot origin target, which itself issues an even larger (because of the BIO splitting taking place) number of kcopyd jobs. Running the following test, from the device mapper test suite [1], dmtest run --suite snapshot -n many_snapshots_of_same_volume_N , with 8 active snapshots, results in the kcopyd job slab cache growing to 10G. Depending on the available system RAM this can lead to the OOM killer killing user processes: [463.492878] kthreadd invoked oom-killer: gfp_mask=0x6040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null), order=1, oom_score_adj=0 [463.492894] kthreadd cpuset=/ mems_allowed=0 [463.492948] CPU: 7 PID: 2 Comm: kthreadd Not tainted 4.19.0-rc7 #3 [463.492950] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [463.492952] Call Trace: [463.492964] dump_stack+0x7d/0xbb [463.492973] dump_header+0x6b/0x2fc [463.492987] ? lockdep_hardirqs_on+0xee/0x190 [463.493012] oom_kill_process+0x302/0x370 [463.493021] out_of_memory+0x113/0x560 [463.493030] __alloc_pages_slowpath+0xf40/0x1020 [463.493055] __alloc_pages_nodemask+0x348/0x3c0 [463.493067] cache_grow_begin+0x81/0x8b0 [463.493072] ? cache_grow_begin+0x874/0x8b0 [463.493078] fallback_alloc+0x1e4/0x280 [463.493092] kmem_cache_alloc_node+0xd6/0x370 [463.493098] ? copy_process.part.31+0x1c5/0x20d0 [463.493105] copy_process.part.31+0x1c5/0x20d0 [463.493115] ? __lock_acquire+0x3cc/0x1550 [463.493121] ? __switch_to_asm+0x34/0x70 [463.493129] ? kthread_create_worker_on_cpu+0x70/0x70 [463.493135] ? finish_task_switch+0x90/0x280 [463.493165] _do_fork+0xe0/0x6d0 [463.493191] ? kthreadd+0x19f/0x220 [463.493233] kernel_thread+0x25/0x30 [463.493235] kthreadd+0x1bf/0x220 [463.493242] ? kthread_create_on_cpu+0x90/0x90 [463.493248] ret_from_fork+0x3a/0x50 [463.493279] Mem-Info: [463.493285] active_anon:20631 inactive_anon:4831 isolated_anon:0 [463.493285] active_file:80216 inactive_file:80107 isolated_file:435 [463.493285] unevictable:0 dirty:51266 writeback:109372 unstable:0 [463.493285] slab_reclaimable:31191 slab_unreclaimable:3483521 [463.493285] mapped:526 shmem:4903 pagetables:1759 bounce:0 [463.493285] free:33623 free_pcp:2392 free_cma:0 ... [463.493489] Unreclaimable slab info: [463.493513] Name Used Total [463.493522] bio-6 1028KB 1028KB [463.493525] bio-5 1028KB 1028KB [463.493528] dm_snap_pending_exception 236783KB 243789KB [463.493531] dm_exception 41KB 42KB [463.493534] bio-4 1216KB 1216KB [463.493537] bio-3 439396KB 439396KB [463.493539] kcopyd_job 6973427KB 6973427KB ... [463.494340] Out of memory: Kill process 1298 (ruby2.3) score 1 or sacrifice child [463.494673] Killed process 1298 (ruby2.3) total-vm:435740kB, anon-rss:20180kB, file-rss:4kB, shmem-rss:0kB [463.506437] oom_reaper: reaped process 1298 (ruby2.3), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB Moreover, issuing a large number of kcopyd jobs results in kcopyd hogging the CPU, while processing them. As a result, processing of work items, queued for execution on the same CPU as the currently running kcopyd thread, is stalled for long periods of time, hurting performance. Running the aforementioned test we get, in dmesg, messages like the following: [67501.194592] BUG: workqueue lockup - pool cpus=4 node=0 flags=0x0 nice=0 stuck for 27s! [67501.195586] Showing busy workqueues and worker pools: [67501.195591] workqueue events: flags=0x0 [67501.195597] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195611] pending: cache_reap [67501.195641] workqueue mm_percpu_wq: flags=0x8 [67501.195645] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195656] pending: vmstat_update [67501.195682] workqueue kblockd: flags=0x18 [67501.195687] pwq 5: cpus=2 node=0 flags=0x0 nice=-20 active=1/256 [67501.195698] pending: blk_timeout_work [67501.195753] workqueue kcopyd: flags=0x8 [67501.195757] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195768] pending: do_work [dm_mod] [67501.195802] workqueue kcopyd: flags=0x8 [67501.195806] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195817] pending: do_work [dm_mod] [67501.195834] workqueue kcopyd: flags=0x8 [67501.195838] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195848] pending: do_work [dm_mod] [67501.195881] workqueue kcopyd: flags=0x8 [67501.195885] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195896] pending: do_work [dm_mod] [67501.195920] workqueue kcopyd: flags=0x8 [67501.195924] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=2/256 [67501.195935] in-flight: 67:do_work [dm_mod] [67501.195945] pending: do_work [dm_mod] [67501.195961] pool 8: cpus=4 node=0 flags=0x0 nice=0 hung=27s workers=3 idle: 129 23765 The root cause for these issues is the way dm-snapshot uses kcopyd. In particular, the lack of an explicit or implicit limit to the maximum number of in-flight COW jobs. The merging path is not affected because it implicitly limits the in-flight kcopyd jobs to one. Fix these issues by using a semaphore to limit the maximum number of in-flight kcopyd jobs. We grab the semaphore before allocating a new kcopyd job in start_copy() and start_full_bio() and release it after the job finishes in copy_callback(). The initial semaphore value is configurable through a module parameter, to allow fine tuning the maximum number of in-flight COW jobs. Setting this parameter to zero initializes the semaphore to INT_MAX. A default value of 2048 maximum in-flight kcopyd jobs was chosen. This value was decided experimentally as a trade-off between memory consumption, stalling the kernel's workqueues and maintaining a high enough throughput. Re-running the aforementioned test: * Workqueue stalls are eliminated * kcopyd's job slab cache uses a maximum of 130MB * The time taken by the test to write to the snapshot-origin target is reduced from 05m20.48s to 03m26.38s [1] https://github.com/jthornber/device-mapper-test-suite Signed-off-by: Nikos Tsironis Signed-off-by: Ilias Tsitsimpis Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-snap.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a0613bd8ed00..b502debc6df3 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "dm.h" @@ -105,6 +106,9 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; + /* Maximum number of in-flight COW jobs. */ + struct semaphore cow_count; + struct dm_kcopyd_client *kcopyd_client; /* Wait for events based on state_bits */ @@ -145,6 +149,19 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +/* + * Maximum number of chunks being copied on write. + * + * The value was decided experimentally as a trade-off between memory + * consumption, stalling the kernel's workqueues and maintaining a high enough + * throughput. + */ +#define DEFAULT_COW_THRESHOLD 2048 + +static int cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); + DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write"); @@ -1189,6 +1206,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } + sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); @@ -1560,6 +1579,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) } list_add(&pe->out_of_order_entry, lh); } + up(&s->cow_count); } /* @@ -1583,6 +1603,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ + down(&s->cow_count); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1602,6 +1623,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; + down(&s->cow_count); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); From 8c3e1bccef9275219c2c24f8efe15d641f091bd4 Mon Sep 17 00:00:00 2001 From: Javier Barrio Date: Thu, 13 Dec 2018 01:06:29 +0100 Subject: [PATCH 1518/2608] quota: Lock s_umount in exclusive mode for Q_XQUOTA{ON,OFF} quotactls. [ Upstream commit 41c4f85cdac280d356df1f483000ecec4a8868be ] Commit 1fa5efe3622db58cb8c7b9a50665e9eb9a6c7e97 (ext4: Use generic helpers for quotaon and quotaoff) made possible to call quotactl(Q_XQUOTAON/OFF) on ext4 filesystems with sysfile quota support. This leads to calling dquot_enable/disable without s_umount held in excl. mode, because quotactl_cmd_onoff checks only for Q_QUOTAON/OFF. The following WARN_ON_ONCE triggers (in this case for dquot_enable, ext4, latest Linus' tree): [ 117.807056] EXT4-fs (dm-0): mounted filesystem with ordered data mode. Opts: quota,prjquota [...] [ 155.036847] WARNING: CPU: 0 PID: 2343 at fs/quota/dquot.c:2469 dquot_enable+0x34/0xb9 [ 155.036851] Modules linked in: quota_v2 quota_tree ipv6 af_packet joydev mousedev psmouse serio_raw pcspkr i2c_piix4 intel_agp intel_gtt e1000 ttm drm_kms_helper drm agpgart fb_sys_fops syscopyarea sysfillrect sysimgblt i2c_core input_leds kvm_intel kvm irqbypass qemu_fw_cfg floppy evdev parport_pc parport button crc32c_generic dm_mod ata_generic pata_acpi ata_piix libata loop ext4 crc16 mbcache jbd2 usb_storage usbcore sd_mod scsi_mod [ 155.036901] CPU: 0 PID: 2343 Comm: qctl Not tainted 4.20.0-rc6-00025-gf5d582777bcb #9 [ 155.036903] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 155.036911] RIP: 0010:dquot_enable+0x34/0xb9 [ 155.036915] Code: 41 56 41 55 41 54 55 53 4c 8b 6f 28 74 02 0f 0b 4d 8d 7d 70 49 89 fc 89 cb 41 89 d6 89 f5 4c 89 ff e8 23 09 ea ff 85 c0 74 0a <0f> 0b 4c 89 ff e8 8b 09 ea ff 85 db 74 6a 41 8b b5 f8 00 00 00 0f [ 155.036918] RSP: 0018:ffffb09b00493e08 EFLAGS: 00010202 [ 155.036922] RAX: 0000000000000001 RBX: 0000000000000008 RCX: 0000000000000008 [ 155.036924] RDX: 0000000000000001 RSI: 0000000000000002 RDI: ffff9781b67cd870 [ 155.036926] RBP: 0000000000000002 R08: 0000000000000000 R09: 61c8864680b583eb [ 155.036929] R10: ffffb09b00493e48 R11: ffffffffff7ce7d4 R12: ffff9781b7ee8d78 [ 155.036932] R13: ffff9781b67cd800 R14: 0000000000000004 R15: ffff9781b67cd870 [ 155.036936] FS: 00007fd813250b88(0000) GS:ffff9781ba000000(0000) knlGS:0000000000000000 [ 155.036939] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 155.036942] CR2: 00007fd812ff61d6 CR3: 000000007c882000 CR4: 00000000000006b0 [ 155.036951] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 155.036953] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 155.036955] Call Trace: [ 155.037004] dquot_quota_enable+0x8b/0xd0 [ 155.037011] kernel_quotactl+0x628/0x74e [ 155.037027] ? do_mprotect_pkey+0x2a6/0x2cd [ 155.037034] __x64_sys_quotactl+0x1a/0x1d [ 155.037041] do_syscall_64+0x55/0xe4 [ 155.037078] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 155.037105] RIP: 0033:0x7fd812fe1198 [ 155.037109] Code: 02 77 0d 48 89 c1 48 c1 e9 3f 75 04 48 8b 04 24 48 83 c4 50 5b c3 48 83 ec 08 49 89 ca 48 63 d2 48 63 ff b8 b3 00 00 00 0f 05 <48> 89 c7 e8 c1 eb ff ff 5a c3 48 63 ff b8 bb 00 00 00 0f 05 48 89 [ 155.037112] RSP: 002b:00007ffe8cd7b050 EFLAGS: 00000206 ORIG_RAX: 00000000000000b3 [ 155.037116] RAX: ffffffffffffffda RBX: 00007ffe8cd7b148 RCX: 00007fd812fe1198 [ 155.037119] RDX: 0000000000000000 RSI: 00007ffe8cd7cea9 RDI: 0000000000580102 [ 155.037121] RBP: 00007ffe8cd7b0f0 R08: 000055fc8eba8a9d R09: 0000000000000000 [ 155.037124] R10: 00007ffe8cd7b074 R11: 0000000000000206 R12: 00007ffe8cd7b168 [ 155.037126] R13: 000055fc8eba8897 R14: 0000000000000000 R15: 0000000000000000 [ 155.037131] ---[ end trace 210f864257175c51 ]--- and then the syscall proceeds without s_umount locking. This patch locks the superblock ->s_umount sem. in exclusive mode for all Q_XQUOTAON/OFF quotactls too in addition to Q_QUOTAON/OFF. AFAICT, other than ext4, only xfs and ocfs2 are affected by this change. The VFS will now call in xfs_quota_* functions with s_umount held, which wasn't the case before. This looks good to me but I can not say for sure. Ext4 and ocfs2 where already beeing called with s_umount exclusive via quota_quotaon/off which is basically the same. Signed-off-by: Javier Barrio Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/quota/quota.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 3f02bab0db4e..a89c1f05b22e 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -793,7 +793,8 @@ static int quotactl_cmd_write(int cmd) /* Return true if quotactl command is manipulating quota on/off state */ static bool quotactl_cmd_onoff(int cmd) { - return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF); + return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF) || + (cmd == Q_XQUOTAON) || (cmd == Q_XQUOTAOFF); } /* From 16df3ffca77a3d954afe15618f2da7a388386004 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 25 Nov 2018 00:00:49 -0500 Subject: [PATCH 1519/2608] clocksource/drivers/integrator-ap: Add missing of_node_put() [ Upstream commit 5eb73c831171115d3b4347e1e7124a5a35d8086c ] The function of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. integrator_ap_timer_init_of() doesn't do that. The pri_node and the sec_node are used as an identifier to compare against the current node, so we can directly drop the refcount after getting the node from the path as it is not used as pointer. By dropping the refcount right after getting it, a single variable is needed instead of two. Fix this by use a single variable and drop the refcount right after of_find_node_by_path(). Signed-off-by: Yangtao Li Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/timer-integrator-ap.c | 25 +++++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c index 62d24690ba02..9701107806a7 100644 --- a/drivers/clocksource/timer-integrator-ap.c +++ b/drivers/clocksource/timer-integrator-ap.c @@ -181,8 +181,7 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) int irq; struct clk *clk; unsigned long rate; - struct device_node *pri_node; - struct device_node *sec_node; + struct device_node *alias_node; base = of_io_request_and_map(node, 0, "integrator-timer"); if (IS_ERR(base)) @@ -204,7 +203,18 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) return err; } - pri_node = of_find_node_by_path(path); + alias_node = of_find_node_by_path(path); + + /* + * The pointer is used as an identifier not as a pointer, we + * can drop the refcount on the of__node immediately after + * getting it. + */ + of_node_put(alias_node); + + if (node == alias_node) + /* The primary timer lacks IRQ, use as clocksource */ + return integrator_clocksource_init(rate, base); err = of_property_read_string(of_aliases, "arm,timer-secondary", &path); @@ -213,14 +223,11 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) return err; } + alias_node = of_find_node_by_path(path); - sec_node = of_find_node_by_path(path); + of_node_put(alias_node); - if (node == pri_node) - /* The primary timer lacks IRQ, use as clocksource */ - return integrator_clocksource_init(rate, base); - - if (node == sec_node) { + if (node == alias_node) { /* The secondary timer will drive the clock event */ irq = irq_of_parse_and_map(node, 0); return integrator_clockevent_init(rate, base, irq); From 7afd8e3b85e318dd52b745e83c3a55c5fbf929c3 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 19 Dec 2018 20:00:42 +0900 Subject: [PATCH 1520/2608] ALSA: bebob: fix model-id of unit for Apogee Ensemble [ Upstream commit 644b2e97405b0b74845e1d3c2b4fe4c34858062b ] This commit fixes hard-coded model-id for an unit of Apogee Ensemble with a correct value. This unit uses DM1500 ASIC produced ArchWave AG (formerly known as BridgeCo AG). I note that this model supports three modes in the number of data channels in tx/rx streams; 8 ch pairs, 10 ch pairs, 18 ch pairs. The mode is switched by Vendor-dependent AV/C command, like: $ cd linux-firewire-utils $ ./firewire-request /dev/fw1 fcp 0x00ff000003dbeb0600000000 (8ch pairs) $ ./firewire-request /dev/fw1 fcp 0x00ff000003dbeb0601000000 (10ch pairs) $ ./firewire-request /dev/fw1 fcp 0x00ff000003dbeb0602000000 (18ch pairs) When switching between different mode, the unit disappears from IEEE 1394 bus, then appears on the bus with different combination of stream formats. In a mode of 18 ch pairs, available sampling rate is up to 96.0 kHz, else up to 192.0 kHz. $ ./hinawa-config-rom-printer /dev/fw1 { 'bus-info': { 'adj': False, 'bmc': True, 'chip_ID': 21474898341, 'cmc': True, 'cyc_clk_acc': 100, 'generation': 2, 'imc': True, 'isc': True, 'link_spd': 2, 'max_ROM': 1, 'max_rec': 512, 'name': '1394', 'node_vendor_ID': 987, 'pmc': False}, 'root-directory': [ ['HARDWARE_VERSION', 19], [ 'NODE_CAPABILITIES', { 'addressing': {'64': True, 'fix': True, 'prv': False}, 'misc': {'int': False, 'ms': False, 'spt': True}, 'state': { 'atn': False, 'ded': False, 'drq': True, 'elo': False, 'init': False, 'lst': True, 'off': False}, 'testing': {'bas': False, 'ext': False}}], ['VENDOR', 987], ['DESCRIPTOR', 'Apogee Electronics'], ['MODEL', 126702], ['DESCRIPTOR', 'Ensemble'], ['VERSION', 5297], [ 'UNIT', [ ['SPECIFIER_ID', 41005], ['VERSION', 65537], ['MODEL', 126702], ['DESCRIPTOR', 'Ensemble']]], [ 'DEPENDENT_INFO', [ ['SPECIFIER_ID', 2037], ['VERSION', 1], [(58, 'IMMEDIATE'), 16777159], [(59, 'IMMEDIATE'), 1048576], [(60, 'IMMEDIATE'), 16777159], [(61, 'IMMEDIATE'), 6291456]]]]} Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/firewire/bebob/bebob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c index 93676354f87f..de4af8a41ff0 100644 --- a/sound/firewire/bebob/bebob.c +++ b/sound/firewire/bebob/bebob.c @@ -434,7 +434,7 @@ static const struct ieee1394_device_id bebob_id_table[] = { /* Apogee Electronics, DA/AD/DD-16X (X-FireWire card) */ SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x00010048, &spec_normal), /* Apogee Electronics, Ensemble */ - SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x00001eee, &spec_normal), + SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x01eeee, &spec_normal), /* ESI, Quatafire610 */ SND_BEBOB_DEV_ENTRY(VEN_ESI, 0x00010064, &spec_normal), /* AcousticReality, eARMasterOne */ From d9c25a6fe016afa9c511d1439db8ca5c88a7582a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 19 Dec 2018 13:39:09 +0100 Subject: [PATCH 1521/2608] sysfs: Disable lockdep for driver bind/unbind files [ Upstream commit 4f4b374332ec0ae9c738ff8ec9bed5cd97ff9adc ] This is the much more correct fix for my earlier attempt at: https://lkml.org/lkml/2018/12/10/118 Short recap: - There's not actually a locking issue, it's just lockdep being a bit too eager to complain about a possible deadlock. - Contrary to what I claimed the real problem is recursion on kn->count. Greg pointed me at sysfs_break_active_protection(), used by the scsi subsystem to allow a sysfs file to unbind itself. That would be a real deadlock, which isn't what's happening here. Also, breaking the active protection means we'd need to manually handle all the lifetime fun. - With Rafael we discussed the task_work approach, which kinda works, but has two downsides: It's a functional change for a lockdep annotation issue, and it won't work for the bind file (which needs to get the errno from the driver load function back to userspace). - Greg also asked why this never showed up: To hit this you need to unregister a 2nd driver from the unload code of your first driver. I guess only gpus do that. The bug has always been there, but only with a recent patch series did we add more locks so that lockdep built a chain from unbinding the snd-hda driver to the acpi_video_unregister call. Full lockdep splat: [12301.898799] ============================================ [12301.898805] WARNING: possible recursive locking detected [12301.898811] 4.20.0-rc7+ #84 Not tainted [12301.898815] -------------------------------------------- [12301.898821] bash/5297 is trying to acquire lock: [12301.898826] 00000000f61c6093 (kn->count#39){++++}, at: kernfs_remove_by_name_ns+0x3b/0x80 [12301.898841] but task is already holding lock: [12301.898847] 000000005f634021 (kn->count#39){++++}, at: kernfs_fop_write+0xdc/0x190 [12301.898856] other info that might help us debug this: [12301.898862] Possible unsafe locking scenario: [12301.898867] CPU0 [12301.898870] ---- [12301.898874] lock(kn->count#39); [12301.898879] lock(kn->count#39); [12301.898883] *** DEADLOCK *** [12301.898891] May be due to missing lock nesting notation [12301.898899] 5 locks held by bash/5297: [12301.898903] #0: 00000000cd800e54 (sb_writers#4){.+.+}, at: vfs_write+0x17f/0x1b0 [12301.898915] #1: 000000000465e7c2 (&of->mutex){+.+.}, at: kernfs_fop_write+0xd3/0x190 [12301.898925] #2: 000000005f634021 (kn->count#39){++++}, at: kernfs_fop_write+0xdc/0x190 [12301.898936] #3: 00000000414ef7ac (&dev->mutex){....}, at: device_release_driver_internal+0x34/0x240 [12301.898950] #4: 000000003218fbdf (register_count_mutex){+.+.}, at: acpi_video_unregister+0xe/0x40 [12301.898960] stack backtrace: [12301.898968] CPU: 1 PID: 5297 Comm: bash Not tainted 4.20.0-rc7+ #84 [12301.898974] Hardware name: Hewlett-Packard HP EliteBook 8460p/161C, BIOS 68SCF Ver. F.01 03/11/2011 [12301.898982] Call Trace: [12301.898989] dump_stack+0x67/0x9b [12301.898997] __lock_acquire+0x6ad/0x1410 [12301.899003] ? kernfs_remove_by_name_ns+0x3b/0x80 [12301.899010] ? find_held_lock+0x2d/0x90 [12301.899017] ? mutex_spin_on_owner+0xe4/0x150 [12301.899023] ? find_held_lock+0x2d/0x90 [12301.899030] ? lock_acquire+0x90/0x180 [12301.899036] lock_acquire+0x90/0x180 [12301.899042] ? kernfs_remove_by_name_ns+0x3b/0x80 [12301.899049] __kernfs_remove+0x296/0x310 [12301.899055] ? kernfs_remove_by_name_ns+0x3b/0x80 [12301.899060] ? kernfs_name_hash+0xd/0x80 [12301.899066] ? kernfs_find_ns+0x6c/0x100 [12301.899073] kernfs_remove_by_name_ns+0x3b/0x80 [12301.899080] bus_remove_driver+0x92/0xa0 [12301.899085] acpi_video_unregister+0x24/0x40 [12301.899127] i915_driver_unload+0x42/0x130 [i915] [12301.899160] i915_pci_remove+0x19/0x30 [i915] [12301.899169] pci_device_remove+0x36/0xb0 [12301.899176] device_release_driver_internal+0x185/0x240 [12301.899183] unbind_store+0xaf/0x180 [12301.899189] kernfs_fop_write+0x104/0x190 [12301.899195] __vfs_write+0x31/0x180 [12301.899203] ? rcu_read_lock_sched_held+0x6f/0x80 [12301.899209] ? rcu_sync_lockdep_assert+0x29/0x50 [12301.899216] ? __sb_start_write+0x13c/0x1a0 [12301.899221] ? vfs_write+0x17f/0x1b0 [12301.899227] vfs_write+0xb9/0x1b0 [12301.899233] ksys_write+0x50/0xc0 [12301.899239] do_syscall_64+0x4b/0x180 [12301.899247] entry_SYSCALL_64_after_hwframe+0x49/0xbe [12301.899253] RIP: 0033:0x7f452ac7f7a4 [12301.899259] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 80 00 00 00 00 8b 05 aa f0 2c 00 48 63 ff 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 55 53 48 89 d5 48 89 f3 48 83 [12301.899273] RSP: 002b:00007ffceafa6918 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [12301.899282] RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007f452ac7f7a4 [12301.899288] RDX: 000000000000000d RSI: 00005612a1abf7c0 RDI: 0000000000000001 [12301.899295] RBP: 00005612a1abf7c0 R08: 000000000000000a R09: 00005612a1c46730 [12301.899301] R10: 000000000000000a R11: 0000000000000246 R12: 000000000000000d [12301.899308] R13: 0000000000000001 R14: 00007f452af4a740 R15: 000000000000000d Looking around I've noticed that usb and i2c already handle similar recursion problems, where a sysfs file can unbind the same type of sysfs somewhere else in the hierarchy. Relevant commits are: commit 356c05d58af05d582e634b54b40050c73609617b Author: Alan Stern Date: Mon May 14 13:30:03 2012 -0400 sysfs: get rid of some lockdep false positives commit e9b526fe704812364bca07edd15eadeba163ebfb Author: Alexander Sverdlin Date: Fri May 17 14:56:35 2013 +0200 i2c: suppress lockdep warning on delete_device Implement the same trick for driver bind/unbind. v2: Put the macro into bus.c (Greg). Reviewed-by: Rafael J. Wysocki Cc: Ramalingam C Cc: Arend van Spriel Cc: Andy Shevchenko Cc: Geert Uytterhoeven Cc: Bartosz Golaszewski Cc: Heikki Krogerus Cc: Vivek Gautam Cc: Joe Perches Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/bus.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 22a64fd3309b..1cf1460f8c90 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -33,6 +33,9 @@ static struct kset *system_kset; #define to_drv_attr(_attr) container_of(_attr, struct driver_attribute, attr) +#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ + struct driver_attribute driver_attr_##_name = \ + __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) static int __must_check bus_rescan_devices_helper(struct device *dev, void *data); @@ -197,7 +200,7 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf, bus_put(bus); return err; } -static DRIVER_ATTR_WO(unbind); +static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, S_IWUSR, NULL, unbind_store); /* * Manually attach a device to a driver. @@ -233,7 +236,7 @@ static ssize_t bind_store(struct device_driver *drv, const char *buf, bus_put(bus); return err; } -static DRIVER_ATTR_WO(bind); +static DRIVER_ATTR_IGNORE_LOCKDEP(bind, S_IWUSR, NULL, bind_store); static ssize_t show_drivers_autoprobe(struct bus_type *bus, char *buf) { From 9d3e1c0981c68e655707a3b0ab2e33e294ed40f8 Mon Sep 17 00:00:00 2001 From: Parvi Kaustubhi Date: Tue, 11 Dec 2018 14:15:42 -0800 Subject: [PATCH 1522/2608] IB/usnic: Fix potential deadlock [ Upstream commit 8036e90f92aae2784b855a0007ae2d8154d28b3c ] Acquiring the rtnl lock while holding usdev_lock could result in a deadlock. For example: usnic_ib_query_port() | mutex_lock(&us_ibdev->usdev_lock) | ib_get_eth_speed() | rtnl_lock() rtnl_lock() | usnic_ib_netdevice_event() | mutex_lock(&us_ibdev->usdev_lock) This commit moves the usdev_lock acquisition after the rtnl lock has been released. This is safe to do because usdev_lock is not protecting anything being accessed in ib_get_eth_speed(). Hence, the correct order of holding locks (rtnl -> usdev_lock) is not violated. Signed-off-by: Parvi Kaustubhi Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 3c3453d213dc..fdfa25059723 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -310,13 +310,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, usnic_dbg("\n"); - mutex_lock(&us_ibdev->usdev_lock); if (ib_get_eth_speed(ibdev, port, &props->active_speed, - &props->active_width)) { - mutex_unlock(&us_ibdev->usdev_lock); + &props->active_width)) return -EINVAL; - } + /* + * usdev_lock is acquired after (and not before) ib_get_eth_speed call + * because acquiring rtnl_lock in ib_get_eth_speed, while holding + * usdev_lock could lead to a deadlock. + */ + mutex_lock(&us_ibdev->usdev_lock); /* props being zeroed by the caller, avoid zeroing it here */ props->lid = 0; From ceecd0c696456068c2a0e1f0d3c32e5290d115ab Mon Sep 17 00:00:00 2001 From: Kevin Barnett Date: Fri, 7 Dec 2018 16:29:51 -0600 Subject: [PATCH 1523/2608] scsi: smartpqi: correct lun reset issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2ba55c9851d74eb015a554ef69ddf2ef061d5780 ] Problem: The Linux kernel takes a logical volume offline after a LUN reset. This is generally accompanied by this message in the dmesg output: Device offlined - not ready after error recovery Root Cause: The root cause is a "quirk" in the timeout handling in the Linux SCSI layer. The Linux kernel places a 30-second timeout on most media access commands (reads and writes) that it send to device drivers. When a media access command times out, the Linux kernel goes into error recovery mode for the LUN that was the target of the command that timed out. Every command that timed out is kept on a list inside of the Linux kernel to be retried later. The kernel attempts to recover the command(s) that timed out by issuing a LUN reset followed by a TEST UNIT READY. If the LUN reset and TEST UNIT READY commands are successful, the kernel retries the command(s) that timed out. Each SCSI command issued by the kernel has a result field associated with it. This field indicates the final result of the command (success or error). When a command times out, the kernel places a value in this result field indicating that the command timed out. The "quirk" is that after the LUN reset and TEST UNIT READY commands are completed, the kernel checks each command on the timed-out command list before retrying it. If the result field is still "timed out", the kernel treats that command as not having been successfully recovered for a retry. If the number of commands that are in this state are greater than two, the kernel takes the LUN offline. Fix: When our RAIDStack receives a LUN reset, it simply waits until all outstanding commands complete. Generally, all of these outstanding commands complete successfully. Therefore, the fix in the smartpqi driver is to always set the command result field to indicate success when a request completes successfully. This normally isn’t necessary because the result field is always initialized to success when the command is submitted to the driver. So when the command completes successfully, the result field is left untouched. But in this case, the kernel changes the result field behind the driver’s back and then expects the field to be changed by the driver as the commands that timed-out complete. Reviewed-by: Dave Carroll Reviewed-by: Scott Teel Signed-off-by: Kevin Barnett Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 83bdbd84eb01..b662f58203ac 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -2709,6 +2709,9 @@ static unsigned int pqi_process_io_intr(struct pqi_ctrl_info *ctrl_info, switch (response->header.iu_type) { case PQI_RESPONSE_IU_RAID_PATH_IO_SUCCESS: case PQI_RESPONSE_IU_AIO_PATH_IO_SUCCESS: + if (io_request->scmd) + io_request->scmd->result = 0; + /* fall through */ case PQI_RESPONSE_IU_GENERAL_MANAGEMENT: break; case PQI_RESPONSE_IU_TASK_MANAGEMENT: From 008b4a703b2c45137a60be6a96838e1485598aec Mon Sep 17 00:00:00 2001 From: Yanjiang Jin Date: Thu, 20 Dec 2018 16:32:35 +0800 Subject: [PATCH 1524/2608] scsi: smartpqi: call pqi_free_interrupts() in pqi_shutdown() [ Upstream commit e57b2945aa654e48f85a41e8917793c64ecb9de8 ] We must free all irqs during shutdown, else kexec's 2nd kernel would hang in pqi_wait_for_completion_io() as below: Call trace: pqi_wait_for_completion_io pqi_submit_raid_request_synchronous.constprop.78+0x23c/0x310 [smartpqi] pqi_configure_events+0xec/0x1f8 [smartpqi] pqi_ctrl_init+0x814/0xca0 [smartpqi] pqi_pci_probe+0x400/0x46c [smartpqi] local_pci_probe+0x48/0xb0 pci_device_probe+0x14c/0x1b0 really_probe+0x218/0x3fc driver_probe_device+0x70/0x140 __driver_attach+0x11c/0x134 bus_for_each_dev+0x70/0xc8 driver_attach+0x30/0x38 bus_add_driver+0x1f0/0x294 driver_register+0x74/0x12c __pci_register_driver+0x64/0x70 pqi_init+0xd0/0x10000 [smartpqi] do_one_initcall+0x60/0x1d8 do_init_module+0x64/0x1f8 load_module+0x10ec/0x1350 __se_sys_finit_module+0xd4/0x100 __arm64_sys_finit_module+0x28/0x34 el0_svc_handler+0x104/0x160 el0_svc+0x8/0xc This happens only in the following combinations: 1. smartpqi is built as module, not built-in; 2. We have a disk connected to smartpqi card; 3. Both kexec's 1st and 2nd kernels use this disk as Rootfs' mount point. Signed-off-by: Yanjiang Jin Acked-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index b662f58203ac..bc15999f1c7c 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6703,6 +6703,7 @@ static void pqi_shutdown(struct pci_dev *pci_dev) * storage. */ rc = pqi_flush_cache(ctrl_info, SHUTDOWN); + pqi_free_interrupts(ctrl_info); pqi_reset(ctrl_info); if (rc == 0) return; From c010d4948a768ac1139968ab6e87701d0d02f536 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 13 Dec 2018 08:27:27 -0500 Subject: [PATCH 1525/2608] scsi: megaraid: fix out-of-bound array accesses [ Upstream commit c7a082e4242fd8cd21a441071e622f87c16bdacc ] UBSAN reported those with MegaRAID SAS-3 3108, [ 77.467308] UBSAN: Undefined behaviour in drivers/scsi/megaraid/megaraid_sas_fp.c:117:32 [ 77.475402] index 255 is out of range for type 'MR_LD_SPAN_MAP [1]' [ 77.481677] CPU: 16 PID: 333 Comm: kworker/16:1 Not tainted 4.20.0-rc5+ #1 [ 77.488556] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.50 06/01/2018 [ 77.495791] Workqueue: events work_for_cpu_fn [ 77.500154] Call trace: [ 77.502610] dump_backtrace+0x0/0x2c8 [ 77.506279] show_stack+0x24/0x30 [ 77.509604] dump_stack+0x118/0x19c [ 77.513098] ubsan_epilogue+0x14/0x60 [ 77.516765] __ubsan_handle_out_of_bounds+0xfc/0x13c [ 77.521767] mr_update_load_balance_params+0x150/0x158 [megaraid_sas] [ 77.528230] MR_ValidateMapInfo+0x2cc/0x10d0 [megaraid_sas] [ 77.533825] megasas_get_map_info+0x244/0x2f0 [megaraid_sas] [ 77.539505] megasas_init_adapter_fusion+0x9b0/0xf48 [megaraid_sas] [ 77.545794] megasas_init_fw+0x1ab4/0x3518 [megaraid_sas] [ 77.551212] megasas_probe_one+0x2c4/0xbe0 [megaraid_sas] [ 77.556614] local_pci_probe+0x7c/0xf0 [ 77.560365] work_for_cpu_fn+0x34/0x50 [ 77.564118] process_one_work+0x61c/0xf08 [ 77.568129] worker_thread+0x534/0xa70 [ 77.571882] kthread+0x1c8/0x1d0 [ 77.575114] ret_from_fork+0x10/0x1c [ 89.240332] UBSAN: Undefined behaviour in drivers/scsi/megaraid/megaraid_sas_fp.c:117:32 [ 89.248426] index 255 is out of range for type 'MR_LD_SPAN_MAP [1]' [ 89.254700] CPU: 16 PID: 95 Comm: kworker/u130:0 Not tainted 4.20.0-rc5+ #1 [ 89.261665] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.50 06/01/2018 [ 89.268903] Workqueue: events_unbound async_run_entry_fn [ 89.274222] Call trace: [ 89.276680] dump_backtrace+0x0/0x2c8 [ 89.280348] show_stack+0x24/0x30 [ 89.283671] dump_stack+0x118/0x19c [ 89.287167] ubsan_epilogue+0x14/0x60 [ 89.290835] __ubsan_handle_out_of_bounds+0xfc/0x13c [ 89.295828] MR_LdRaidGet+0x50/0x58 [megaraid_sas] [ 89.300638] megasas_build_io_fusion+0xbb8/0xd90 [megaraid_sas] [ 89.306576] megasas_build_and_issue_cmd_fusion+0x138/0x460 [megaraid_sas] [ 89.313468] megasas_queue_command+0x398/0x3d0 [megaraid_sas] [ 89.319222] scsi_dispatch_cmd+0x1dc/0x8a8 [ 89.323321] scsi_request_fn+0x8e8/0xdd0 [ 89.327249] __blk_run_queue+0xc4/0x158 [ 89.331090] blk_execute_rq_nowait+0xf4/0x158 [ 89.335449] blk_execute_rq+0xdc/0x158 [ 89.339202] __scsi_execute+0x130/0x258 [ 89.343041] scsi_probe_and_add_lun+0x2fc/0x1488 [ 89.347661] __scsi_scan_target+0x1cc/0x8c8 [ 89.351848] scsi_scan_channel.part.3+0x8c/0xc0 [ 89.356382] scsi_scan_host_selected+0x130/0x1f0 [ 89.361002] do_scsi_scan_host+0xd8/0xf0 [ 89.364927] do_scan_async+0x9c/0x320 [ 89.368594] async_run_entry_fn+0x138/0x420 [ 89.372780] process_one_work+0x61c/0xf08 [ 89.376793] worker_thread+0x13c/0xa70 [ 89.380546] kthread+0x1c8/0x1d0 [ 89.383778] ret_from_fork+0x10/0x1c This is because when populating Driver Map using firmware raid map, all non-existing VDs set their ldTgtIdToLd to 0xff, so it can be skipped later. From drivers/scsi/megaraid/megaraid_sas_base.c , memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS); From drivers/scsi/megaraid/megaraid_sas_fp.c , /* For non existing VDs, iterate to next VD*/ if (ld >= (MAX_LOGICAL_DRIVES_EXT - 1)) continue; However, there are a few places that failed to skip those non-existing VDs due to off-by-one errors. Then, those 0xff leaked into MR_LdRaidGet(0xff, map) and triggered the out-of-bound accesses. Fixes: 51087a8617fe ("megaraid_sas : Extended VD support") Signed-off-by: Qian Cai Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index f2ffde430ec1..9a2a62e39e4c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -1266,7 +1266,7 @@ void mr_update_load_balance_params(struct MR_DRV_RAID_MAP_ALL *drv_map, for (ldCount = 0; ldCount < MAX_LOGICAL_DRIVES_EXT; ldCount++) { ld = MR_TargetIdToLdGet(ldCount, drv_map); - if (ld >= MAX_LOGICAL_DRIVES_EXT) { + if (ld >= MAX_LOGICAL_DRIVES_EXT - 1) { lbInfo[ldCount].loadBalanceFlag = 0; continue; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 06a2e3d9fc5b..7be2b9e11332 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2529,7 +2529,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance, device_id < instance->fw_supported_vd_count)) { ld = MR_TargetIdToLdGet(device_id, local_map_ptr); - if (ld >= instance->fw_supported_vd_count) + if (ld >= instance->fw_supported_vd_count - 1) fp_possible = 0; else { raid = MR_LdRaidGet(ld, local_map_ptr); From f976e59e25950cce40926f843bdf26404a59b3a9 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 28 Dec 2018 00:32:50 -0800 Subject: [PATCH 1526/2608] ocfs2: fix panic due to unrecovered local alloc [ Upstream commit 532e1e54c8140188e192348c790317921cb2dc1c ] mount.ocfs2 ignore the inconsistent error that journal is clean but local alloc is unrecovered. After mount, local alloc not empty, then reserver cluster didn't alloc a new local alloc window, reserveration map is empty(ocfs2_reservation_map.m_bitmap_len = 0), that triggered the following panic. This issue was reported at https://oss.oracle.com/pipermail/ocfs2-devel/2015-May/010854.html and was advised to fixed during mount. But this is a very unusual inconsistent state, usually journal dirty flag should be cleared at the last stage of umount until every other things go right. We may need do further debug to check that. Any way to avoid possible futher corruption, mount should be abort and fsck should be run. (mount.ocfs2,1765,1):ocfs2_load_local_alloc:353 ERROR: Local alloc hasn't been recovered! found = 6518, set = 6518, taken = 8192, off = 15912372 ocfs2: Mounting device (202,64) on (node 0, slot 3) with ordered data mode. o2dlm: Joining domain 89CEAC63CC4F4D03AC185B44E0EE0F3F ( 0 1 2 3 4 5 6 8 ) 8 nodes ocfs2: Mounting device (202,80) on (node 0, slot 3) with ordered data mode. o2hb: Region 89CEAC63CC4F4D03AC185B44E0EE0F3F (xvdf) is now a quorum device o2net: Accepted connection from node yvwsoa17p (num 7) at 172.22.77.88:7777 o2dlm: Node 7 joins domain 64FE421C8C984E6D96ED12C55FEE2435 ( 0 1 2 3 4 5 6 7 8 ) 9 nodes o2dlm: Node 7 joins domain 89CEAC63CC4F4D03AC185B44E0EE0F3F ( 0 1 2 3 4 5 6 7 8 ) 9 nodes ------------[ cut here ]------------ kernel BUG at fs/ocfs2/reservations.c:507! invalid opcode: 0000 [#1] SMP Modules linked in: ocfs2 rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs fscache lockd grace ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sunrpc ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 ovmapi ppdev parport_pc parport xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea acpi_cpufreq pcspkr i2c_piix4 i2c_core sg ext4 jbd2 mbcache2 sr_mod cdrom xen_blkfront pata_acpi ata_generic ata_piix floppy dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 4349 Comm: startWebLogic.s Not tainted 4.1.12-124.19.2.el6uek.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 09/06/2018 task: ffff8803fb04e200 ti: ffff8800ea4d8000 task.ti: ffff8800ea4d8000 RIP: 0010:[] [] __ocfs2_resv_find_window+0x498/0x760 [ocfs2] Call Trace: ocfs2_resmap_resv_bits+0x10d/0x400 [ocfs2] ocfs2_claim_local_alloc_bits+0xd0/0x640 [ocfs2] __ocfs2_claim_clusters+0x178/0x360 [ocfs2] ocfs2_claim_clusters+0x1f/0x30 [ocfs2] ocfs2_convert_inline_data_to_extents+0x634/0xa60 [ocfs2] ocfs2_write_begin_nolock+0x1c6/0x1da0 [ocfs2] ocfs2_write_begin+0x13e/0x230 [ocfs2] generic_perform_write+0xbf/0x1c0 __generic_file_write_iter+0x19c/0x1d0 ocfs2_file_write_iter+0x589/0x1360 [ocfs2] __vfs_write+0xb8/0x110 vfs_write+0xa9/0x1b0 SyS_write+0x46/0xb0 system_call_fastpath+0x18/0xd7 Code: ff ff 8b 75 b8 39 75 b0 8b 45 c8 89 45 98 0f 84 e5 fe ff ff 45 8b 74 24 18 41 8b 54 24 1c e9 56 fc ff ff 85 c0 0f 85 48 ff ff ff <0f> 0b 48 8b 05 cf c3 de ff 48 ba 00 00 00 00 00 00 00 10 48 85 RIP __ocfs2_resv_find_window+0x498/0x760 [ocfs2] RSP ---[ end trace 566f07529f2edf3c ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled Link: http://lkml.kernel.org/r/20181121020023.3034-2-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Yiwen Jiang Acked-by: Joseph Qi Cc: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/localalloc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index fe0d1f9571bb..5d53d0d63d19 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -345,13 +345,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) if (num_used || alloc->id1.bitmap1.i_used || alloc->id1.bitmap1.i_total - || la->la_bm_off) - mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" + || la->la_bm_off) { + mlog(ML_ERROR, "inconsistent detected, clean journal with" + " unrecovered local alloc, please run fsck.ocfs2!\n" "found = %u, set = %u, taken = %u, off = %u\n", num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), le32_to_cpu(alloc->id1.bitmap1.i_total), OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); + status = -EINVAL; + goto bail; + } + osb->local_alloc_bh = alloc_bh; osb->local_alloc_state = OCFS2_LA_ENABLED; From 694c20fe01729c6b96f5fb7dbeb8bb85b7c9af91 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 28 Dec 2018 00:37:20 -0800 Subject: [PATCH 1527/2608] mm/page-writeback.c: don't break integrity writeback on ->writepage() error [ Upstream commit 3fa750dcf29e8606e3969d13d8e188cc1c0f511d ] write_cache_pages() is used in both background and integrity writeback scenarios by various filesystems. Background writeback is mostly concerned with cleaning a certain number of dirty pages based on various mm heuristics. It may not write the full set of dirty pages or wait for I/O to complete. Integrity writeback is responsible for persisting a set of dirty pages before the writeback job completes. For example, an fsync() call must perform integrity writeback to ensure data is on disk before the call returns. write_cache_pages() unconditionally breaks out of its processing loop in the event of a ->writepage() error. This is fine for background writeback, which had no strict requirements and will eventually come around again. This can cause problems for integrity writeback on filesystems that might need to clean up state associated with failed page writeouts. For example, XFS performs internal delayed allocation accounting before returning a ->writepage() error, where applicable. If the current writeback happens to be associated with an unmount and write_cache_pages() completes the writeback prematurely due to error, the filesystem is unmounted in an inconsistent state if dirty+delalloc pages still exist. To handle this problem, update write_cache_pages() to always process the full set of pages for integrity writeback regardless of ->writepage() errors. Save the first encountered error and return it to the caller once complete. This facilitates XFS (or any other fs that expects integrity writeback to process the entire set of dirty pages) to clean up its internal state completely in the event of persistent mapping errors. Background writeback continues to exit on the first error encountered. [akpm@linux-foundation.org: fix typo in comment] Link: http://lkml.kernel.org/r/20181116134304.32440-1-bfoster@redhat.com Signed-off-by: Brian Foster Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page-writeback.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3175ac850a53..e001de5ac50c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2157,6 +2157,7 @@ int write_cache_pages(struct address_space *mapping, { int ret = 0; int done = 0; + int error; struct pagevec pvec; int nr_pages; pgoff_t uninitialized_var(writeback_index); @@ -2253,25 +2254,31 @@ continue_unlock: goto continue_unlock; trace_wbc_writepage(wbc, inode_to_bdi(mapping->host)); - ret = (*writepage)(page, wbc, data); - if (unlikely(ret)) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { + error = (*writepage)(page, wbc, data); + if (unlikely(error)) { + /* + * Handle errors according to the type of + * writeback. There's no need to continue for + * background writeback. Just push done_index + * past this page so media errors won't choke + * writeout for the entire file. For integrity + * writeback, we must process the entire dirty + * set regardless of errors because the fs may + * still have state to clear for each page. In + * that case we continue processing and return + * the first error. + */ + if (error == AOP_WRITEPAGE_ACTIVATE) { unlock_page(page); - ret = 0; - } else { - /* - * done_index is set past this page, - * so media errors will not choke - * background writeout for the entire - * file. This has consequences for - * range_cyclic semantics (ie. it may - * not be suitable for data integrity - * writeout). - */ + error = 0; + } else if (wbc->sync_mode != WB_SYNC_ALL) { + ret = error; done_index = page->index + 1; done = 1; break; } + if (!ret) + ret = error; } /* From 4fb12a087a80dbedf56e4157b755bdea958aae7e Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Fri, 28 Dec 2018 00:34:39 -0800 Subject: [PATCH 1528/2608] mm/swap: use nr_node_ids for avail_lists in swap_info_struct [ Upstream commit 66f71da9dd38af17dc17209cdde7987d4679a699 ] Since a2468cc9bfdf ("swap: choose swap device according to numa node"), avail_lists field of swap_info_struct is changed to an array with MAX_NUMNODES elements. This made swap_info_struct size increased to 40KiB and needs an order-4 page to hold it. This is not optimal in that: 1 Most systems have way less than MAX_NUMNODES(1024) nodes so it is a waste of memory; 2 It could cause swapon failure if the swap device is swapped on after system has been running for a while, due to no order-4 page is available as pointed out by Vasily Averin. Solve the above two issues by using nr_node_ids(which is the actual possible node number the running system has) for avail_lists instead of MAX_NUMNODES. nr_node_ids is unknown at compile time so can't be directly used when declaring this array. What I did here is to declare avail_lists as zero element array and allocate space for it when allocating space for swap_info_struct. The reason why keep using array but not pointer is plist_for_each_entry needs the field to be part of the struct, so pointer will not work. This patch is on top of Vasily Averin's fix commit. I think the use of kvzalloc for swap_info_struct is still needed in case nr_node_ids is really big on some systems. Link: http://lkml.kernel.org/r/20181115083847.GA11129@intel.com Signed-off-by: Aaron Lu Reviewed-by: Andrew Morton Acked-by: Michal Hocko Cc: Vasily Averin Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/swap.h | 11 ++++++++++- mm/swapfile.c | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index f02fb5db8914..4fd1ab9565ba 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -231,7 +231,6 @@ struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ - struct plist_node avail_lists[MAX_NUMNODES];/* entry in swap_avail_heads */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ @@ -272,6 +271,16 @@ struct swap_info_struct { */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ + struct plist_node avail_lists[0]; /* + * entries in swap_avail_heads, one + * entry per node. + * Must be last as the number of the + * array is nr_node_ids, which is not + * a fixed value so have to allocate + * dynamically. + * And it has to be an array so that + * plist_for_each_* can work. + */ }; #ifdef CONFIG_64BIT diff --git a/mm/swapfile.c b/mm/swapfile.c index af3c4c5a0b4e..4f9e522643a2 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2830,8 +2830,9 @@ static struct swap_info_struct *alloc_swap_info(void) struct swap_info_struct *p; unsigned int type; int i; + int size = sizeof(*p) + nr_node_ids * sizeof(struct plist_node); - p = kvzalloc(sizeof(*p), GFP_KERNEL); + p = kvzalloc(size, GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); From 696ce77bba5ca4e486a9b6ed4d27a41fcd2f7169 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 28 Dec 2018 00:38:17 -0800 Subject: [PATCH 1529/2608] mm, proc: be more verbose about unstable VMA flags in /proc//smaps [ Upstream commit 7550c6079846a24f30d15ac75a941c8515dbedfb ] Patch series "THP eligibility reporting via proc". This series of three patches aims at making THP eligibility reporting much more robust and long term sustainable. The trigger for the change is a regression report [2] and the long follow up discussion. In short the specific application didn't have good API to query whether a particular mapping can be backed by THP so it has used VMA flags to workaround that. These flags represent a deep internal state of VMAs and as such they should be used by userspace with a great deal of caution. A similar has happened for [3] when users complained that VM_MIXEDMAP is no longer set on DAX mappings. Again a lack of a proper API led to an abuse. The first patch in the series tries to emphasise that that the semantic of flags might change and any application consuming those should be really careful. The remaining two patches provide a more suitable interface to address [2] and provide a consistent API to query the THP status both for each VMA and process wide as well. [1] http://lkml.kernel.org/r/20181120103515.25280-1-mhocko@kernel.org [2] http://lkml.kernel.org/r/http://lkml.kernel.org/r/alpine.DEB.2.21.1809241054050.224429@chino.kir.corp.google.com [3] http://lkml.kernel.org/r/20181002100531.GC4135@quack2.suse.cz This patch (of 3): Even though vma flags exported via /proc//smaps are explicitly documented to be not guaranteed for future compatibility the warning doesn't go far enough because it doesn't mention semantic changes to those flags. And they are important as well because these flags are a deep implementation internal to the MM code and the semantic might change at any time. Let's consider two recent examples: http://lkml.kernel.org/r/20181002100531.GC4135@quack2.suse.cz : commit e1fb4a086495 "dax: remove VM_MIXEDMAP for fsdax and device dax" has : removed VM_MIXEDMAP flag from DAX VMAs. Now our testing shows that in the : mean time certain customer of ours started poking into /proc//smaps : and looks at VMA flags there and if VM_MIXEDMAP is missing among the VMA : flags, the application just fails to start complaining that DAX support is : missing in the kernel. http://lkml.kernel.org/r/alpine.DEB.2.21.1809241054050.224429@chino.kir.corp.google.com : Commit 1860033237d4 ("mm: make PR_SET_THP_DISABLE immediately active") : introduced a regression in that userspace cannot always determine the set : of vmas where thp is ineligible. : Userspace relies on the "nh" flag being emitted as part of /proc/pid/smaps : to determine if a vma is eligible to be backed by hugepages. : Previous to this commit, prctl(PR_SET_THP_DISABLE, 1) would cause thp to : be disabled and emit "nh" as a flag for the corresponding vmas as part of : /proc/pid/smaps. After the commit, thp is disabled by means of an mm : flag and "nh" is not emitted. : This causes smaps parsing libraries to assume a vma is eligible for thp : and ends up puzzling the user on why its memory is not backed by thp. In both cases userspace was relying on a semantic of a specific VMA flag. The primary reason why that happened is a lack of a proper interface. While this has been worked on and it will be fixed properly, it seems that our wording could see some refinement and be more vocal about semantic aspect of these flags as well. Link: http://lkml.kernel.org/r/20181211143641.3503-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Jan Kara Acked-by: Dan Williams Acked-by: David Rientjes Acked-by: Mike Rapoport Acked-by: Vlastimil Babka Cc: Dan Williams Cc: David Rientjes Cc: Paul Oppenheimer Cc: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- Documentation/filesystems/proc.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index adba21b5ada7..4cee34ce496e 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -494,7 +494,9 @@ manner. The codes are the following: Note that there is no guarantee that every flag and associated mnemonic will be present in all further kernel releases. Things get changed, the flags may -be vanished or the reverse -- new added. +be vanished or the reverse -- new added. Interpretation of their meaning +might change in future as well. So each consumer of these flags has to +follow each specific kernel version for the exact semantic. This file is only present if the CONFIG_MMU kernel configuration option is enabled. From 53818c7644887757fb9181c988e0fd4c17a9c1cb Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 5 Dec 2017 13:55:44 -0500 Subject: [PATCH 1530/2608] nfs: fix a deadlock in nfs client initialization commit c156618e15101a9cc8c815108fec0300a0ec6637 upstream. The following deadlock can occur between a process waiting for a client to initialize in while walking the client list during nfsv4 server trunking detection and another process waiting for the nfs_clid_init_mutex so it can initialize that client: Process 1 Process 2 --------- --------- spin_lock(&nn->nfs_client_lock); list_add_tail(&CLIENTA->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); spin_lock(&nn->nfs_client_lock); list_add_tail(&CLIENTB->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); mutex_lock(&nfs_clid_init_mutex); nfs41_walk_client_list(clp, result, cred); nfs_wait_client_init_complete(CLIENTA); (waiting for nfs_clid_init_mutex) Make sure nfs_match_client() only evaluates clients that have completed initialization in order to prevent that deadlock. This patch also fixes v4.0 trunking behavior by not marking the client NFS_CS_READY until the clientid has been confirmed. Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker Signed-off-by: Qian Lu Signed-off-by: Greg Kroah-Hartman --- fs/nfs/client.c | 11 +++++++++++ fs/nfs/nfs4client.c | 17 +++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 22880ef6d8dd..7d6ddfd60271 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat const struct sockaddr *sap = data->addr; struct nfs_net *nn = net_generic(data->net, nfs_net_id); +again: list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ if (clp->cl_cons_state < 0) continue; + /* If a client is still initializing then we need to wait */ + if (clp->cl_cons_state > NFS_CS_READY) { + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + nfs_wait_client_init_complete(clp); + nfs_put_client(clp); + spin_lock(&nn->nfs_client_lock); + goto again; + } + /* Different NFS versions cannot share the same nfs_client */ if (clp->rpc_ops != data->nfs_mod->rpc_ops) continue; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index fed9c8005c17..8f96f6548dc8 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, if (error < 0) goto error; - if (!nfs4_has_session(clp)) - nfs_mark_client_ready(clp, NFS_CS_READY); - error = nfs4_discover_server_trunking(clp, &old); if (error < 0) goto error; - if (clp != old) + if (clp != old) { clp->cl_preserve_clid = true; + /* + * Mark the client as having failed initialization so other + * processes walking the nfs_client_list in nfs_match_client() + * won't try to use it. + */ + nfs_mark_client_ready(clp, -EPERM); + } nfs_put_client(clp); clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags); return old; @@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + if (pos == new) + goto found; + status = nfs4_match_client(pos, new, &prev, nn); if (status < 0) goto out_unlock; @@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new, * way that a SETCLIENTID_CONFIRM to pos can succeed is * if new and pos point to the same server: */ +found: atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new, case 0: nfs4_swap_callback_idents(pos, new); pos->cl_confirm = new->cl_confirm; + nfs_mark_client_ready(pos, NFS_CS_READY); prev = NULL; *result = pos; From de616eb216055ed367c7ecda073d8e8b9fb90a81 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 15 Feb 2018 16:58:26 -0600 Subject: [PATCH 1531/2608] ipmi:pci: Blacklist a Realtek "IPMI" device commit bc48fa1b9d3b04106055b27078da824cd209865a upstream. Realtek has some sort of "Virtual" IPMI device on the PCI bus as a KCS controller, but whatever it is, it's not one. Ignore it if seen. [ Commit 13d0b35c (ipmi_si: Move PCI setup to another file) from Linux 4.15-rc1 has not been back ported, so the PCI code is still in `drivers/char/ipmi/ipmi_si_intf.c`, requiring to apply the commit manually. This fixes a 100 s boot delay on the HP EliteDesk 705 G4 MT with Linux 4.14.94. ] Reported-by: Chris Chiu Signed-off-by: Corey Minyard Tested-by: Daniel Drake Signed-off-by: Paul Menzel Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_si_intf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index c04aa11f0e21..a106cf7b5ee0 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2447,6 +2447,15 @@ static int ipmi_pci_probe_regspacing(struct smi_info *info) return DEFAULT_REGSPACING; } +static struct pci_device_id ipmi_pci_blacklist[] = { + /* + * This is a "Virtual IPMI device", whatever that is. It appears + * as a KCS device by the class, but it is not one. + */ + { PCI_VDEVICE(REALTEK, 0x816c) }, + { 0, } +}; + static int ipmi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -2454,6 +2463,9 @@ static int ipmi_pci_probe(struct pci_dev *pdev, int class_type = pdev->class & PCI_ERMC_CLASSCODE_TYPE_MASK; struct smi_info *info; + if (pci_match_id(ipmi_pci_blacklist, pdev)) + return -ENODEV; + info = smi_info_alloc(); if (!info) return -ENOMEM; From bfbd7e95b6b531d159c0e9615e04f8320ff21487 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 19 Jun 2018 14:34:08 -0500 Subject: [PATCH 1532/2608] cifs: allow disabling insecure dialects in the config commit 7420451f6a109f7f8f1bf283f34d08eba3259fb3 upstream. allow disabling cifs (SMB1 ie vers=1.0) and vers=2.0 in the config for the build of cifs.ko if want to always prevent mounting with these less secure dialects. Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Reviewed-by: Jeremy Allison Cc: Alakesh Haloi Signed-off-by: Greg Kroah-Hartman --- fs/cifs/Kconfig | 17 ++++++++++++++++- fs/cifs/connect.c | 9 +++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 7b95e7971d18..a78c4133724c 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -66,9 +66,24 @@ config CIFS_STATS2 Unless you are a developer or are doing network performance analysis or tuning, say N. +config CIFS_ALLOW_INSECURE_LEGACY + bool "Support legacy servers which use less secure dialects" + depends on CIFS + default y + help + Modern dialects, SMB2.1 and later (including SMB3 and 3.1.1), have + additional security features, including protection against + man-in-the-middle attacks and stronger crypto hashes, so the use + of legacy dialects (SMB1/CIFS and SMB2.0) is discouraged. + + Disabling this option prevents users from using vers=1.0 or vers=2.0 + on mounts with cifs.ko + + If unsure, say Y. + config CIFS_WEAK_PW_HASH bool "Support legacy servers which use weaker LANMAN security" - depends on CIFS + depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY help Modern CIFS servers including Samba and most Windows versions (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index fd24c72bd2cd..d6248137c219 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1130,6 +1130,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) substring_t args[MAX_OPT_ARGS]; switch (match_token(value, cifs_smb_version_tokens, args)) { +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY case Smb_1: vol->ops = &smb1_operations; vol->vals = &smb1_values; @@ -1138,6 +1139,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) vol->ops = &smb20_operations; vol->vals = &smb20_values; break; +#else + case Smb_1: + cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n"); + return 1; + case Smb_20: + cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n"); + return 1; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ case Smb_21: vol->ops = &smb21_operations; vol->vals = &smb21_values; From e89ec9b92f9b75b44e1ff9f52dab48bd01834ed6 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 11 Jan 2019 13:58:53 +0800 Subject: [PATCH 1533/2608] drm/i915/gvt: Fix mmap range check commit 51b00d8509dc69c98740da2ad07308b630d3eb7d upstream. This is to fix missed mmap range check on vGPU bar2 region and only allow to map vGPU allocated GMADDR range, which means user space should support sparse mmap to get proper offset for mmap vGPU aperture. And this takes care of actual pgoff in mmap request as original code always does from beginning of vGPU aperture. Fixes: 659643f7d814 ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT") Cc: "Monroy, Rodrigo Axel" Cc: "Orrala Contreras, Alfredo" Cc: stable@vger.kernel.org # v4.10+ Reviewed-by: Hang Yuan Signed-off-by: Zhenyu Wang Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gvt/kvmgt.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 73c672fc17c4..9834b7c1c9d4 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -807,11 +807,18 @@ write_err: return -EFAULT; } +static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, + unsigned long off) +{ + return off >= vgpu_aperture_offset(vgpu) && + off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu); +} + static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) { unsigned int index; u64 virtaddr; - unsigned long req_size, pgoff = 0; + unsigned long req_size, pgoff, req_start; pgprot_t pg_prot; struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); @@ -829,7 +836,17 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) pg_prot = vma->vm_page_prot; virtaddr = vma->vm_start; req_size = vma->vm_end - vma->vm_start; - pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT; + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + req_start = pgoff << PAGE_SHIFT; + + if (!intel_vgpu_in_aperture(vgpu, req_start)) + return -EINVAL; + if (req_start + req_size > + vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu)) + return -EINVAL; + + pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff; return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot); } From 413cb66bbb700adb356b7eb47cc7c843fe20a364 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Nov 2018 22:57:34 +0000 Subject: [PATCH 1534/2608] PCI: dwc: Move interrupt acking into the proper callback commit 3f7bb2ec20ce07c02b2002349d256c91a463fcc5 upstream. The write to the status register is really an ACK for the HW, and should be treated as such by the driver. Let's move it to the irq_ack() callback, which will prevent people from moving it around in order to paper over other bugs. Fixes: 8c934095fa2f ("PCI: dwc: Clear MSI interrupt status after it is handled, not before") Fixes: 7c5925afbc58 ("PCI: dwc: Move MSI IRQs allocation to IRQ domains hierarchical API") Link: https://lore.kernel.org/linux-pci/20181113225734.8026-1-marc.zyngier@arm.com/ Reported-by: Trent Piepho Tested-by: Niklas Cassel Tested-by: Gustavo Pimentel Tested-by: Stanimir Varbanov Signed-off-by: Marc Zyngier [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pcie-designware-host.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/pci/dwc/pcie-designware-host.c b/drivers/pci/dwc/pcie-designware-host.c index bc3e2d8d0cce..58b38c54a7cf 100644 --- a/drivers/pci/dwc/pcie-designware-host.c +++ b/drivers/pci/dwc/pcie-designware-host.c @@ -45,8 +45,19 @@ static int dw_pcie_wr_own_conf(struct pcie_port *pp, int where, int size, return dw_pcie_write(pci->dbi_base + where, size, val); } +static void dwc_irq_ack(struct irq_data *d) +{ + struct msi_desc *msi = irq_data_get_msi_desc(d); + struct pcie_port *pp = msi_desc_to_pci_sysdata(msi); + int pos = d->hwirq % 32; + int i = d->hwirq / 32; + + dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_STATUS + i * 12, 4, BIT(pos)); +} + static struct irq_chip dw_msi_irq_chip = { .name = "PCI-MSI", + .irq_ack = dwc_irq_ack, .irq_enable = pci_msi_unmask_irq, .irq_disable = pci_msi_mask_irq, .irq_mask = pci_msi_mask_irq, @@ -72,8 +83,6 @@ irqreturn_t dw_handle_msi_irq(struct pcie_port *pp) pos)) != 32) { irq = irq_find_mapping(pp->irq_domain, i * 32 + pos); generic_handle_irq(irq); - dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_STATUS + i * 12, - 4, 1 << pos); pos++; } } @@ -263,7 +272,7 @@ static struct msi_controller dw_pcie_msi_chip = { static int dw_pcie_msi_map(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq) { - irq_set_chip_and_handler(irq, &dw_msi_irq_chip, handle_simple_irq); + irq_set_chip_and_handler(irq, &dw_msi_irq_chip, handle_edge_irq); irq_set_chip_data(irq, domain->host_data); return 0; From 7c307d3217cd254480dbd483d94b9233564be3f1 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 16 Nov 2018 09:59:21 -0600 Subject: [PATCH 1535/2608] ipmi:ssif: Fix handling of multi-part return messages commit 7d6380cd40f7993f75c4bde5b36f6019237e8719 upstream. The block number was not being compared right, it was off by one when checking the response. Some statistics wouldn't be incremented properly in some cases. Check to see if that middle-part messages always have 31 bytes of data. Signed-off-by: Corey Minyard Cc: stable@vger.kernel.org # 4.4 Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 0904ab442d31..ab701f668ebc 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -645,8 +645,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, /* Remove the multi-part read marker. */ len -= 2; + data += 2; for (i = 0; i < len; i++) - ssif_info->data[i] = data[i+2]; + ssif_info->data[i] = data[i]; ssif_info->multi_len = len; ssif_info->multi_pos = 1; @@ -674,8 +675,19 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, } blocknum = data[0]; + len--; + data++; - if (ssif_info->multi_len + len - 1 > IPMI_MAX_MSG_LENGTH) { + if (blocknum != 0xff && len != 31) { + /* All blocks but the last must have 31 data bytes. */ + result = -EIO; + if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) + pr_info("Received middle message <31\n"); + + goto continue_op; + } + + if (ssif_info->multi_len + len > IPMI_MAX_MSG_LENGTH) { /* Received message too big, abort the operation. */ result = -E2BIG; if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) @@ -684,16 +696,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, goto continue_op; } - /* Remove the blocknum from the data. */ - len--; for (i = 0; i < len; i++) - ssif_info->data[i + ssif_info->multi_len] = data[i + 1]; + ssif_info->data[i + ssif_info->multi_len] = data[i]; ssif_info->multi_len += len; if (blocknum == 0xff) { /* End of read */ len = ssif_info->multi_len; data = ssif_info->data; - } else if (blocknum + 1 != ssif_info->multi_pos) { + } else if (blocknum != ssif_info->multi_pos) { /* * Out of sequence block, just abort. Block * numbers start at zero for the second block, @@ -721,6 +731,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, } } + continue_op: if (result < 0) { ssif_inc_stat(ssif_info, receive_errors); } else { @@ -728,8 +739,6 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_inc_stat(ssif_info, received_message_parts); } - - continue_op: if (ssif_info->ssif_debug & SSIF_DEBUG_STATE) pr_info(PFX "DONE 1: state = %d, result=%d.\n", ssif_info->ssif_state, result); From e6608e1f2fbd5827df9fa0da9ab1ad64f68be8d7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Jan 2019 09:37:07 +0100 Subject: [PATCH 1536/2608] Linux 4.14.96 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 70cc37cb3e99..57b45169ed85 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 95 +SUBLEVEL = 96 EXTRAVERSION = NAME = Petit Gorille From 99afa1927af3d498d241249eab8e87da8265e7a3 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Thu, 17 Jan 2019 14:20:14 +0000 Subject: [PATCH 1537/2608] amd-xgbe: Fix mdio access for non-zero ports and clause 45 PHYs [ Upstream commit 5ab3121beeb76aa6090195b67d237115860dd9ec ] The XGBE hardware has support for performing MDIO operations using an MDIO command request. The driver mistakenly uses the mdio port address as the MDIO command request device address instead of the MDIO command request port address. Additionally, the driver does not properly check for and create a clause 45 MDIO command. Check the supplied MDIO register to determine if the request is a clause 45 operation (MII_ADDR_C45). For a clause 45 operation, extract the device address and register number from the supplied MDIO register and use them to set the MDIO command request device address and register number fields. For a clause 22 operation, the MDIO request device address is set to zero and the MDIO command request register number is set to the supplied MDIO register. In either case, the supplied MDIO port address is used as the MDIO command request port address. Fixes: 732f2ab7afb9 ("amd-xgbe: Add support for MDIO attached PHYs") Signed-off-by: Tom Lendacky Tested-by: Shyam Sundar S K Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 2 -- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 22 +++++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index d272dc6984ac..b40d4377cc71 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -431,8 +431,6 @@ #define MAC_MDIOSCAR_PA_WIDTH 5 #define MAC_MDIOSCAR_RA_INDEX 0 #define MAC_MDIOSCAR_RA_WIDTH 16 -#define MAC_MDIOSCAR_REG_INDEX 0 -#define MAC_MDIOSCAR_REG_WIDTH 21 #define MAC_MDIOSCCDR_BUSY_INDEX 22 #define MAC_MDIOSCCDR_BUSY_WIDTH 1 #define MAC_MDIOSCCDR_CMD_INDEX 16 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index e107e180e2c8..1e4bb33925e6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1284,6 +1284,20 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, } } +static unsigned int xgbe_create_mdio_sca(int port, int reg) +{ + unsigned int mdio_sca, da; + + da = (reg & MII_ADDR_C45) ? reg >> 16 : 0; + + mdio_sca = 0; + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, RA, reg); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, PA, port); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, da); + + return mdio_sca; +} + static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, int reg, u16 val) { @@ -1291,9 +1305,7 @@ static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, reinit_completion(&pdata->mdio_complete); - mdio_sca = 0; - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; @@ -1317,9 +1329,7 @@ static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, reinit_completion(&pdata->mdio_complete); - mdio_sca = 0; - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; From 1ae7a7cb7ab8016e84781dd9cddd705d9e794248 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Thu, 17 Jan 2019 09:46:41 +0800 Subject: [PATCH 1538/2608] net: bridge: Fix ethernet header pointer before check skb forwardable [ Upstream commit 28c1382fa28f2e2d9d0d6f25ae879b5af2ecbd03 ] The skb header should be set to ethernet header before using is_skb_forwardable. Because the ethernet header length has been considered in is_skb_forwardable(including dev->hard_header_len length). To reproduce the issue: 1, add 2 ports on linux bridge br using following commands: $ brctl addbr br $ brctl addif br eth0 $ brctl addif br eth1 2, the MTU of eth0 and eth1 is 1500 3, send a packet(Data 1480, UDP 8, IP 20, Ethernet 14, VLAN 4) from eth0 to eth1 So the expect result is packet larger than 1500 cannot pass through eth0 and eth1. But currently, the packet passes through success, it means eth1's MTU limit doesn't take effect. Fixes: f6367b4660dd ("bridge: use is_skb_forwardable in forward path") Cc: bridge@lists.linux-foundation.org Cc: Nkolay Aleksandrov Cc: Roopa Prabhu Cc: Stephen Hemminger Signed-off-by: Yunjian Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_forward.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 48fb17417fac..57f69f31a2a2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -35,10 +35,10 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; - skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && @@ -96,12 +96,11 @@ static void __br_forward(const struct net_bridge_port *to, net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { - if (!is_skb_forwardable(skb->dev, skb)) { + skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); + else br_netpoll_send_skb(to, skb); - } return; } br_hook = NF_BR_LOCAL_OUT; From 66a011d153c67599c0dc11704ce4ef3f387b1e22 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 17 Jan 2019 15:34:38 +0000 Subject: [PATCH 1539/2608] net: Fix usage of pskb_trim_rcsum [ Upstream commit 6c57f0458022298e4da1729c67bd33ce41c14e7a ] In certain cases, pskb_trim_rcsum() may change skb pointers. Reinitialize header pointers afterwards to avoid potential use-after-frees. Add a note in the documentation of pskb_trim_rcsum(). Found by KASAN. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 1 + include/linux/skbuff.h | 1 + net/bridge/br_netfilter_ipv6.c | 1 + net/bridge/netfilter/nft_reject_bridge.c | 1 + net/ipv4/ip_input.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 951892da3352..c37ef5287caa 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -445,6 +445,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, if (pskb_trim_rcsum(skb, len)) goto drop; + ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); /* Note that get_item does a sock_hold(), so sk_pppox(po) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f6250555ce7d..39c2570ddcf6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3163,6 +3163,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); * * This is exactly the same as pskb_trim except that it ensures the * checksum of received packets are still valid after the operation. + * It can change skb pointers. */ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 96c072e71ea2..5811208863b7 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) IPSTATS_MIB_INDISCARDS); goto drop; } + hdr = ipv6_hdr(skb); } if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) goto drop; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index eaf05de37f75..b09ec869c913 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -230,6 +230,7 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) return false; + ip6h = ipv6_hdr(skb); thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 57fc13c6ab2b..1b160378ea9c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -481,6 +481,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl*4; /* Remove any debris in the socket control block */ From 6bd069b588989fed713681bb82e8c1c6d5de66a5 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 16 Jan 2019 10:53:58 +0100 Subject: [PATCH 1540/2608] net: phy: mdio_bus: add missing device_del() in mdiobus_register() error handling [ Upstream commit e40e2a2e78664fa90ea4b9bdf4a84efce2fea9d9 ] The current code in __mdiobus_register() doesn't properly handle failures returned by the devm_gpiod_get_optional() call: it returns immediately, without unregistering the device that was added by the call to device_register() earlier in the function. This leaves a stale device, which then causes a NULL pointer dereference in the code that handles deferred probing: [ 1.489982] Unable to handle kernel NULL pointer dereference at virtual address 00000074 [ 1.498110] pgd = (ptrval) [ 1.500838] [00000074] *pgd=00000000 [ 1.504432] Internal error: Oops: 17 [#1] SMP ARM [ 1.509133] Modules linked in: [ 1.512192] CPU: 1 PID: 51 Comm: kworker/1:3 Not tainted 4.20.0-00039-g3b73a4cc8b3e-dirty #99 [ 1.520708] Hardware name: Xilinx Zynq Platform [ 1.525261] Workqueue: events deferred_probe_work_func [ 1.530403] PC is at klist_next+0x10/0xfc [ 1.534403] LR is at device_for_each_child+0x40/0x94 [ 1.539361] pc : [] lr : [] psr: 200e0013 [ 1.545628] sp : ceeefe68 ip : 00000001 fp : ffffe000 [ 1.550863] r10: 00000000 r9 : c0c66790 r8 : 00000000 [ 1.556079] r7 : c0457d44 r6 : 00000000 r5 : ceeefe8c r4 : cfa2ec78 [ 1.562604] r3 : 00000064 r2 : c0457d44 r1 : ceeefe8c r0 : 00000064 [ 1.569129] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 1.576263] Control: 18c5387d Table: 0ed7804a DAC: 00000051 [ 1.582013] Process kworker/1:3 (pid: 51, stack limit = 0x(ptrval)) [ 1.588280] Stack: (0xceeefe68 to 0xceef0000) [ 1.592630] fe60: cfa2ec78 c0c03c08 00000000 c0457d44 00000000 c0c66790 [ 1.600814] fe80: 00000000 c0455d90 ceeefeac 00000064 00000000 0d7a542e cee9d494 cfa2ec78 [ 1.608998] fea0: cfa2ec78 00000000 c0457d44 c0457d7c cee9d494 c0c03c08 00000000 c0455dac [ 1.617182] fec0: cf98ba44 cf926a00 cee9d494 0d7a542e 00000000 cf935a10 cf935a10 cf935a10 [ 1.625366] fee0: c0c4e9b8 c0457d7c c0c4e80c 00000001 cf935a10 c0457df4 cf935a10 c0c4e99c [ 1.633550] ff00: c0c4e99c c045a27c c0c4e9c4 ced63f80 cfde8a80 cfdebc00 00000000 c013893c [ 1.641734] ff20: cfde8a80 cfde8a80 c07bd354 ced63f80 ced63f94 cfde8a80 00000008 c0c02d00 [ 1.649936] ff40: cfde8a98 cfde8a80 ffffe000 c0139a30 ffffe000 c0c6624a c07bd354 00000000 [ 1.658120] ff60: ffffe000 cee9e780 ceebfe00 00000000 ceeee000 ced63f80 c0139788 cf8cdea4 [ 1.666304] ff80: cee9e79c c013e598 00000001 ceebfe00 c013e44c 00000000 00000000 00000000 [ 1.674488] ffa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 [ 1.682671] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1.690855] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 1.699058] [] (klist_next) from [] (device_for_each_child+0x40/0x94) [ 1.707241] [] (device_for_each_child) from [] (device_reorder_to_tail+0x38/0x88) [ 1.716476] [] (device_reorder_to_tail) from [] (device_for_each_child+0x5c/0x94) [ 1.725692] [] (device_for_each_child) from [] (device_reorder_to_tail+0x38/0x88) [ 1.734927] [] (device_reorder_to_tail) from [] (device_pm_move_to_tail+0x28/0x40) [ 1.744235] [] (device_pm_move_to_tail) from [] (deferred_probe_work_func+0x58/0x8c) [ 1.753746] [] (deferred_probe_work_func) from [] (process_one_work+0x210/0x4fc) [ 1.762888] [] (process_one_work) from [] (worker_thread+0x2a8/0x5c0) [ 1.771072] [] (worker_thread) from [] (kthread+0x14c/0x154) [ 1.778482] [] (kthread) from [] (ret_from_fork+0x14/0x2c) [ 1.785689] Exception stack(0xceeeffb0 to 0xceeefff8) [ 1.790739] ffa0: 00000000 00000000 00000000 00000000 [ 1.798923] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1.807107] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 1.813724] Code: e92d47f0 e1a05000 e8900048 e1a00003 (e5937010) [ 1.819844] ---[ end trace 3c2c0c8b65399ec9 ]--- The actual error that we had from devm_gpiod_get_optional() was -EPROBE_DEFER, due to the GPIO being provided by a driver that is probed later than the Ethernet controller driver. To fix this, we simply add the missing device_del() invocation in the error path. Fixes: 69226896ad636 ("mdio_bus: Issue GPIO RESET to PHYs") Signed-off-by: Thomas Petazzoni Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio_bus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 2df7b62c1a36..1ece41277993 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -358,6 +358,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) if (IS_ERR(gpiod)) { dev_err(&bus->dev, "mii_bus %s couldn't get reset GPIO\n", bus->id); + device_del(&bus->dev); return PTR_ERR(gpiod); } else if (gpiod) { bus->reset_gpiod = gpiod; From 6da1dfff12d475a0aff984736e4806aa033df669 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 11 Jan 2019 18:55:42 -0800 Subject: [PATCH 1541/2608] net_sched: refetch skb protocol for each filter [ Upstream commit cd0c4e70fc0ccfa705cdf55efb27519ce9337a26 ] Martin reported a set of filters don't work after changing from reclassify to continue. Looking into the code, it looks like skb protocol is not always fetched for each iteration of the filters. But, as demonstrated by Martin, TC actions could modify skb->protocol, for example act_vlan, this means we have to refetch skb protocol in each iteration, rather than using the one we fetch in the beginning of the loop. This bug is _not_ introduced by commit 3b3ae880266d ("net: sched: consolidate tc_classify{,_compat}"), technically, if act_vlan is the only action that modifies skb protocol, then it is commit c7e2b9689ef8 ("sched: introduce vlan action") which introduced this bug. Reported-by: Martin Olsson Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 04a70793c1fe..32819d1e2075 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -318,7 +318,6 @@ EXPORT_SYMBOL(tcf_block_put); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { - __be16 protocol = tc_skb_protocol(skb); #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; const struct tcf_proto *orig_tp = tp; @@ -328,6 +327,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + __be16 protocol = tc_skb_protocol(skb); int err; if (tp->protocol != protocol && @@ -359,7 +359,6 @@ reset: } tp = first_tp; - protocol = tc_skb_protocol(skb); goto reclassify; #endif } From 520126ca0ef79ecf67784b36b67601d54b0df42f Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 14 Jan 2019 09:16:56 +0000 Subject: [PATCH 1542/2608] openvswitch: Avoid OOB read when parsing flow nlattrs [ Upstream commit 04a4af334b971814eedf4e4a413343ad3287d9a9 ] For nested and variable attributes, the expected length of an attribute is not known and marked by a negative number. This results in an OOB read when the expected length is later used to check if the attribute is all zeros. Fix this by using the actual length of the attribute rather than the expected length. Signed-off-by: Ross Lagerwall Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index f70e9cbf33d5..e687b89dafe6 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -459,7 +459,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { attrs |= 1 << type; a[type] = nla; } From 1981e4c9a97f8ad8e8dfcb5ef1707e25fad85279 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 16 Jan 2019 16:54:42 +0800 Subject: [PATCH 1543/2608] vhost: log dirty page correctly [ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ] Vhost dirty page logging API is designed to sync through GPA. But we try to log GIOVA when device IOTLB is enabled. This is wrong and may lead to missing data after migration. To solve this issue, when logging with device IOTLB enabled, we will: 1) reuse the device IOTLB translation result of GIOVA->HVA mapping to get HVA, for writable descriptor, get HVA through iovec. For used ring update, translate its GIOVA to HVA 2) traverse the GPA->HVA mapping to get the possible GPA and log through GPA. Pay attention this reverse mapping is not guaranteed to be unique, so we should log each possible GPA in this case. This fix the failure of scp to guest during migration. In -next, we will probably support passing GIOVA->GPA instead of GIOVA->HVA. Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") Reported-by: Jintack Lim Cc: Jintack Lim Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 3 +- drivers/vhost/vhost.c | 97 ++++++++++++++++++++++++++++++++++++------- drivers/vhost/vhost.h | 3 +- 3 files changed, 87 insertions(+), 16 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 6123b4dd8638..4eba9ee179e3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -851,7 +851,8 @@ static void handle_rx(struct vhost_net *net) vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, headcount); if (unlikely(vq_log)) - vhost_log_write(vq, vq_log, log, vhost_len); + vhost_log_write(vq, vq_log, log, vhost_len, + vq->iov, in); total_len += vhost_len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 97518685ab58..37fcb3ca89f1 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1726,13 +1726,87 @@ static int log_write(void __user *log_base, return r; } +static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) +{ + struct vhost_umem *umem = vq->umem; + struct vhost_umem_node *u; + u64 start, end, l, min; + int r; + bool hit = false; + + while (len) { + min = len; + /* More than one GPAs can be mapped into a single HVA. So + * iterate all possible umems here to be safe. + */ + list_for_each_entry(u, &umem->umem_list, link) { + if (u->userspace_addr > hva - 1 + len || + u->userspace_addr - 1 + u->size < hva) + continue; + start = max(u->userspace_addr, hva); + end = min(u->userspace_addr - 1 + u->size, + hva - 1 + len); + l = end - start + 1; + r = log_write(vq->log_base, + u->start + start - u->userspace_addr, + l); + if (r < 0) + return r; + hit = true; + min = min(l, min); + } + + if (!hit) + return -EFAULT; + + len -= min; + hva += min; + } + + return 0; +} + +static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) +{ + struct iovec iov[64]; + int i, ret; + + if (!vq->iotlb) + return log_write(vq->log_base, vq->log_addr + used_offset, len); + + ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, + len, iov, 64, VHOST_ACCESS_WO); + if (ret) + return ret; + + for (i = 0; i < ret; i++) { + ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, + iov[i].iov_len); + if (ret) + return ret; + } + + return 0; +} + int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, - unsigned int log_num, u64 len) + unsigned int log_num, u64 len, struct iovec *iov, int count) { int i, r; /* Make sure data written is seen before log. */ smp_wmb(); + + if (vq->iotlb) { + for (i = 0; i < count; i++) { + r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, + iov[i].iov_len); + if (r < 0) + return r; + } + return 0; + } + for (i = 0; i < log_num; ++i) { u64 l = min(log[i].len, len); r = log_write(vq->log_base, log[i].addr, l); @@ -1762,9 +1836,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) smp_wmb(); /* Log used flag write. */ used = &vq->used->flags; - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof vq->used->flags); + log_used(vq, (used - (void __user *)vq->used), + sizeof vq->used->flags); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } @@ -1782,9 +1855,8 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) smp_wmb(); /* Log avail event write */ used = vhost_avail_event(vq); - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof *vhost_avail_event(vq)); + log_used(vq, (used - (void __user *)vq->used), + sizeof *vhost_avail_event(vq)); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } @@ -2189,10 +2261,8 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, /* Make sure data is seen before log. */ smp_wmb(); /* Log used ring entry write. */ - log_write(vq->log_base, - vq->log_addr + - ((void __user *)used - (void __user *)vq->used), - count * sizeof *used); + log_used(vq, ((void __user *)used - (void __user *)vq->used), + count * sizeof *used); } old = vq->last_used_idx; new = (vq->last_used_idx += count); @@ -2234,9 +2304,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure used idx is seen before log. */ smp_wmb(); /* Log used index update. */ - log_write(vq->log_base, - vq->log_addr + offsetof(struct vring_used, idx), - sizeof vq->used->idx); + log_used(vq, offsetof(struct vring_used, idx), + sizeof vq->used->idx); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 79c6e7a60a5e..75d21d4a8354 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -208,7 +208,8 @@ bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, - unsigned int log_num, u64 len); + unsigned int log_num, u64 len, + struct iovec *iov, int count); int vq_iotlb_prefetch(struct vhost_virtqueue *vq); struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); From 0781b0f9b5ea0f7dff67e9950f3fcaa3fc6e67c4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 9 Jan 2019 09:57:39 +0000 Subject: [PATCH 1544/2608] net: ipv4: Fix memory leak in network namespace dismantle [ Upstream commit f97f4dd8b3bb9d0993d2491e0f22024c68109184 ] IPv4 routing tables are flushed in two cases: 1. In response to events in the netdev and inetaddr notification chains 2. When a network namespace is being dismantled In both cases only routes associated with a dead nexthop group are flushed. However, a nexthop group will only be marked as dead in case it is populated with actual nexthops using a nexthop device. This is not the case when the route in question is an error route (e.g., 'blackhole', 'unreachable'). Therefore, when a network namespace is being dismantled such routes are not flushed and leaked [1]. To reproduce: # ip netns add blue # ip -n blue route add unreachable 192.0.2.0/24 # ip netns del blue Fix this by not skipping error routes that are not marked with RTNH_F_DEAD when flushing the routing tables. To prevent the flushing of such routes in case #1, add a parameter to fib_table_flush() that indicates if the table is flushed as part of namespace dismantle or not. Note that this problem does not exist in IPv6 since error routes are associated with the loopback device. [1] unreferenced object 0xffff888066650338 (size 56): comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 b0 1c 62 61 80 88 ff ff ..........ba.... e8 8b a1 64 80 88 ff ff 00 07 00 08 fe 00 00 00 ...d............ backtrace: [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000003a8b605b>] 0xffffffffffffffff unreferenced object 0xffff888061621c88 (size 48): comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) hex dump (first 32 bytes): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk 6b 6b 6b 6b 6b 6b 6b 6b d8 8e 26 5f 80 88 ff ff kkkkkkkk..&_.... backtrace: [<00000000733609e3>] fib_table_insert+0x978/0x1500 [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000003a8b605b>] 0xffffffffffffffff Fixes: 8cced9eff1d4 ("[NETNS]: Enable routing configuration in non-initial namespace.") Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/fib_trie.c | 15 ++++++++++++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 32df52869a14..b711317a796c 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -233,7 +233,7 @@ int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); -int fib_table_flush(struct net *net, struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1b3f860f7dcd..b5317b2b191d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -193,7 +193,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(net, tb); + flushed += fib_table_flush(net, tb, false); } if (flushed) @@ -1299,7 +1299,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(net, tb); + fib_table_flush(net, tb, true); fib_free_table(tb); } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index c636650a6a70..bb847d280778 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1836,7 +1836,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. */ -int fib_table_flush(struct net *net, struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1884,8 +1884,17 @@ int fib_table_flush(struct net *net, struct fib_table *tb) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || - tb->tb_id != fa->tb_id) { + if (!fi || tb->tb_id != fa->tb_id || + (!(fi->fib_flags & RTNH_F_DEAD) && + !fib_props[fa->fa_type].error)) { + slen = fa->fa_slen; + continue; + } + + /* Do not flush error routes if network namespace is + * not being dismantled + */ + if (!flush_all && fib_props[fa->fa_type].error) { slen = fa->fa_slen; continue; } From ab6688714226ee66463d703178360d20bea95064 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Jan 2019 14:40:33 -0500 Subject: [PATCH 1545/2608] tcp: allow MSG_ZEROCOPY transmission also in CLOSE_WAIT state [ Upstream commit 13d7f46386e060df31b727c9975e38306fa51e7a ] TCP transmission with MSG_ZEROCOPY fails if the peer closes its end of the connection and so transitions this socket to CLOSE_WAIT state. Transmission in close wait state is acceptable. Other similar tests in the stack (e.g., in FastOpen) accept both states. Relax this test, too. Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg276886.html Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg227390.html Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Reported-by: Marek Majkowski Signed-off-by: Willem de Bruijn CC: Yuchung Cheng CC: Neal Cardwell CC: Soheil Hassas Yeganeh CC: Alexey Kodanev Acked-by: Soheil Hassas Yeganeh Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8109985e78a1..fd14501ac3af 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1178,7 +1178,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) flags = msg->msg_flags; if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { - if (sk->sk_state != TCP_ESTABLISHED) { + if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { err = -EINVAL; goto out_err; } From 01c85b4b4ea08b4f0c65489fd2f759b026d243a4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 6 Jul 2018 12:30:20 +0200 Subject: [PATCH 1546/2608] ipfrag: really prevent allocation on netns exit [ Upstream commit f6f2a4a2eb92bc73671204198bb2f8ab53ff59fb ] Setting the low threshold to 0 has no effect on frags allocation, we need to clear high_thresh instead. The code was pre-existent to commit 648700f76b03 ("inet: frags: use rhashtables for reassembly units"), but before the above, such assignment had a different role: prevent concurrent eviction from the worker and the netns cleanup helper. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 653be98fe3fb..6ffee9d2b0e5 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -90,7 +90,7 @@ static void inet_frags_free_cb(void *ptr, void *arg) void inet_frags_exit_net(struct netns_frags *nf) { - nf->low_thresh = 0; /* prevent creation of new frags */ + nf->high_thresh = 0; /* prevent creation of new frags */ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); } From c461661969ce6b199e43104745b564dd4f1266b8 Mon Sep 17 00:00:00 2001 From: Vijay Viswanath Date: Fri, 25 Jan 2019 17:11:41 +0200 Subject: [PATCH 1547/2608] mmc: Kconfig: Enable CONFIG_MMC_SDHCI_IO_ACCESSORS commit 99d570da309813f67e9c741edeff55bafc6c1d5e upstream. Enable CONFIG_MMC_SDHCI_IO_ACCESSORS so that SDHC controller specific register read and write APIs, if registered, can be used. Signed-off-by: Vijay Viswanath Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: Koen Vandeputte Cc: Loic Poulain Signed-off-by: Georgi Djakov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 8c15637178ff..9ed786935a30 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -429,6 +429,7 @@ config MMC_SDHCI_MSM tristate "Qualcomm SDHCI Controller Support" depends on ARCH_QCOM || (ARM && COMPILE_TEST) depends on MMC_SDHCI_PLTFM + select MMC_SDHCI_IO_ACCESSORS help This selects the Secure Digital Host Controller Interface (SDHCI) support present in Qualcomm SOCs. The controller supports From db818691e83e94e57924f0d7a0a1c288e6430f24 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 13 Jan 2019 14:24:48 +0200 Subject: [PATCH 1548/2608] mei: me: add denverton innovation engine device IDs commit f7ee8ead151f9d0b8dac6ab6c3ff49bbe809c564 upstream. Add the Denverton innovation engine (IE) device ids. The IE is an ME-like device which provides HW security offloading. Cc: Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index e4b10b2d1a08..23739a60517f 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -127,6 +127,8 @@ #define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ #define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ +#define MEI_DEV_ID_DNV_IE 0x19E5 /* Denverton IE */ + #define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ #define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index c77e08cbbfd1..04bf2dd134d0 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -93,6 +93,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_DNV_IE, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, From d9046ae68fa61a010af89e146821d643c9a5daba Mon Sep 17 00:00:00 2001 From: Max Schulze Date: Mon, 7 Jan 2019 08:31:49 +0100 Subject: [PATCH 1549/2608] USB: serial: simple: add Motorola Tetra TPG2200 device id commit b81c2c33eab79dfd3650293b2227ee5c6036585c upstream. Add new Motorola Tetra device id for Motorola Solutions TETRA PEI device T: Bus=02 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0cad ProdID=9016 Rev=24.16 S: Manufacturer=Motorola Solutions, Inc. S: Product=TETRA PEI interface C: #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=usb_serial_simple I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=usb_serial_simple Signed-off-by: Max Schulze Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 6d6acf2c07c3..511242111403 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -88,7 +88,8 @@ DEVICE(moto_modem, MOTO_IDS); /* Motorola Tetra driver */ #define MOTOROLA_TETRA_IDS() \ { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ - { USB_DEVICE(0x0cad, 0x9012) } /* MTP6550 */ + { USB_DEVICE(0x0cad, 0x9012) }, /* MTP6550 */ \ + { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); /* Novatel Wireless GPS driver */ From d1b8cba6a3445ae9a7ea619b7cecac766651ec72 Mon Sep 17 00:00:00 2001 From: Charles Yeh Date: Tue, 15 Jan 2019 23:13:56 +0800 Subject: [PATCH 1550/2608] USB: serial: pl2303: add new PID to support PL2303TB commit 4dcf9ddc9ad5ab649abafa98c5a4d54b1a33dabb upstream. Add new PID to support PL2303TB (TYPE_HX) Signed-off-by: Charles Yeh Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 5fa1e6fb49a6..5e86be81f4c9 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -49,6 +49,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_HCR331) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MOTOROLA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ZTEK) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_TB) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID), diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index b46e74a90af2..f21445acc486 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -13,6 +13,7 @@ #define PL2303_VENDOR_ID 0x067b #define PL2303_PRODUCT_ID 0x2303 +#define PL2303_PRODUCT_ID_TB 0x2304 #define PL2303_PRODUCT_ID_RSAQ2 0x04bb #define PL2303_PRODUCT_ID_DCU11 0x1234 #define PL2303_PRODUCT_ID_PHAROS 0xaaa0 @@ -25,6 +26,7 @@ #define PL2303_PRODUCT_ID_MOTOROLA 0x0307 #define PL2303_PRODUCT_ID_ZTEK 0xe1f1 + #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 From 27657a6aee5946d64a7211afca59d8cc3b1fda82 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 20:29:48 -0600 Subject: [PATCH 1551/2608] ASoC: atom: fix a missing check of snd_pcm_lib_malloc_pages commit 44fabd8cdaaa3acb80ad2bb3b5c61ae2136af661 upstream. snd_pcm_lib_malloc_pages() may fail, so let's check its status and return its error code upstream. Signed-off-by: Kangjie Lu Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 43e7fdd19f29..4558c8b93036 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -399,7 +399,13 @@ static int sst_media_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { - snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params)); + int ret; + + ret = + snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(params)); + if (ret) + return ret; memset(substream->runtime->dma_area, 0, params_buffer_bytes(params)); return 0; } From 585a4feb045615fe7ba028787c489bce090b6ca4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 15 Jan 2019 11:57:23 -0600 Subject: [PATCH 1552/2608] ASoC: rt5514-spi: Fix potential NULL pointer dereference commit 060d0bf491874daece47053c4e1fb0489eb867d2 upstream. There is a potential NULL pointer dereference in case devm_kzalloc() fails and returns NULL. Fix this by adding a NULL check on rt5514_dsp. This issue was detected with the help of Coccinelle. Fixes: 6eebf35b0e4a ("ASoC: rt5514: add rt5514 SPI driver") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5514-spi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 12f2ecf3a4fe..662afc529060 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -265,6 +265,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform) rt5514_dsp = devm_kzalloc(platform->dev, sizeof(*rt5514_dsp), GFP_KERNEL); + if (!rt5514_dsp) + return -ENOMEM; rt5514_dsp->dev = &rt5514_spi->dev; mutex_init(&rt5514_dsp->dma_lock); From 752c738a2b050898f97820cae39dad2d66943fc2 Mon Sep 17 00:00:00 2001 From: Anthony Wong Date: Sat, 19 Jan 2019 12:22:31 +0800 Subject: [PATCH 1553/2608] ALSA: hda - Add mute LED support for HP ProBook 470 G5 commit 699390381a7bae2fab01a22f742a17235c44ed8a upstream. Support speaker and mic mute LEDs on HP ProBook 470 G5. BugLink: https://bugs.launchpad.net/bugs/1811254 Signed-off-by: Anthony Wong Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 0a225dc85044..fb1cec46380d 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -969,6 +969,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE), From 95907c66b3d4666dcad900e9a48f921bccdec3c5 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 14 Jan 2019 18:16:48 +0300 Subject: [PATCH 1554/2608] ARCv2: lib: memeset: fix doing prefetchw outside of buffer commit e6a72b7daeeb521753803550f0ed711152bb2555 upstream. ARCv2 optimized memset uses PREFETCHW instruction for prefetching the next cache line but doesn't ensure that the line is not past the end of the buffer. PRETECHW changes the line ownership and marks it dirty, which can cause issues in SMP config when next line was already owned by other core. Fix the issue by avoiding the PREFETCHW Some more details: The current code has 3 logical loops (ignroing the unaligned part) (a) Big loop for doing aligned 64 bytes per iteration with PREALLOC (b) Loop for 32 x 2 bytes with PREFETCHW (c) any left over bytes loop (a) was already eliding the last 64 bytes, so PREALLOC was safe. The fix was removing PREFETCW from (b). Another potential issue (applicable to configs with 32 or 128 byte L1 cache line) is that PREALLOC assumes 64 byte cache line and may not do the right thing specially for 32b. While it would be easy to adapt, there are no known configs with those lie sizes, so for now, just compile out PREALLOC in such cases. Signed-off-by: Eugeniy Paltsev Cc: stable@vger.kernel.org #4.4+ Signed-off-by: Vineet Gupta [vgupta: rewrote changelog, used asm .macro vs. "C" macro] Signed-off-by: Greg Kroah-Hartman --- arch/arc/lib/memset-archs.S | 40 +++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index 62ad4bcb841a..f230bb7092fd 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -7,11 +7,39 @@ */ #include +#include -#undef PREALLOC_NOT_AVAIL +/* + * The memset implementation below is optimized to use prefetchw and prealloc + * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) + * If you want to implement optimized memset for other possible L1 data cache + * line lengths (32B and 128B) you should rewrite code carefully checking + * we don't call any prefetchw/prealloc instruction for L1 cache lines which + * don't belongs to memset area. + */ + +#if L1_CACHE_SHIFT == 6 + +.macro PREALLOC_INSTR reg, off + prealloc [\reg, \off] +.endm + +.macro PREFETCHW_INSTR reg, off + prefetchw [\reg, \off] +.endm + +#else + +.macro PREALLOC_INSTR +.endm + +.macro PREFETCHW_INSTR +.endm + +#endif ENTRY_CFI(memset) - prefetchw [r0] ; Prefetch the write location + PREFETCHW_INSTR r0, 0 ; Prefetch the first write location mov.f 0, r2 ;;; if size is zero jz.d [blink] @@ -48,11 +76,8 @@ ENTRY_CFI(memset) lpnz @.Lset64bytes ;; LOOP START -#ifdef PREALLOC_NOT_AVAIL - prefetchw [r3, 64] ;Prefetch the next write location -#else - prealloc [r3, 64] -#endif + PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching + #ifdef CONFIG_ARC_HAS_LL64 std.ab r4, [r3, 8] std.ab r4, [r3, 8] @@ -85,7 +110,6 @@ ENTRY_CFI(memset) lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes lpnz .Lset32bytes ;; LOOP START - prefetchw [r3, 32] ;Prefetch the next write location #ifdef CONFIG_ARC_HAS_LL64 std.ab r4, [r3, 8] std.ab r4, [r3, 8] From 377f080a20503684c4728ee14ea30002d310a06b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 19 Dec 2018 19:16:16 +0300 Subject: [PATCH 1555/2608] ARC: adjust memblock_reserve of kernel memory commit a3010a0465383300f909f62b8a83f83ffa7b2517 upstream. In setup_arch_memory we reserve the memory area wherein the kernel is located. Current implementation may reserve more memory than it actually required in case of CONFIG_LINUX_LINK_BASE is not equal to CONFIG_LINUX_RAM_BASE. This happens because we calculate start of the reserved region relatively to the CONFIG_LINUX_RAM_BASE and end of the region relatively to the CONFIG_LINUX_RAM_BASE. For example in case of HSDK board we wasted 256MiB of physical memory: ------------------->8------------------------------ Memory: 770416K/1048576K available (5496K kernel code, 240K rwdata, 1064K rodata, 2200K init, 275K bss, 278160K reserved, 0K cma-reserved) ------------------->8------------------------------ Fix that. Fixes: 9ed68785f7f2b ("ARC: mm: Decouple RAM base address from kernel link addr") Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index ba145065c579..f890b2f9f82f 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -138,7 +138,8 @@ void __init setup_arch_memory(void) */ memblock_add_node(low_mem_start, low_mem_sz, 0); - memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); + memblock_reserve(CONFIG_LINUX_LINK_BASE, + __pa(_end) - CONFIG_LINUX_LINK_BASE); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) From 04af9475e2e6732ffc8a534ed53f72e261b952a0 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 17 Dec 2018 12:54:23 +0300 Subject: [PATCH 1556/2608] ARC: perf: map generic branches to correct hardware condition commit 3affbf0e154ee351add6fcc254c59c3f3947fa8f upstream. So far we've mapped branches to "ijmp" which also counts conditional branches NOT taken. This makes us different from other architectures such as ARM which seem to be counting only taken branches. So use "ijmptak" hardware condition which only counts (all jump instructions that are taken) 'ijmptak' event is available on both ARCompact and ARCv2 ISA based cores. Signed-off-by: Eugeniy Paltsev Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta [vgupta: reworked changelog] Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 9185541035cc..6958545390f0 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = { /* counts condition */ [PERF_COUNT_HW_INSTRUCTIONS] = "iall", - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */ + /* All jump instructions that are taken */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ #ifdef CONFIG_ISA_ARCV2 [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", From bd9759a4a4e264988dd54864a54710d5cf98a3b6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 9 Nov 2018 09:21:47 +0100 Subject: [PATCH 1557/2608] s390/early: improve machine detection commit 03aa047ef2db4985e444af6ee1c1dd084ad9fb4c upstream. Right now the early machine detection code check stsi 3.2.2 for "KVM" and set MACHINE_IS_VM if this is different. As the console detection uses diagnose 8 if MACHINE_IS_VM returns true this will crash Linux early for any non z/VM system that sets a different value than KVM. So instead of assuming z/VM, do not set any of MACHINE_IS_LPAR, MACHINE_IS_VM, or MACHINE_IS_KVM. CC: stable@vger.kernel.org Reviewed-by: Heiko Carstens Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/early.c | 4 ++-- arch/s390/kernel/setup.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index a3219837fa70..4ba5ad44a21a 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -226,10 +226,10 @@ static noinline __init void detect_machine_type(void) if (stsi(vmms, 3, 2, 2) || !vmms->count) return; - /* Running under KVM? If not we assume z/VM */ + /* Detect known hypervisors */ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; - else + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 98c1f7941142..3cb71fc94995 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -884,6 +884,8 @@ void __init setup_arch(char **cmdline_p) pr_info("Linux is running under KVM in 64-bit mode\n"); else if (MACHINE_IS_LPAR) pr_info("Linux is running natively in 64-bit mode\n"); + else + pr_info("Linux is running as a guest in 64-bit mode\n"); /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ From 826ea4c10833b734b9a403b2bd1d857a58cf8fb3 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 9 Jan 2019 13:00:03 +0100 Subject: [PATCH 1558/2608] s390/smp: fix CPU hotplug deadlock with CPU rescan commit b7cb707c373094ce4008d4a6ac9b6b366ec52da5 upstream. smp_rescan_cpus() is called without the device_hotplug_lock, which can lead to a dedlock when a new CPU is found and immediately set online by a udev rule. This was observed on an older kernel version, where the cpu_hotplug_begin() loop was still present, and it resulted in hanging chcpu and systemd-udev processes. This specific deadlock will not show on current kernels. However, there may be other possible deadlocks, and since smp_rescan_cpus() can still trigger a CPU hotplug operation, the device_hotplug_lock should be held. For reference, this was the deadlock with the old cpu_hotplug_begin() loop: chcpu (rescan) systemd-udevd echo 1 > /sys/../rescan -> smp_rescan_cpus() -> (*) get_online_cpus() (increases refcount) -> smp_add_present_cpu() (new CPU found) -> register_cpu() -> device_add() -> udev "add" event triggered -----------> udev rule sets CPU online -> echo 1 > /sys/.../online -> lock_device_hotplug_sysfs() (this is missing in rescan path) -> device_online() -> (**) device_lock(new CPU dev) -> cpu_up() -> cpu_hotplug_begin() (loops until refcount == 0) -> deadlock with (*) -> bus_probe_device() -> device_attach() -> device_lock(new CPU dev) -> deadlock with (**) Fix this by taking the device_hotplug_lock in the CPU rescan path. Cc: Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/smp.c | 4 ++++ drivers/s390/char/sclp_config.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index ae5df4177803..1734cfc84cf1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1168,7 +1168,11 @@ static ssize_t __ref rescan_store(struct device *dev, { int rc; + rc = lock_device_hotplug_sysfs(); + if (rc) + return rc; rc = smp_rescan_cpus(); + unlock_device_hotplug(); return rc ? rc : count; } static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 194ffd5c8580..039b2074db7e 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -60,7 +60,9 @@ static void sclp_cpu_capability_notify(struct work_struct *work) static void __ref sclp_cpu_change_notify(struct work_struct *work) { + lock_device_hotplug(); smp_rescan_cpus(); + unlock_device_hotplug(); } static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) From 4be809db216f72010396bf01a7cd7b7ee835dfd1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 9 Jan 2019 13:02:36 -0600 Subject: [PATCH 1559/2608] char/mwave: fix potential Spectre v1 vulnerability commit 701956d4018e5d5438570e39e8bda47edd32c489 upstream. ipcnum is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/char/mwave/mwavedd.c:299 mwave_ioctl() warn: potential spectre issue 'pDrvData->IPCs' [w] (local cap) Fix this by sanitizing ipcnum before using it to index pDrvData->IPCs. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/char/mwave/mwavedd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index b5e3103c1175..e43c876a9223 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "smapi.h" #include "mwavedd.h" #include "3780i.h" @@ -289,6 +290,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" " ipcnum %x entry usIntCount %x\n", @@ -317,6 +320,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, " Invalid ipcnum %x\n", ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" " ipcnum %x, usIntCount %x\n", @@ -383,6 +388,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); mutex_lock(&mwave_mutex); if (pDrvData->IPCs[ipcnum].bIsEnabled == true) { pDrvData->IPCs[ipcnum].bIsEnabled = false; From 5e405657e13a69c260c532f7ecd02e975d49e6fc Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Mon, 7 Jan 2019 18:28:58 +0100 Subject: [PATCH 1560/2608] staging: rtl8188eu: Add device code for D-Link DWA-121 rev B1 commit 5f74a8cbb38d10615ed46bc3e37d9a4c9af8045a upstream. This device was added to the stand-alone driver on github. Add it to the staging driver as well. Link: https://github.com/lwfinger/rtl8188eu/commit/a0619a07cd1e Signed-off-by: Michael Straube Acked-by: Larry Finger Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 32c7225a831e..2fc7056cbff7 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -43,6 +43,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ + {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ From a358f0becce04eed99eb50111f4cc22b25792de8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 20 Jan 2019 10:46:58 +0100 Subject: [PATCH 1561/2608] tty: Handle problem if line discipline does not have receive_buf commit 27cfb3a53be46a54ec5e0bd04e51995b74c90343 upstream. Some tty line disciplines do not have a receive buf callback, so properly check for that before calling it. If they do not have this callback, just eat the character quietly, as we can't fail this call. Reported-by: Jann Horn Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 417b81c67fe9..7e351d205393 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2180,7 +2180,8 @@ static int tiocsti(struct tty_struct *tty, char __user *p) ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; - ld->ops->receive_buf(tty, &ch, &mbz, 1); + if (ld->ops->receive_buf) + ld->ops->receive_buf(tty, &ch, &mbz, 1); tty_ldisc_deref(ld); return 0; } From be9497af163acd7587920417dfb35ec7e9acda3a Mon Sep 17 00:00:00 2001 From: Samir Virmani Date: Wed, 16 Jan 2019 10:28:07 -0800 Subject: [PATCH 1562/2608] uart: Fix crash in uart_write and uart_put_char commit aff9cf5955185d1f183227e46c5f8673fa483813 upstream. We were experiencing a crash similar to the one reported as part of commit:a5ba1d95e46e ("uart: fix race between uart_put_char() and uart_shutdown()") in our testbed as well. We continue to observe the same crash after integrating the commit a5ba1d95e46e ("uart: fix race between uart_put_char() and uart_shutdown()") On reviewing the change, the port lock should be taken prior to checking for if (!circ->buf) in fn. __uart_put_char and other fns. that update the buffer uart_state->xmit. Traceback: [11/27/2018 06:24:32.4870] Unable to handle kernel NULL pointer dereference at virtual address 0000003b [11/27/2018 06:24:32.4950] PC is at memcpy+0x48/0x180 [11/27/2018 06:24:32.4950] LR is at uart_write+0x74/0x120 [11/27/2018 06:24:32.4950] pc : [] lr : [] pstate: 000001c5 [11/27/2018 06:24:32.4950] sp : ffffffc076433d30 [11/27/2018 06:24:32.4950] x29: ffffffc076433d30 x28: 0000000000000140 [11/27/2018 06:24:32.4950] x27: ffffffc0009b9d5e x26: ffffffc07ce36580 [11/27/2018 06:24:32.4950] x25: 0000000000000000 x24: 0000000000000140 [11/27/2018 06:24:32.4950] x23: ffffffc000891200 x22: ffffffc01fc34000 [11/27/2018 06:24:32.4950] x21: 0000000000000fff x20: 0000000000000076 [11/27/2018 06:24:32.4950] x19: 0000000000000076 x18: 0000000000000000 [11/27/2018 06:24:32.4950] x17: 000000000047cf08 x16: ffffffc000099e68 [11/27/2018 06:24:32.4950] x15: 0000000000000018 x14: 776d726966205948 [11/27/2018 06:24:32.4950] x13: 50203a6c6974755f x12: 74647075205d3333 [11/27/2018 06:24:32.4950] x11: 3a35323a36203831 x10: 30322f37322f3131 [11/27/2018 06:24:32.4950] x9 : 5b205d303638342e x8 : 746164206f742070 [11/27/2018 06:24:32.4950] x7 : 7520736920657261 x6 : 000000000000003b [11/27/2018 06:24:32.4950] x5 : 000000000000817a x4 : 0000000000000008 [11/27/2018 06:24:32.4950] x3 : 2f37322f31312a5b x2 : 000000000000006e [11/27/2018 06:24:32.4950] x1 : ffffffc0009b9cf0 x0 : 000000000000003b [11/27/2018 06:24:32.4950] CPU2: stopping [11/27/2018 06:24:32.4950] CPU: 2 PID: 0 Comm: swapper/2 Tainted: P D O 4.1.51 #3 [11/27/2018 06:24:32.4950] Hardware name: Broadcom-v8A (DT) [11/27/2018 06:24:32.4950] Call trace: [11/27/2018 06:24:32.4950] [] dump_backtrace+0x0/0x150 [11/27/2018 06:24:32.4950] [] show_stack+0x14/0x20 [11/27/2018 06:24:32.4950] [] dump_stack+0x90/0xb0 [11/27/2018 06:24:32.4950] [] handle_IPI+0x18c/0x1a0 [11/27/2018 06:24:32.4950] [] gic_handle_irq+0x88/0x90 Fixes: a5ba1d95e46e ("uart: fix race between uart_put_char() and uart_shutdown()") Cc: stable Signed-off-by: Samir Virmani Acked-by: Tycho Andersen Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 543d0f95f094..94ac6c6e8fb8 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -563,10 +563,12 @@ static int uart_put_char(struct tty_struct *tty, unsigned char c) int ret = 0; circ = &state->xmit; - if (!circ->buf) - return 0; - port = uart_port_lock(state, flags); + if (!circ->buf) { + uart_port_unlock(port, flags); + return 0; + } + if (port && uart_circ_chars_free(circ) != 0) { circ->buf[circ->head] = c; circ->head = (circ->head + 1) & (UART_XMIT_SIZE - 1); @@ -599,11 +601,13 @@ static int uart_write(struct tty_struct *tty, return -EL3HLT; } - circ = &state->xmit; - if (!circ->buf) - return 0; - port = uart_port_lock(state, flags); + circ = &state->xmit; + if (!circ->buf) { + uart_port_unlock(port, flags); + return 0; + } + while (port) { c = CIRC_SPACE_TO_END(circ->head, circ->tail, UART_XMIT_SIZE); if (count < c) From d9231608a7da07f115c55a3d05eca60bcf45f52b Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Tue, 1 Jan 2019 12:28:53 -0800 Subject: [PATCH 1563/2608] tty/n_hdlc: fix __might_sleep warning commit fc01d8c61ce02c034e67378cd3e645734bc18c8c upstream. Fix __might_sleep warning[1] in tty/n_hdlc.c read due to copy_to_user call while current is TASK_INTERRUPTIBLE. This is a false positive since the code path does not depend on current state remaining TASK_INTERRUPTIBLE. The loop breaks out and sets TASK_RUNNING after calling copy_to_user. This patch supresses the warning by setting TASK_RUNNING before calling copy_to_user. [1] https://syzkaller.appspot.com/bug?id=17d5de7f1fcab794cb8c40032f893f52de899324 Signed-off-by: Paul Fulghum Reported-by: syzbot Cc: Tetsuo Handa Cc: Alan Cox Cc: stable Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 7b2a466616d6..08bd6b965847 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -598,6 +598,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, /* too large for caller's buffer */ ret = -EOVERFLOW; } else { + __set_current_state(TASK_RUNNING); if (copy_to_user(buf, rbuf->buf, rbuf->count)) ret = -EFAULT; else From a28dc8a5686a1641934f66b2a09c68a804810fe5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 4 Jan 2019 15:19:42 +0100 Subject: [PATCH 1564/2608] hv_balloon: avoid touching uninitialized struct page during tail onlining commit da8ced360ca8ad72d8f41f5c8fcd5b0e63e1555f upstream. Hyper-V memory hotplug protocol has 2M granularity and in Linux x86 we use 128M. To deal with it we implement partial section onlining by registering custom page onlining callback (hv_online_page()). Later, when more memory arrives we try to online the 'tail' (see hv_bring_pgs_online()). It was found that in some cases this 'tail' onlining causes issues: BUG: Bad page state in process kworker/0:2 pfn:109e3a page:ffffe08344278e80 count:0 mapcount:1 mapping:0000000000000000 index:0x0 flags: 0xfffff80000000() raw: 000fffff80000000 dead000000000100 dead000000000200 0000000000000000 raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 page dumped because: nonzero mapcount ... Workqueue: events hot_add_req [hv_balloon] Call Trace: dump_stack+0x5c/0x80 bad_page.cold.112+0x7f/0xb2 free_pcppages_bulk+0x4b8/0x690 free_unref_page+0x54/0x70 hv_page_online_one+0x5c/0x80 [hv_balloon] hot_add_req.cold.24+0x182/0x835 [hv_balloon] ... Turns out that we now have deferred struct page initialization for memory hotplug so e.g. memory_block_action() in drivers/base/memory.c does pages_correctly_probed() check and in that check it avoids inspecting struct pages and checks sections instead. But in Hyper-V balloon driver we do PageReserved(pfn_to_page()) check and this is now wrong. Switch to checking online_section_nr() instead. Signed-off-by: Vitaly Kuznetsov Cc: stable@kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index db0e6652d7ef..0824405f93fb 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -846,12 +846,14 @@ static unsigned long handle_pg_range(unsigned long pg_start, pfn_cnt -= pgs_ol; /* * Check if the corresponding memory block is already - * online by checking its last previously backed page. - * In case it is we need to bring rest (which was not - * backed previously) online too. + * online. It is possible to observe struct pages still + * being uninitialized here so check section instead. + * In case the section is online we need to bring the + * rest of pfns (which were not backed previously) + * online too. */ if (start_pfn > has->start_pfn && - !PageReserved(pfn_to_page(start_pfn - 1))) + online_section_nr(pfn_to_section_nr(start_pfn))) hv_bring_pgs_online(has, start_pfn, pgs_ol); } From be0cb9e226b7c594070eb64ef8c18f3793f63976 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 17 Dec 2018 20:16:09 +0000 Subject: [PATCH 1565/2608] Drivers: hv: vmbus: Check for ring when getting debug info commit ba50bf1ce9a51fc97db58b96d01306aa70bc3979 upstream. fc96df16a1ce is good and can already fix the "return stack garbage" issue, but let's also improve hv_ringbuffer_get_debuginfo(), which would silently return stack garbage, if people forget to check channel->state or ring_info->ring_buffer, when using the function in the future. Having an error check in the function would eliminate the potential risk. Add a Fixes tag to indicate the patch depdendency. Fixes: fc96df16a1ce ("Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels") Cc: stable@vger.kernel.org Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: Stephen Hemminger Signed-off-by: Dexuan Cui Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/ring_buffer.c | 29 +++++++------ drivers/hv/vmbus_drv.c | 91 +++++++++++++++++++++++++++------------- include/linux/hyperv.h | 5 ++- 3 files changed, 78 insertions(+), 47 deletions(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 3f8dde8d59ba..74c1dfb8183b 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -141,26 +141,25 @@ static u32 hv_copyto_ringbuffer( } /* Get various debug metrics for the specified ring buffer. */ -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info) +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info) { u32 bytes_avail_towrite; u32 bytes_avail_toread; - if (ring_info->ring_buffer) { - hv_get_ringbuffer_availbytes(ring_info, - &bytes_avail_toread, - &bytes_avail_towrite); + if (!ring_info->ring_buffer) + return -EINVAL; - debug_info->bytes_avail_toread = bytes_avail_toread; - debug_info->bytes_avail_towrite = bytes_avail_towrite; - debug_info->current_read_index = - ring_info->ring_buffer->read_index; - debug_info->current_write_index = - ring_info->ring_buffer->write_index; - debug_info->current_interrupt_mask = - ring_info->ring_buffer->interrupt_mask; - } + hv_get_ringbuffer_availbytes(ring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + debug_info->bytes_avail_toread = bytes_avail_toread; + debug_info->bytes_avail_towrite = bytes_avail_towrite; + debug_info->current_read_index = ring_info->ring_buffer->read_index; + debug_info->current_write_index = ring_info->ring_buffer->write_index; + debug_info->current_interrupt_mask + = ring_info->ring_buffer->interrupt_mask; + return 0; } EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 4218a616f1d3..1fd812ed679b 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -297,12 +297,16 @@ static ssize_t out_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", outbound.current_interrupt_mask); } static DEVICE_ATTR_RO(out_intr_mask); @@ -312,12 +316,15 @@ static ssize_t out_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_read_index); } static DEVICE_ATTR_RO(out_read_index); @@ -328,12 +335,15 @@ static ssize_t out_write_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_write_index); } static DEVICE_ATTR_RO(out_write_index); @@ -344,12 +354,15 @@ static ssize_t out_read_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.bytes_avail_toread); } static DEVICE_ATTR_RO(out_read_bytes_avail); @@ -360,12 +373,15 @@ static ssize_t out_write_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(out_write_bytes_avail); @@ -375,12 +391,15 @@ static ssize_t in_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_interrupt_mask); } static DEVICE_ATTR_RO(in_intr_mask); @@ -390,12 +409,15 @@ static ssize_t in_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_read_index); } static DEVICE_ATTR_RO(in_read_index); @@ -405,12 +427,15 @@ static ssize_t in_write_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_write_index); } static DEVICE_ATTR_RO(in_write_index); @@ -421,12 +446,15 @@ static ssize_t in_read_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.bytes_avail_toread); } static DEVICE_ATTR_RO(in_read_bytes_avail); @@ -437,12 +465,15 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(in_write_bytes_avail); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index d1324d3c72b0..8d3ca6da3342 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1130,8 +1130,9 @@ struct hv_ring_buffer_debug_info { u32 bytes_avail_towrite; }; -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info); + +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); /* Vmbus interface */ #define vmbus_driver_register(driver) \ From 8a9c970fabb704846e527fdd4eeca0cab3a91137 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 17 Jan 2019 08:21:24 -0800 Subject: [PATCH 1566/2608] CIFS: Fix possible hang during async MTU reads and writes commit acc58d0bab55a50e02c25f00bd6a210ee121595f upstream. When doing MTU i/o we need to leave some credits for possible reopen requests and other operations happening in parallel. Currently we leave 1 credit which is not enough even for reopen only: we need at least 2 credits if durable handle reconnect fails. Also there may be other operations at the same time including compounding ones which require 3 credits at a time each. Fix this by leaving 8 credits which is big enough to cover most scenarios. Was able to reproduce this when server was configured to give out fewer credits than usual. The proper fix would be to reconnect a file handle first and then obtain credits for an MTU request but this leads to bigger code changes and should happen in other patches. Cc: Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3372eedaa94d..d4360404c62b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -153,14 +153,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, scredits = server->credits; /* can deadlock with reopen */ - if (scredits == 1) { + if (scredits <= 8) { *num = SMB2_MAX_BUFFER_SIZE; *credits = 0; break; } - /* leave one credit for a possible reopen */ - scredits--; + /* leave some credits for reopen and other ops */ + scredits -= 8; *num = min_t(unsigned int, size, scredits * SMB2_MAX_BUFFER_SIZE); From 482592b71d257f4e674505af72d8e23405a64478 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 17 Jan 2019 15:29:26 -0800 Subject: [PATCH 1567/2608] CIFS: Fix credits calculations for reads with errors commit 8004c78c68e894e4fd5ac3c22cc22eb7dc24cabc upstream. Currently we mark MID as malformed if we get an error from server in a read response. This leads to not properly processing credits in the readv callback. Fix this by marking such a response as normal received response and process it appropriately. Cc: Signed-off-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifssmb.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 2e936f94f102..905d0fa1a1cc 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1445,18 +1445,26 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server) } static int -cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid, + bool malformed) { int length; - struct cifs_readdata *rdata = mid->callback_data; length = cifs_discard_remaining_data(server); - dequeue_mid(mid, rdata->result); + dequeue_mid(mid, malformed); mid->resp_buf = server->smallbuf; server->smallbuf = NULL; return length; } +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + struct cifs_readdata *rdata = mid->callback_data; + + return __cifs_readv_discard(server, mid, rdata->result); +} + int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { @@ -1496,12 +1504,23 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return -1; } + /* set up first two iov for signature check and to get credits */ + rdata->iov[0].iov_base = buf; + rdata->iov[0].iov_len = 4; + rdata->iov[1].iov_base = buf + 4; + rdata->iov[1].iov_len = server->total_read - 4; + cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", + rdata->iov[0].iov_base, rdata->iov[0].iov_len); + cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", + rdata->iov[1].iov_base, rdata->iov[1].iov_len); + /* Was the SMB read successful? */ rdata->result = server->ops->map_error(buf, false); if (rdata->result != 0) { cifs_dbg(FYI, "%s: server returned error %d\n", __func__, rdata->result); - return cifs_readv_discard(server, mid); + /* normal error on read response */ + return __cifs_readv_discard(server, mid, false); } /* Is there enough to get to the rest of the READ_RSP header? */ @@ -1544,14 +1563,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) server->total_read += length; } - /* set up first iov for signature check */ - rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = 4; - rdata->iov[1].iov_base = buf + 4; - rdata->iov[1].iov_len = server->total_read - 4; - cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n", - rdata->iov[0].iov_base, server->total_read); - /* how much data is in the response? */ data_len = server->ops->read_data_length(buf); if (data_offset + data_len > buflen) { From 1c69ba8b1453eb1c9f77536433616dab0a147c65 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 18 Jan 2019 15:38:11 -0800 Subject: [PATCH 1568/2608] CIFS: Fix credit calculation for encrypted reads with errors commit ec678eae746dd25766a61c4095e2b649d3b20b09 upstream. We do need to account for credits received in error responses to read requests on encrypted sessions. Cc: Signed-off-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d4360404c62b..05548379fbb7 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2531,11 +2531,23 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, server->ops->is_status_pending(buf, server, 0)) return -1; - rdata->result = server->ops->map_error(buf, false); + /* set up first two iov to get credits */ + rdata->iov[0].iov_base = buf; + rdata->iov[0].iov_len = 4; + rdata->iov[1].iov_base = buf + 4; + rdata->iov[1].iov_len = + min_t(unsigned int, buf_len, server->vals->read_rsp_size) - 4; + cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", + rdata->iov[0].iov_base, rdata->iov[0].iov_len); + cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", + rdata->iov[1].iov_base, rdata->iov[1].iov_len); + + rdata->result = server->ops->map_error(buf, true); if (rdata->result != 0) { cifs_dbg(FYI, "%s: server returned error %d\n", __func__, rdata->result); - dequeue_mid(mid, rdata->result); + /* normal error on read response */ + dequeue_mid(mid, false); return 0; } @@ -2605,14 +2617,6 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, return 0; } - /* set up first iov for signature check */ - rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = 4; - rdata->iov[1].iov_base = buf + 4; - rdata->iov[1].iov_len = server->vals->read_rsp_size - 4; - cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", - rdata->iov[0].iov_base, server->vals->read_rsp_size); - length = rdata->copy_into_pages(server, rdata, &iter); kfree(bvec); From 2cffcebe17439fab6d2d54d754054cd3da547f09 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 18 Jan 2019 17:25:36 -0800 Subject: [PATCH 1569/2608] CIFS: Do not reconnect TCP session in add_credits() commit ef68e831840c40c7d01b328b3c0f5d8c4796c232 upstream. When executing add_credits() we currently call cifs_reconnect() if the number of credits is zero and there are no requests in flight. In this case we may call cifs_reconnect() recursively twice and cause memory corruption given the following sequence of functions: mid1.callback() -> add_credits() -> cifs_reconnect() -> -> mid2.callback() -> add_credits() -> cifs_reconnect(). Fix this by avoiding to call cifs_reconnect() in add_credits() and checking for zero credits in the demultiplex thread. Cc: Signed-off-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 21 +++++++++++++++++++++ fs/cifs/smb2ops.c | 32 +++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d6248137c219..000b7bfa8cf0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -524,6 +524,21 @@ server_unresponsive(struct TCP_Server_Info *server) return false; } +static inline bool +zero_credits(struct TCP_Server_Info *server) +{ + int val; + + spin_lock(&server->req_lock); + val = server->credits + server->echo_credits + server->oplock_credits; + if (server->in_flight == 0 && val == 0) { + spin_unlock(&server->req_lock); + return true; + } + spin_unlock(&server->req_lock); + return false; +} + static int cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) { @@ -536,6 +551,12 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) for (total_read = 0; msg_data_left(smb_msg); total_read += length) { try_to_freeze(); + /* reconnect if no credits and no requests in flight */ + if (zero_credits(server)) { + cifs_reconnect(server); + return -ECONNABORTED; + } + if (server_unresponsive(server)) return -ECONNABORTED; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 05548379fbb7..fb1c65f93114 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -33,6 +33,7 @@ #include "smb2glob.h" #include "cifs_ioctl.h" +/* Change credits for different ops and return the total number of credits */ static int change_conf(struct TCP_Server_Info *server) { @@ -40,17 +41,15 @@ change_conf(struct TCP_Server_Info *server) server->oplock_credits = server->echo_credits = 0; switch (server->credits) { case 0: - return -1; + return 0; case 1: server->echoes = false; server->oplocks = false; - cifs_dbg(VFS, "disabling echoes and oplocks\n"); break; case 2: server->echoes = true; server->oplocks = false; server->echo_credits = 1; - cifs_dbg(FYI, "disabling oplocks\n"); break; default: server->echoes = true; @@ -63,14 +62,15 @@ change_conf(struct TCP_Server_Info *server) server->echo_credits = 1; } server->credits -= server->echo_credits + server->oplock_credits; - return 0; + return server->credits + server->echo_credits + server->oplock_credits; } static void smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, const int optype) { - int *val, rc = 0; + int *val, rc = -1; + spin_lock(&server->req_lock); val = server->ops->get_credits_field(server, optype); *val += add; @@ -94,8 +94,26 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, } spin_unlock(&server->req_lock); wake_up(&server->request_q); - if (rc) - cifs_reconnect(server); + + if (server->tcpStatus == CifsNeedReconnect) + return; + + switch (rc) { + case -1: + /* change_conf hasn't been executed */ + break; + case 0: + cifs_dbg(VFS, "Possible client or server bug - zero credits\n"); + break; + case 1: + cifs_dbg(VFS, "disabling echoes and oplocks\n"); + break; + case 2: + cifs_dbg(FYI, "disabling oplocks\n"); + break; + default: + cifs_dbg(FYI, "add %u credits total=%d\n", add, rc); + } } static void From f2b63cc459ff1d65bca3aaed5b069a09319d592a Mon Sep 17 00:00:00 2001 From: Tom Panfil Date: Fri, 11 Jan 2019 17:49:40 -0800 Subject: [PATCH 1570/2608] Input: xpad - add support for SteelSeries Stratus Duo commit fe2bfd0d40c935763812973ce15f5764f1c12833 upstream. Add support for the SteelSeries Stratus Duo, a wireless Xbox 360 controller. The Stratus Duo ships with a USB dongle to enable wireless connectivity, but it can also function as a wired controller by connecting it directly to a PC via USB, hence the need for two USD PIDs. 0x1430 is the dongle, and 0x1431 is the controller. Signed-off-by: Tom Panfil Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index f55dcdf99bc5..26476a64e663 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -255,6 +255,8 @@ static const struct xpad_device { { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, + { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, + { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, @@ -431,6 +433,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f X-Box One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori Controllers */ + XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ From d8c3df0641bcf9eb4e8ad4048fdf9ea575c79928 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 8 May 2018 00:36:27 +0200 Subject: [PATCH 1571/2608] compiler.h: enable builtin overflow checkers and add fallback code commit f0907827a8a9152aedac2833ed1b674a7b2a44f2 upstream. This adds wrappers for the __builtin overflow checkers present in gcc 5.1+ as well as fallback implementations for earlier compilers. It's not that easy to implement the fully generic __builtin_X_overflow(T1 a, T2 b, T3 *d) in macros, so the fallback code assumes that T1, T2 and T3 are the same. We obviously don't want the wrappers to have different semantics depending on $GCC_VERSION, so we also insist on that even when using the builtins. There are a few problems with the 'a+b < a' idiom for checking for overflow: For signed types, it relies on undefined behaviour and is not actually complete (it doesn't check underflow; e.g. INT_MIN+INT_MIN == 0 isn't caught). Due to type promotion it is wrong for all types (signed and unsigned) narrower than int. Similarly, when a and b does not have the same type, there are subtle cases like u32 a; if (a + sizeof(foo) < a) return -EOVERFLOW; a += sizeof(foo); where the test is always false on 64 bit platforms. Add to that that it is not always possible to determine the types involved at a glance. The new overflow.h is somewhat bulky, but that's mostly a result of trying to be type-generic, complete (e.g. catching not only overflow but also signed underflow) and not relying on undefined behaviour. Linus is of course right [1] that for unsigned subtraction a-b, the right way to check for overflow (underflow) is "b > a" and not "__builtin_sub_overflow(a, b, &d)", but that's just one out of six cases covered here, and included mostly for completeness. So is it worth it? I think it is, if nothing else for the documentation value of seeing if (check_add_overflow(a, b, &d)) return -EGOAWAY; do_stuff_with(d); instead of the open-coded (and possibly wrong and/or incomplete and/or UBsan-tickling) if (a+b < a) return -EGOAWAY; do_stuff_with(a+b); While gcc does recognize the 'a+b < a' idiom for testing unsigned add overflow, it doesn't do nearly as good for unsigned multiplication (there's also no single well-established idiom). So using check_mul_overflow in kcalloc and friends may also make gcc generate slightly better code. [1] https://lkml.org/lkml/2015/11/2/658 Signed-off-by: Rasmus Villemoes Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-clang.h | 14 +++ include/linux/compiler-gcc.h | 4 + include/linux/compiler-intel.h | 4 + include/linux/overflow.h | 205 +++++++++++++++++++++++++++++++++ 4 files changed, 227 insertions(+) create mode 100644 include/linux/overflow.h diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 28b76f0894d4..673fa522a7ab 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -24,3 +24,17 @@ #ifdef __noretpoline #undef __noretpoline #endif + +/* + * Not all versions of clang implement the the type-generic versions + * of the builtin overflow checkers. Fortunately, clang implements + * __has_builtin allowing us to avoid awkward version + * checks. Unfortunately, we don't know which version of gcc clang + * pretends to be, so the macro may or may not be defined. + */ +#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW +#if __has_builtin(__builtin_mul_overflow) && \ + __has_builtin(__builtin_add_overflow) && \ + __has_builtin(__builtin_sub_overflow) +#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 +#endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 00b06d7efb83..4816355b9875 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -358,3 +358,7 @@ * code */ #define uninitialized_var(x) x = x + +#if GCC_VERSION >= 50100 +#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 +#endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index bfa08160db3a..547cdc920a3c 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -44,3 +44,7 @@ #define __builtin_bswap16 _bswap16 #endif +/* + * icc defines __GNUC__, but does not implement the builtin overflow checkers. + */ +#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW diff --git a/include/linux/overflow.h b/include/linux/overflow.h new file mode 100644 index 000000000000..c8890ec358a7 --- /dev/null +++ b/include/linux/overflow.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +#ifndef __LINUX_OVERFLOW_H +#define __LINUX_OVERFLOW_H + +#include + +/* + * In the fallback code below, we need to compute the minimum and + * maximum values representable in a given type. These macros may also + * be useful elsewhere, so we provide them outside the + * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. + * + * It would seem more obvious to do something like + * + * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) + * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) + * + * Unfortunately, the middle expressions, strictly speaking, have + * undefined behaviour, and at least some versions of gcc warn about + * the type_max expression (but not if -fsanitize=undefined is in + * effect; in that case, the warning is deferred to runtime...). + * + * The slightly excessive casting in type_min is to make sure the + * macros also produce sensible values for the exotic type _Bool. [The + * overflow checkers only almost work for _Bool, but that's + * a-feature-not-a-bug, since people shouldn't be doing arithmetic on + * _Bools. Besides, the gcc builtins don't allow _Bool* as third + * argument.] + * + * Idea stolen from + * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - + * credit to Christian Biere. + */ +#define is_signed_type(type) (((type)(-1)) < (type)1) +#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) +#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_min(T) ((T)((T)-type_max(T)-(T)1)) + + +#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW +/* + * For simplicity and code hygiene, the fallback code below insists on + * a, b and *d having the same type (similar to the min() and max() + * macros), whereas gcc's type-generic overflow checkers accept + * different types. Hence we don't just make check_add_overflow an + * alias for __builtin_add_overflow, but add type checks similar to + * below. + */ +#define check_add_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_add_overflow(__a, __b, __d); \ +}) + +#define check_sub_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_sub_overflow(__a, __b, __d); \ +}) + +#define check_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_mul_overflow(__a, __b, __d); \ +}) + +#else + + +/* Checking for unsigned overflow is relatively easy without causing UB. */ +#define __unsigned_add_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a + __b; \ + *__d < __a; \ +}) +#define __unsigned_sub_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a - __b; \ + __a < __b; \ +}) +/* + * If one of a or b is a compile-time constant, this avoids a division. + */ +#define __unsigned_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a * __b; \ + __builtin_constant_p(__b) ? \ + __b > 0 && __a > type_max(typeof(__a)) / __b : \ + __a > 0 && __b > type_max(typeof(__b)) / __a; \ +}) + +/* + * For signed types, detecting overflow is much harder, especially if + * we want to avoid UB. But the interface of these macros is such that + * we must provide a result in *d, and in fact we must produce the + * result promised by gcc's builtins, which is simply the possibly + * wrapped-around value. Fortunately, we can just formally do the + * operations in the widest relevant unsigned type (u64) and then + * truncate the result - gcc is smart enough to generate the same code + * with and without the (u64) casts. + */ + +/* + * Adding two signed integers can overflow only if they have the same + * sign, and overflow has happened iff the result has the opposite + * sign. + */ +#define __signed_add_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a + (u64)__b; \ + (((~(__a ^ __b)) & (*__d ^ __a)) \ + & type_min(typeof(__a))) != 0; \ +}) + +/* + * Subtraction is similar, except that overflow can now happen only + * when the signs are opposite. In this case, overflow has happened if + * the result has the opposite sign of a. + */ +#define __signed_sub_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a - (u64)__b; \ + ((((__a ^ __b)) & (*__d ^ __a)) \ + & type_min(typeof(__a))) != 0; \ +}) + +/* + * Signed multiplication is rather hard. gcc always follows C99, so + * division is truncated towards 0. This means that we can write the + * overflow check like this: + * + * (a > 0 && (b > MAX/a || b < MIN/a)) || + * (a < -1 && (b > MIN/a || b < MAX/a) || + * (a == -1 && b == MIN) + * + * The redundant casts of -1 are to silence an annoying -Wtype-limits + * (included in -Wextra) warning: When the type is u8 or u16, the + * __b_c_e in check_mul_overflow obviously selects + * __unsigned_mul_overflow, but unfortunately gcc still parses this + * code and warns about the limited range of __b. + */ + +#define __signed_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + typeof(a) __tmax = type_max(typeof(a)); \ + typeof(a) __tmin = type_min(typeof(a)); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a * (u64)__b; \ + (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ + (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ + (__b == (typeof(__b))-1 && __a == __tmin); \ +}) + + +#define check_add_overflow(a, b, d) \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_add_overflow(a, b, d), \ + __unsigned_add_overflow(a, b, d)) + +#define check_sub_overflow(a, b, d) \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_sub_overflow(a, b, d), \ + __unsigned_sub_overflow(a, b, d)) + +#define check_mul_overflow(a, b, d) \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_mul_overflow(a, b, d), \ + __unsigned_mul_overflow(a, b, d)) + + +#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ + +#endif /* __LINUX_OVERFLOW_H */ From 28edadd147bfed92e3e16ed519ef098276935d94 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 14 Jan 2019 13:54:55 -0800 Subject: [PATCH 1572/2608] Input: uinput - fix undefined behavior in uinput_validate_absinfo() commit d77651a227f8920dd7ec179b84e400cce844eeb3 upstream. An integer overflow may arise in uinput_validate_absinfo() if "max - min" can't be represented by an "int". We should check for overflow before trying to use the result. Reported-by: Kyungtae Kim Reviewed-by: Peter Hutterer Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/uinput.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 443151de90c6..8c95d3f78072 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "../input-compat.h" @@ -356,7 +357,7 @@ static int uinput_open(struct inode *inode, struct file *file) static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, const struct input_absinfo *abs) { - int min, max; + int min, max, range; min = abs->minimum; max = abs->maximum; @@ -368,7 +369,7 @@ static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, return -EINVAL; } - if (abs->flat > max - min) { + if (!check_sub_overflow(max, min, &range) && abs->flat > range) { printk(KERN_DEBUG "%s: abs_flat #%02x out of range: %d (min:%d/max:%d)\n", UINPUT_NAME, code, abs->flat, min, max); From 8d93b82587a9463130f641d879e4fb5e636720d5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Jan 2019 14:07:19 -0800 Subject: [PATCH 1573/2608] acpi/nfit: Block function zero DSMs commit 5e9e38d0db1d29efed1dd4cf9a70115d33521be7 upstream. In preparation for using function number 0 as an error value, prevent it from being considered a valid function value by acpi_nfit_ctl(). Cc: Cc: stuart hayes Fixes: e02fb7264d8a ("nfit: add Microsoft NVDIMM DSM command set...") Reported-by: Jeff Moyer Reviewed-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 8260b90eb64b..abd1a4e8b5a6 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1503,6 +1503,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return 0; } + /* + * Function 0 is the command interrogation function, don't + * export it to potential userspace use, and enable it to be + * used as an error value in acpi_nfit_ctl(). + */ + dsm_mask &= ~1UL; + guid = to_nfit_uuid(nfit_mem->family); for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i)) From 1c285c34a509ddfc83be425d8305a6d94d502208 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 19 Jan 2019 10:55:04 -0800 Subject: [PATCH 1574/2608] acpi/nfit: Fix command-supported detection commit 11189c1089da413aa4b5fd6be4c4d47c78968819 upstream. The _DSM function number validation only happens to succeed when the generic Linux command number translation corresponds with a DSM-family-specific function number. This breaks NVDIMM-N implementations that correctly implement _LSR, _LSW, and _LSI, but do not happen to publish support for DSM function numbers 4, 5, and 6. Recall that the support for _LS{I,R,W} family of methods results in the DIMM being marked as supporting those command numbers at acpi_nfit_register_dimms() time. The DSM function mask is only used for ND_CMD_CALL support of non-NVDIMM_FAMILY_INTEL devices. Fixes: 31eca76ba2fc ("nfit, libnvdimm: limited/whitelisted dimm command...") Cc: Link: https://github.com/pmem/ndctl/issues/78 Reported-by: Sujith Pandel Tested-by: Sujith Pandel Reviewed-by: Vishal Verma Reviewed-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 54 +++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index abd1a4e8b5a6..4a6c5e7b6835 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -208,6 +208,32 @@ static int xlat_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd, return xlat_nvdimm_status(buf, cmd, status); } +static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, + struct nd_cmd_pkg *call_pkg) +{ + if (call_pkg) { + int i; + + if (nfit_mem->family != call_pkg->nd_family) + return -ENOTTY; + + for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) + if (call_pkg->nd_reserved2[i]) + return -EINVAL; + return call_pkg->nd_command; + } + + /* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */ + if (nfit_mem->family == NVDIMM_FAMILY_INTEL) + return cmd; + + /* + * Force function number validation to fail since 0 is never + * published as a valid function in dsm_mask. + */ + return 0; +} + int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { @@ -220,21 +246,11 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned long cmd_mask, dsm_mask; u32 offset, fw_status = 0; acpi_handle handle; - unsigned int func; const guid_t *guid; - int rc, i; + int func, rc, i; if (cmd_rc) *cmd_rc = -EINVAL; - func = cmd; - if (cmd == ND_CMD_CALL) { - call_pkg = buf; - func = call_pkg->nd_command; - - for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) - if (call_pkg->nd_reserved2[i]) - return -EINVAL; - } if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); @@ -242,9 +258,12 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (!adev) return -ENOTTY; - if (call_pkg && nfit_mem->family != call_pkg->nd_family) - return -ENOTTY; + if (cmd == ND_CMD_CALL) + call_pkg = buf; + func = cmd_to_func(nfit_mem, cmd, call_pkg); + if (func < 0) + return func; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); cmd_mask = nvdimm_cmd_mask(nvdimm); @@ -255,6 +274,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } else { struct acpi_device *adev = to_acpi_dev(acpi_desc); + func = cmd; cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; dsm_mask = cmd_mask; @@ -269,7 +289,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) return -ENOTTY; - if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask)) + /* + * Check for a valid command. For ND_CMD_CALL, we also have to + * make sure that the DSM function is supported. + */ + if (cmd == ND_CMD_CALL && !test_bit(func, &dsm_mask)) + return -ENOTTY; + else if (!test_bit(cmd, &cmd_mask)) return -ENOTTY; in_obj.type = ACPI_TYPE_PACKAGE; From 294710ddfc480aee277325cee8b54afa0c514e43 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 15 Jan 2019 13:27:01 -0500 Subject: [PATCH 1575/2608] dm thin: fix passdown_double_checking_shared_status() commit d445bd9cec1a850c2100fcf53684c13b3fd934f2 upstream. Commit 00a0ea33b495 ("dm thin: do not queue freed thin mapping for next stage processing") changed process_prepared_discard_passdown_pt1() to increment all the blocks being discarded until after the passdown had completed to avoid them being prematurely reused. IO issued to a thin device that breaks sharing with a snapshot, followed by a discard issued to snapshot(s) that previously shared the block(s), results in passdown_double_checking_shared_status() being called to iterate through the blocks double checking their reference count is zero and issuing the passdown if so. So a side effect of commit 00a0ea33b495 is passdown_double_checking_shared_status() was broken. Fix this by checking if the block reference count is greater than 1. Also, rename dm_pool_block_is_used() to dm_pool_block_is_shared(). Fixes: 00a0ea33b495 ("dm thin: do not queue freed thin mapping for next stage processing") Cc: stable@vger.kernel.org # 4.9+ Reported-by: ryan.p.norwood@gmail.com Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 4 ++-- drivers/md/dm-thin-metadata.h | 2 +- drivers/md/dm-thin.c | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 45ff8fd00248..b85a66f42814 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1687,7 +1687,7 @@ int dm_thin_remove_range(struct dm_thin_device *td, return r; } -int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) +int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) { int r; uint32_t ref_count; @@ -1695,7 +1695,7 @@ int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *resu down_read(&pmd->root_lock); r = dm_sm_get_count(pmd->data_sm, b, &ref_count); if (!r) - *result = (ref_count != 0); + *result = (ref_count > 1); up_read(&pmd->root_lock); return r; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 35e954ea20a9..f6be0d733c20 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -195,7 +195,7 @@ int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); -int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); +int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e); int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index da98fc7b995c..40b624d8255d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1042,7 +1042,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m * passdown we have to check that these blocks are now unused. */ int r = 0; - bool used = true; + bool shared = true; struct thin_c *tc = m->tc; struct pool *pool = tc->pool; dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin; @@ -1052,11 +1052,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m while (b != end) { /* find start of unmapped run */ for (; b < end; b++) { - r = dm_pool_block_is_used(pool->pmd, b, &used); + r = dm_pool_block_is_shared(pool->pmd, b, &shared); if (r) goto out; - if (!used) + if (!shared) break; } @@ -1065,11 +1065,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m /* find end of run */ for (e = b + 1; e != end; e++) { - r = dm_pool_block_is_used(pool->pmd, e, &used); + r = dm_pool_block_is_shared(pool->pmd, e, &shared); if (r) goto out; - if (used) + if (shared) break; } From 06fc0dbec672bc118b24c8ce92314a5931a0796f Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 9 Jan 2019 11:57:14 +0100 Subject: [PATCH 1576/2608] dm crypt: fix parsing of extended IV arguments commit 1856b9f7bcc8e9bdcccc360aabb56fbd4dd6c565 upstream. The dm-crypt cipher specification in a mapping table is defined as: cipher[:keycount]-chainmode-ivmode[:ivopts] or (new crypt API format): capi:cipher_api_spec-ivmode[:ivopts] For ESSIV, the parameter includes hash specification, for example: aes-cbc-essiv:sha256 The implementation expected that additional IV option to never include another dash '-' character. But, with SHA3, there are names like sha3-256; so the mapping table parser fails: dmsetup create test --table "0 8 crypt aes-cbc-essiv:sha3-256 9c1185a5c5e9fc54612808977ee8f5b9e 0 /dev/sdb 0" or (new crypt API format) dmsetup create test --table "0 8 crypt capi:cbc(aes)-essiv:sha3-256 9c1185a5c5e9fc54612808977ee8f5b9e 0 /dev/sdb 0" device-mapper: crypt: Ignoring unexpected additional cipher options device-mapper: table: 253:0: crypt: Error creating IV device-mapper: ioctl: error adding target to table Fix the dm-crypt constructor to ignore additional dash in IV options and also remove a bogus warning (that is ignored anyway). Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 2652ef68d58d..1f6d8b6be5c7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2413,9 +2413,21 @@ static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key * capi:cipher_api_spec-iv:ivopts */ tmp = &cipher_in[strlen("capi:")]; - cipher_api = strsep(&tmp, "-"); - *ivmode = strsep(&tmp, ":"); - *ivopts = tmp; + + /* Separate IV options if present, it can contain another '-' in hash name */ + *ivopts = strrchr(tmp, ':'); + if (*ivopts) { + **ivopts = '\0'; + (*ivopts)++; + } + /* Parse IV mode */ + *ivmode = strrchr(tmp, '-'); + if (*ivmode) { + **ivmode = '\0'; + (*ivmode)++; + } + /* The rest is crypto API spec */ + cipher_api = tmp; if (*ivmode && !strcmp(*ivmode, "lmk")) cc->tfms_count = 64; @@ -2485,11 +2497,8 @@ static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key goto bad_mem; chainmode = strsep(&tmp, "-"); - *ivopts = strsep(&tmp, "-"); - *ivmode = strsep(&*ivopts, ":"); - - if (tmp) - DMWARN("Ignoring unexpected additional cipher options"); + *ivmode = strsep(&tmp, ":"); + *ivopts = tmp; /* * For compatibility with the original dm-crypt mapping format, if From e9728154f9d780cc6dc538ef277314c46834db4e Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Mon, 21 Jan 2019 15:48:40 +0300 Subject: [PATCH 1577/2608] KVM: x86: Fix single-step debugging commit 5cc244a20b86090c087073c124284381cdf47234 upstream. The single-step debugging of KVM guests on x86 is broken: if we run gdb 'stepi' command at the breakpoint when the guest interrupts are enabled, RIP always jumps to native_apic_mem_write(). Then other nasty effects follow. Long investigation showed that on Jun 7, 2017 the commit c8401dda2f0a00cd25c0 ("KVM: x86: fix singlestepping over syscall") introduced the kvm_run.debug corruption: kvm_vcpu_do_singlestep() can be called without X86_EFLAGS_TF set. Let's fix it. Please consider that for -stable. Signed-off-by: Alexander Popov Cc: stable@vger.kernel.org Fixes: c8401dda2f0a00cd25c0 ("KVM: x86: fix singlestepping over syscall") Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 130be2efafbe..fb53634f00dd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5923,8 +5923,7 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && - (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + if (r == EMULATE_DONE && ctxt->tf) kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) From 92296f7ddac6c3bfee6fb952b6f776291ef4df48 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 2 Jan 2019 13:56:55 -0800 Subject: [PATCH 1578/2608] x86/pkeys: Properly copy pkey state at fork() commit a31e184e4f69965c99c04cc5eb8a4920e0c63737 upstream. Memory protection key behavior should be the same in a child as it was in the parent before a fork. But, there is a bug that resets the state in the child at fork instead of preserving it. The creation of new mm's is a bit convoluted. At fork(), the code does: 1. memcpy() the parent mm to initialize child 2. mm_init() to initalize some select stuff stuff 3. dup_mmap() to create true copies that memcpy() did not do right For pkeys two bits of state need to be preserved across a fork: 'execute_only_pkey' and 'pkey_allocation_map'. Those are preserved by the memcpy(), but mm_init() invokes init_new_context() which overwrites 'execute_only_pkey' and 'pkey_allocation_map' with "new" values. The author of the code erroneously believed that init_new_context is *only* called at execve()-time. But, alas, init_new_context() is used at execve() and fork(). The result is that, after a fork(), the child's pkey state ends up looking like it does after an execve(), which is totally wrong. pkeys that are already allocated can be allocated again, for instance. To fix this, add code called by dup_mmap() to copy the pkey state from parent to child explicitly. Also add a comment above init_new_context() to make it more clear to the next poor sod what this code is used for. Fixes: e8c24d3a23a ("x86/pkeys: Allocation/free syscalls") Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: peterz@infradead.org Cc: mpe@ellerman.id.au Cc: will.deacon@arm.com Cc: luto@kernel.org Cc: jroedel@suse.de Cc: stable@vger.kernel.org Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Michael Ellerman Cc: Will Deacon Cc: Andy Lutomirski Cc: Joerg Roedel Link: https://lkml.kernel.org/r/20190102215655.7A69518C@viggo.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index ed97ef3b48a7..7ebcbd1d881d 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -182,6 +182,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +/* + * Init a new mm. Used on mm copies, like at fork() + * and on mm's that are brand-new, like at execve(). + */ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -232,8 +236,22 @@ do { \ } while (0) #endif +static inline void arch_dup_pkeys(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + /* Duplicate the oldmm pkey state in mm: */ + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; + mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; +#endif +} + static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { + arch_dup_pkeys(oldmm, mm); paravirt_arch_dup_mmap(oldmm, mm); return ldt_dup_context(oldmm, mm); } From ce20aba74c69084c1f87725b68a507735733c798 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 2 Jan 2019 13:56:57 -0800 Subject: [PATCH 1579/2608] x86/selftests/pkeys: Fork() to check for state being preserved commit e1812933b17be7814f51b6c310c5d1ced7a9a5f5 upstream. There was a bug where the per-mm pkey state was not being preserved across fork() in the child. fork() is performed in the pkey selftests, but all of the pkey activity is performed in the parent. The child does not perform any actions sensitive to pkey state. To make the test more sensitive to these kinds of bugs, add a fork() where the parent exits, and execution continues in the child. To achieve this let the key exhaustion test not terminate at the first allocation failure and fork after 2*NR_PKEYS loops and continue in the child. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: peterz@infradead.org Cc: mpe@ellerman.id.au Cc: will.deacon@arm.com Cc: luto@kernel.org Cc: jroedel@suse.de Cc: stable@vger.kernel.org Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Michael Ellerman Cc: Will Deacon Cc: Andy Lutomirski Cc: Joerg Roedel Link: https://lkml.kernel.org/r/20190102215657.585704B7@viggo.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/protection_keys.c | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 460b4bdf4c1e..5d546dcdbc80 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -1133,6 +1133,21 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) pkey_assert(err); } +void become_child(void) +{ + pid_t forkret; + + forkret = fork(); + pkey_assert(forkret >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); + + if (!forkret) { + /* in the child */ + return; + } + exit(0); +} + /* Assumes that all pkeys other than 'pkey' are unallocated */ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { @@ -1141,7 +1156,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int nr_allocated_pkeys = 0; int i; - for (i = 0; i < NR_PKEYS*2; i++) { + for (i = 0; i < NR_PKEYS*3; i++) { int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); @@ -1152,20 +1167,26 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) if ((new_pkey == -1) && (errno == ENOSPC)) { dprintf2("%s() failed to allocate pkey after %d tries\n", __func__, nr_allocated_pkeys); - break; + } else { + /* + * Ensure the number of successes never + * exceeds the number of keys supported + * in the hardware. + */ + pkey_assert(nr_allocated_pkeys < NR_PKEYS); + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; } - pkey_assert(nr_allocated_pkeys < NR_PKEYS); - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + + /* + * Make sure that allocation state is properly + * preserved across fork(). + */ + if (i == NR_PKEYS*2) + become_child(); } dprintf3("%s()::%d\n", __func__, __LINE__); - /* - * ensure it did not reach the end of the loop without - * failure: - */ - pkey_assert(i < NR_PKEYS*2); - /* * There are 16 pkeys supported in hardware. Three are * allocated by the time we get here: From a0aab0f76606a29a49f89142a07ab412f5ae5658 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Mon, 7 Jan 2019 11:40:24 +0800 Subject: [PATCH 1580/2608] x86/kaslr: Fix incorrect i8254 outb() parameters commit 7e6fc2f50a3197d0e82d1c0e86282976c9e6c8a4 upstream. The outb() function takes parameters value and port, in that order. Fix the parameters used in the kalsr i8254 fallback code. Fixes: 5bfce5ef55cb ("x86, kaslr: Provide randomness functions") Signed-off-by: Daniel Drake Signed-off-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: linux@endlessm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190107034024.15005-1-drake@endlessm.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/kaslr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c index 79778ab200e4..a53665116458 100644 --- a/arch/x86/lib/kaslr.c +++ b/arch/x86/lib/kaslr.c @@ -36,8 +36,8 @@ static inline u16 i8254(void) u16 status, timer; do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + outb(I8254_CMD_READBACK | I8254_SELECT_COUNTER0, + I8254_PORT_CONTROL); status = inb(I8254_PORT_COUNTER0); timer = inb(I8254_PORT_COUNTER0); timer |= inb(I8254_PORT_COUNTER0) << 8; From c5d2a2cf774b1b34704b07a79df96f43b1f6693a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Jan 2019 14:33:16 +0100 Subject: [PATCH 1581/2608] posix-cpu-timers: Unbreak timer rearming commit 93ad0fc088c5b4631f796c995bdd27a082ef33a6 upstream. The recent commit which prevented a division by 0 issue in the alarm timer code broke posix CPU timers as an unwanted side effect. The reason is that the common rearm code checks for timer->it_interval being 0 now. What went unnoticed is that the posix cpu timer setup does not initialize timer->it_interval as it stores the interval in CPU timer specific storage. The reason for the separate storage is historical as the posix CPU timers always had a 64bit nanoseconds representation internally while timer->it_interval is type ktime_t which used to be a modified timespec representation on 32bit machines. Instead of reverting the offending commit and fixing the alarmtimer issue in the alarmtimer code, store the interval in timer->it_interval at CPU timer setup time so the common code check works. This also repairs the existing inconistency of the posix CPU timer code which kept a single shot timer armed despite of the interval being 0. The separate storage can be removed in mainline, but that needs to be a separate commit as the current one has to be backported to stable kernels. Fixes: 0e334db6bb4b ("posix-timers: Fix division by zero bug") Reported-by: H.J. Lu Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190111133500.840117406@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/time/posix-cpu-timers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 2da660d53a4b..6e8c230ca877 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -685,6 +685,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * set up the signal and overrun bookkeeping. */ timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); + timer->it_interval = ns_to_ktime(timer->it.cpu.incr); /* * This acts as a modification timestamp for the timer, From 711369e62e624065ffd88ddcced84dcdce588099 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Jan 2019 14:08:59 +0000 Subject: [PATCH 1582/2608] irqchip/gic-v3-its: Align PCI Multi-MSI allocation on their size commit 8208d1708b88b412ca97f50a6d951242c88cbbac upstream. The way we allocate events works fine in most cases, except when multiple PCI devices share an ITS-visible DevID, and that one of them is trying to use MultiMSI allocation. In that case, our allocation is not guaranteed to be zero-based anymore, and we have to make sure we allocate it on a boundary that is compatible with the PCI Multi-MSI constraints. Fix this by allocating the full region upfront instead of iterating over the number of MSIs. MSI-X are always allocated one by one, so this shouldn't change anything on that front. Fixes: b48ac83d6bbc2 ("irqchip: GICv3: ITS: MSI support") Cc: stable@vger.kernel.org Reported-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 2ea39a83737f..7638ca03fb1f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2086,13 +2086,14 @@ static void its_free_device(struct its_device *its_dev) kfree(its_dev); } -static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) +static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq) { int idx; - idx = find_first_zero_bit(dev->event_map.lpi_map, - dev->event_map.nr_lpis); - if (idx == dev->event_map.nr_lpis) + idx = bitmap_find_free_region(dev->event_map.lpi_map, + dev->event_map.nr_lpis, + get_count_order(nvecs)); + if (idx < 0) return -ENOSPC; *hwirq = dev->event_map.lpi_base + idx; @@ -2188,21 +2189,21 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, int err; int i; - for (i = 0; i < nr_irqs; i++) { - err = its_alloc_device_irq(its_dev, &hwirq); - if (err) - return err; + err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq); + if (err) + return err; - err = its_irq_gic_domain_alloc(domain, virq + i, hwirq); + for (i = 0; i < nr_irqs; i++) { + err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i); if (err) return err; irq_domain_set_hwirq_and_chip(domain, virq + i, - hwirq, &its_irq_chip, its_dev); + hwirq + i, &its_irq_chip, its_dev); irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i))); pr_debug("ID:%d pID:%d vID:%d\n", - (int)(hwirq - its_dev->event_map.lpi_base), - (int) hwirq, virq + i); + (int)(hwirq + i - its_dev->event_map.lpi_base), + (int)(hwirq + i), virq + i); } return 0; From 8849347d9cdacbd845ff0a5add3203f4eb599433 Mon Sep 17 00:00:00 2001 From: Manfred Schlaegl Date: Wed, 19 Dec 2018 19:39:58 +0100 Subject: [PATCH 1583/2608] can: dev: __can_get_echo_skb(): fix bogous check for non-existing skb by removing it commit 7b12c8189a3dc50638e7d53714c88007268d47ef upstream. This patch revert commit 7da11ba5c506 ("can: dev: __can_get_echo_skb(): print error message, if trying to echo non existing skb") After introduction of this change we encountered following new error message on various i.MX plattforms (flexcan): | flexcan 53fc8000.can can0: __can_get_echo_skb: BUG! Trying to echo non | existing skb: can_priv::echo_skb[0] The introduction of the message was a mistake because priv->echo_skb[idx] = NULL is a perfectly valid in following case: If CAN_RAW_LOOPBACK is disabled (setsockopt) in applications, the pkt_type of the tx skb's given to can_put_echo_skb is set to PACKET_LOOPBACK. In this case can_put_echo_skb will not set priv->echo_skb[idx]. It is therefore kept NULL. As additional argument for revert: The order of check and usage of idx was changed. idx is used to access an array element before checking it's boundaries. Signed-off-by: Manfred Schlaegl Fixes: 7da11ba5c506 ("can: dev: __can_get_echo_skb(): print error message, if trying to echo non existing skb") Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 035daca63168..7d61d8801220 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -479,8 +479,6 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); - struct sk_buff *skb = priv->echo_skb[idx]; - struct canfd_frame *cf; if (idx >= priv->echo_skb_max) { netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", @@ -488,20 +486,21 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 return NULL; } - if (!skb) { - netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n", - __func__, idx); - return NULL; + if (priv->echo_skb[idx]) { + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u8 len = cf->len; + + *len_ptr = len; + priv->echo_skb[idx] = NULL; + + return skb; } - /* Using "struct canfd_frame::len" for the frame - * length is supported on both CAN and CANFD frames. - */ - cf = (struct canfd_frame *)skb->data; - *len_ptr = cf->len; - priv->echo_skb[idx] = NULL; - - return skb; + return NULL; } /* From e7126858fc71ccee69220e06e2f97190de85db67 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sun, 13 Jan 2019 19:31:43 +0100 Subject: [PATCH 1584/2608] can: bcm: check timer values before ktime conversion commit 93171ba6f1deffd82f381d36cb13177872d023f6 upstream. Kyungtae Kim detected a potential integer overflow in bcm_[rx|tx]_setup() when the conversion into ktime multiplies the given value with NSEC_PER_USEC (1000). Reference: https://marc.info/?l=linux-can&m=154732118819828&w=2 Add a check for the given tv_usec, so that the value stays below one second. Additionally limit the tv_sec value to a reasonable value for CAN related use-cases of 400 days and ensure all values to be positive. Reported-by: Kyungtae Kim Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Cc: linux-stable # >= 2.6.26 Tested-by: Kyungtae Kim Acked-by: Andre Naujoks Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/bcm.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/net/can/bcm.c b/net/can/bcm.c index 13690334efa3..12d851c4604d 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -67,6 +67,9 @@ */ #define MAX_NFRAMES 256 +/* limit timers to 400 days for sending/timeouts */ +#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60) + /* use of last_frames[index].flags */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ @@ -140,6 +143,22 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); } +/* check limitations for timeval provided by user */ +static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head) +{ + if ((msg_head->ival1.tv_sec < 0) || + (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) || + (msg_head->ival1.tv_usec < 0) || + (msg_head->ival1.tv_usec >= USEC_PER_SEC) || + (msg_head->ival2.tv_sec < 0) || + (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) || + (msg_head->ival2.tv_usec < 0) || + (msg_head->ival2.tv_usec >= USEC_PER_SEC)) + return true; + + return false; +} + #define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU) #define OPSIZ sizeof(struct bcm_op) #define MHSIZ sizeof(struct bcm_msg_head) @@ -886,6 +905,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES) return -EINVAL; + /* check timeval limitations */ + if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) + return -EINVAL; + /* check the given can_id */ op = bcm_find_op(&bo->tx_ops, msg_head, ifindex); if (op) { @@ -1065,6 +1088,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, (!(msg_head->can_id & CAN_RTR_FLAG)))) return -EINVAL; + /* check timeval limitations */ + if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) + return -EINVAL; + /* check the given can_id */ op = bcm_find_op(&bo->rx_ops, msg_head, ifindex); if (op) { From 0b926c828ea7cbb82e4ee888ad3bc694ec930796 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 8 Jan 2019 22:55:01 -0500 Subject: [PATCH 1585/2608] vt: invoke notifier on screen size change commit 0c9b1965faddad7534b6974b5b36c4ad37998f8e upstream. User space using poll() on /dev/vcs devices are not awaken when a screen size change occurs. Let's fix that. Signed-off-by: Nicolas Pitre Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index e77421e7bf46..1fb5e7f409c4 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -953,6 +953,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (con_is_visible(vc)) update_screen(vc); vt_event_post(VT_EVENT_RESIZE, vc->vc_num, vc->vc_num); + notify_update(vc); return err; } From 9401f4e6e99c262b7b43f4561e4986abdc9ac3e4 Mon Sep 17 00:00:00 2001 From: Martin Vuille Date: Sun, 11 Feb 2018 16:24:20 -0500 Subject: [PATCH 1586/2608] perf unwind: Unwind with libdw doesn't take symfs into account [ Upstream commit 3d20c6246690219881786de10d2dda93f616d0ac ] Path passed to libdw for unwinding doesn't include symfs path if specified, so unwinding fails because ELF file is not found. Similar to unwinding with libunwind, pass symsrc_filename instead of long_name. If there is no symsrc_filename, fallback to long_name. Signed-off-by: Martin Vuille Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/20180211212420.18388-1-jpmv27@aim.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/unwind-libdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 1e9c974faf67..8e969f28cc59 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -50,7 +50,7 @@ static int __report_module(struct addr_location *al, u64 ip, if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - dso->long_name, -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; From dbf8065943de648d04d820d4a7e9407986eb31c6 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 29 Oct 2018 15:16:44 +0100 Subject: [PATCH 1587/2608] perf unwind: Take pgoff into account when reporting elf to libdwfl [ Upstream commit 1fe627da30331024f453faef04d500079b901107 ] libdwfl parses an ELF file itself and creates mappings for the individual sections. perf on the other hand sees raw mmap events which represent individual sections. When we encounter an address pointing into a mapping with pgoff != 0, we must take that into account and report the file at the non-offset base address. This fixes unwinding with libdwfl in some cases. E.g. for a file like: ``` using namespace std; mutex g_mutex; double worker() { lock_guard guard(g_mutex); uniform_real_distribution uniform(-1E5, 1E5); default_random_engine engine; double s = 0; for (int i = 0; i < 1000; ++i) { s += norm(complex(uniform(engine), uniform(engine))); } cout << s << endl; return s; } int main() { vector> results; for (int i = 0; i < 10000; ++i) { results.push_back(async(launch::async, worker)); } return 0; } ``` Compile it with `g++ -g -O2 -lpthread cpp-locking.cpp -o cpp-locking`, then record it with `perf record --call-graph dwarf -e sched:sched_switch`. When you analyze it with `perf script` and libunwind, you should see: ``` cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120 ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb16737cc rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux) 7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so) 7f38e4252d0b arena_get2.part.4+0x2fb (/usr/lib/libc-2.28.so) 7f38e4255b1c tcache_init.part.6+0xec (/usr/lib/libc-2.28.so) 7f38e42569e5 __GI___libc_malloc+0x115 (inlined) 7f38e4241790 __GI__IO_file_doallocate+0x90 (inlined) 7f38e424fbbf __GI__IO_doallocbuf+0x4f (inlined) 7f38e424ee47 __GI__IO_file_overflow+0x197 (inlined) 7f38e424df36 _IO_new_file_xsputn+0x116 (inlined) 7f38e4242bfb __GI__IO_fwrite+0xdb (inlined) 7f38e463fa6d std::basic_streambuf >::sputn(char const*, long)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator >::_M_put(char const*, long)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator > std::__write(std::ostreambuf_iterator >, char const*, int)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator > std::num_put > >::_M_insert_float(std::ostreambuf_iterator 7f38e464bd70 std::num_put > >::put(std::ostreambuf_iterator >, std::ios_base&, char, double) const+0x90 (inl> 7f38e464bd70 std::ostream& std::ostream::_M_insert(double)+0x90 (/usr/lib/libstdc++.so.6.0.25) 563b9cb502f7 std::ostream::operator<<(double)+0xb7 (inlined) 563b9cb502f7 worker()+0xb7 (/ssd/milian/projects/kdab/rnd/hotspot/build/tests/test-clients/cpp-locking/cpp-locking) 563b9cb506fb double std::__invoke_impl(std::__invoke_other, double (*&&)())+0x2b (inlined) 563b9cb506fb std::__invoke_result::type std::__invoke(double (*&&)())+0x2b (inlined) 563b9cb506fb decltype (__invoke((_S_declval<0ul>)())) std::thread::_Invoker >::_M_invoke<0ul>(std::_Index_tuple<0ul>)+0x2b (inlined) 563b9cb506fb std::thread::_Invoker >::operator()()+0x2b (inlined) 563b9cb506fb std::__future_base::_Task_setter, std::__future_base::_Result_base::_Deleter>, std::thread::_Invoker >, dou> 563b9cb506fb std::_Function_handler (), std::__future_base::_Task_setter 563b9cb507e8 std::function ()>::operator()() const+0x28 (inlined) 563b9cb507e8 std::__future_base::_State_baseV2::_M_do_set(std::function ()>*, bool*)+0x28 (/ssd/milian/> 7f38e46d24fe __pthread_once_slow+0xbe (/usr/lib/libpthread-2.28.so) 563b9cb51149 __gthread_once+0xe9 (inlined) 563b9cb51149 void std::call_once ()>*, bool*)> 563b9cb51149 std::__future_base::_State_baseV2::_M_set_result(std::function ()>, bool)+0xe9 (inlined) 563b9cb51149 std::__future_base::_Async_state_impl >, double>::_Async_state_impl(std::thread::_Invoker >&&)::{lambda()#1}::op> 563b9cb51149 void std::__invoke_impl >, double>::_Async_state_impl(std::thread::_Invoker 563b9cb51149 std::__invoke_result >, double>::_Async_state_impl(std::thread::_Invoker >> 563b9cb51149 decltype (__invoke((_S_declval<0ul>)())) std::thread::_Invoker >, double>::_Async_state_> 563b9cb51149 std::thread::_Invoker >, double>::_Async_state_impl(std::thread::_Invoker 563b9cb51149 std::thread::_State_impl >, double>::_Async_state_impl(std::thread> 7f38e45f0062 execute_native_thread_routine+0x12 (/usr/lib/libstdc++.so.6.0.25) 7f38e46caa9c start_thread+0xfc (/usr/lib/libpthread-2.28.so) 7f38e42ccb22 __GI___clone+0x42 (inlined) ``` Before this patch, using libdwfl, you would see: ``` cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120 ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb16737cc rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux) 7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so) a041161e77950c5c [unknown] ([unknown]) ``` With this patch applied, we get a bit further in unwinding: ``` cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120 ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb16737cc rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux) 7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so) 7f38e4252d0b arena_get2.part.4+0x2fb (/usr/lib/libc-2.28.so) 7f38e4255b1c tcache_init.part.6+0xec (/usr/lib/libc-2.28.so) 7f38e42569e5 __GI___libc_malloc+0x115 (inlined) 7f38e4241790 __GI__IO_file_doallocate+0x90 (inlined) 7f38e424fbbf __GI__IO_doallocbuf+0x4f (inlined) 7f38e424ee47 __GI__IO_file_overflow+0x197 (inlined) 7f38e424df36 _IO_new_file_xsputn+0x116 (inlined) 7f38e4242bfb __GI__IO_fwrite+0xdb (inlined) 7f38e463fa6d std::basic_streambuf >::sputn(char const*, long)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator >::_M_put(char const*, long)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator > std::__write(std::ostreambuf_iterator >, char const*, int)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator > std::num_put > >::_M_insert_float(std::ostreambuf_iterator 7f38e464bd70 std::num_put > >::put(std::ostreambuf_iterator >, std::ios_base&, char, double) const+0x90 (inl> 7f38e464bd70 std::ostream& std::ostream::_M_insert(double)+0x90 (/usr/lib/libstdc++.so.6.0.25) 563b9cb502f7 std::ostream::operator<<(double)+0xb7 (inlined) 563b9cb502f7 worker()+0xb7 (/ssd/milian/projects/kdab/rnd/hotspot/build/tests/test-clients/cpp-locking/cpp-locking) 6eab825c1ee3e4ff [unknown] ([unknown]) ``` Note that the backtrace is still stopping too early, when compared to the nice results obtained via libunwind. It's unclear so far what the reason for that is. Committer note: Further comment by Milian on the thread started on the Link: tag below: --- The remaining issue is due to a bug in elfutils: https://sourceware.org/ml/elfutils-devel/2018-q4/msg00089.html With both patches applied, libunwind and elfutils produce the same output for the above scenario. --- Signed-off-by: Milian Wolff Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20181029141644.3907-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/unwind-libdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 8e969f28cc59..f1fe5acdbba4 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -44,13 +44,13 @@ static int __report_module(struct addr_location *al, u64 ip, Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != al->map->start) + if (s != al->map->start - al->map->pgoff) mod = 0; } if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; From c3ca9064273c3d7bc1654d685d42f41ccf3744d7 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 28 Jan 2019 14:40:49 -0500 Subject: [PATCH 1588/2608] Revert "seccomp: add a selftest for get_metadata" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e65cd9a20343ea90f576c24c38ee85ab6e7d5fec. Tommi T. Rrantala notes: PTRACE_SECCOMP_GET_METADATA was only added in 4.16 (26500475ac1b499d8636ff281311d633909f5d20) And it's also breaking seccomp_bpf.c compilation for me: seccomp_bpf.c: In function ‘get_metadata’: seccomp_bpf.c:2878:26: error: storage size of ‘md’ isn’t known struct seccomp_metadata md; Signed-off-by: Sasha Levin --- tools/testing/selftests/seccomp/seccomp_bpf.c | 61 ------------------- 1 file changed, 61 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e350cf3d4f90..194759ec9e70 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -145,15 +145,6 @@ struct seccomp_data { #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif -#ifndef PTRACE_SECCOMP_GET_METADATA -#define PTRACE_SECCOMP_GET_METADATA 0x420d - -struct seccomp_metadata { - __u64 filter_off; /* Input: which filter */ - __u64 flags; /* Output: filter's flags */ -}; -#endif - #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -2870,58 +2861,6 @@ TEST(get_action_avail) EXPECT_EQ(errno, EOPNOTSUPP); } -TEST(get_metadata) -{ - pid_t pid; - int pipefd[2]; - char buf; - struct seccomp_metadata md; - - ASSERT_EQ(0, pipe(pipefd)); - - pid = fork(); - ASSERT_GE(pid, 0); - if (pid == 0) { - struct sock_filter filter[] = { - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; - - /* one with log, one without */ - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, - SECCOMP_FILTER_FLAG_LOG, &prog)); - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); - - ASSERT_EQ(0, close(pipefd[0])); - ASSERT_EQ(1, write(pipefd[1], "1", 1)); - ASSERT_EQ(0, close(pipefd[1])); - - while (1) - sleep(100); - } - - ASSERT_EQ(0, close(pipefd[1])); - ASSERT_EQ(1, read(pipefd[0], &buf, 1)); - - ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); - ASSERT_EQ(pid, waitpid(pid, NULL, 0)); - - md.filter_off = 0; - ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md)); - EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); - EXPECT_EQ(md.filter_off, 0); - - md.filter_off = 1; - ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md)); - EXPECT_EQ(md.flags, 0); - EXPECT_EQ(md.filter_off, 1); - - ASSERT_EQ(0, kill(pid, SIGKILL)); -} - /* * TODO: * - add microbenchmarks From 9f2512df6356939c3c27f3364362baacb2ac388e Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 13 Oct 2017 10:58:36 +0100 Subject: [PATCH 1589/2608] net: stmmac: Use correct values in TQS/RQS fields commit 52a76235d0c4dd259cd0df503afed4757c04ba1d upstream. Currently we are using all the available fifo size in RQS and TQS fields. This will not work correctly in multi-queues IP's because total fifo size must be splitted to the enabled queues. Correct this by computing the available fifo size per queue and setting the right value in TQS and RQS fields. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Cc: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 ++- .../net/ethernet/stmicro/stmmac/dwmac4_dma.c | 15 ++++++++----- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 22 +++++++++++++++++-- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 8e2a19616bc9..c87bc0a5efa3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -444,7 +444,8 @@ struct stmmac_dma_ops { int rxfifosz); void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel, int fifosz); - void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel); + void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel, + int fifosz); /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index e84831e1b63b..898849bbc7d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -271,9 +271,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, } static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, - u32 channel) + u32 channel, int fifosz) { u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel)); + unsigned int tqs = fifosz / 256 - 1; if (mode == SF_DMA_MODE) { pr_debug("GMAC: enable TX store and forward mode\n"); @@ -306,12 +307,14 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, * For an IP with DWC_EQOS_NUM_TXQ > 1, the fields TXQEN and TQS are R/W * with reset values: TXQEN off, TQS 256 bytes. * - * Write the bits in both cases, since it will have no effect when RO. - * For DWC_EQOS_NUM_TXQ > 1, the top bits in MTL_OP_MODE_TQS_MASK might - * be RO, however, writing the whole TQS field will result in a value - * equal to DWC_EQOS_TXFIFO_SIZE, just like for DWC_EQOS_NUM_TXQ == 1. + * TXQEN must be written for multi-channel operation and TQS must + * reflect the available fifo size per queue (total fifo size / number + * of enabled queues). */ - mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK; + mtl_tx_op |= MTL_OP_MODE_TXQEN; + mtl_tx_op &= ~MTL_OP_MODE_TQS_MASK; + mtl_tx_op |= tqs << MTL_OP_MODE_TQS_SHIFT; + writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel)); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bafbebeb0e00..a901feaad4e1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1765,12 +1765,19 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 rx_channels_count = priv->plat->rx_queues_to_use; u32 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; + int txfifosz = priv->plat->tx_fifo_size; u32 txmode = 0; u32 rxmode = 0; u32 chan = 0; if (rxfifosz == 0) rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + /* Adjust for real per queue fifo size */ + rxfifosz /= rx_channels_count; + txfifosz /= tx_channels_count; if (priv->plat->force_thresh_dma_mode) { txmode = tc; @@ -1798,7 +1805,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) rxfifosz); for (chan = 0; chan < tx_channels_count; chan++) - priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan); + priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, + txfifosz); } else { priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, rxfifosz); @@ -1967,15 +1975,25 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan) static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, u32 rxmode, u32 chan) { + u32 rx_channels_count = priv->plat->rx_queues_to_use; + u32 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; + int txfifosz = priv->plat->tx_fifo_size; if (rxfifosz == 0) rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + /* Adjust for real per queue fifo size */ + rxfifosz /= rx_channels_count; + txfifosz /= tx_channels_count; if (priv->synopsys_id >= DWMAC_CORE_4_00) { priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, rxfifosz); - priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan); + priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, + txfifosz); } else { priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, rxfifosz); From 9485d5d2318bf6e93bc2771a816aebb1ec80f2c5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 28 Jan 2019 12:51:02 -0800 Subject: [PATCH 1590/2608] KVM: x86: Fix a 4.14 backport regression related to userspace/guest FPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream commit: f775b13eedee ("x86,kvm: move qemu/guest FPU switching out to vcpu_run") introduced a bug, which was later fixed by upstream commit: 5663d8f9bbe4 ("kvm: x86: fix WARN due to uninitialized guest FPU state") For reasons unknown, both commits were initially passed-over for inclusion in the 4.14 stable branch despite being tagged for stable. Eventually, someone noticed that the fixup, commit 5663d8f9bbe4, was missing from stable[1], and so it was queued up for 4.14 and included in release v4.14.79. Even later, the original buggy patch, commit f775b13eedee, was also applied to the 4.14 stable branch. Through an unlucky coincidence, the incorrect ordering did not generate a conflict between the two patches, and led to v4.14.94 and later releases containing a spurious call to kvm_load_guest_fpu() in kvm_arch_vcpu_ioctl_run(). As a result, KVM may reload stale guest FPU state, e.g. after accepting in INIT event. This can manifest as crashes during boot, segfaults, failed checksums and so on and so forth. Remove the unwanted kvm_{load,put}_guest_fpu() calls, i.e. make kvm_arch_vcpu_ioctl_run() look like commit 5663d8f9bbe4 was backported after commit f775b13eedee. [1] https://www.spinics.net/lists/stable/msg263931.html Fixes: 4124a4cff344 ("x86,kvm: move qemu/guest FPU switching out to vcpu_run") Cc: stable@vger.kernel.org Cc: Sasha Levin Cc: Greg Kroah-Hartman Cc: Peter Xu Cc: Rik van Riel Cc: Paolo Bonzini Cc: Radim Krčmář Reported-by: Roman Mamedov Reported-by: Thomas Lindroth Signed-off-by: Sean Christopherson Acked-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fb53634f00dd..867c22f8d59b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7422,14 +7422,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - kvm_load_guest_fpu(vcpu); - if (unlikely(vcpu->arch.complete_userspace_io)) { int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; vcpu->arch.complete_userspace_io = NULL; r = cui(vcpu); if (r <= 0) - goto out_fpu; + goto out; } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); @@ -7438,8 +7436,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) else r = vcpu_run(vcpu); -out_fpu: - kvm_put_guest_fpu(vcpu); out: kvm_put_guest_fpu(vcpu); post_kvm_run_save(vcpu); From 531ebf894c4bfbfbae139869e5ee95d8e9b8dbab Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 11 Jan 2019 15:18:22 +0100 Subject: [PATCH 1591/2608] s390/smp: Fix calling smp_call_ipl_cpu() from ipl CPU commit 60f1bf29c0b2519989927cae640cd1f50f59dc7f upstream. When calling smp_call_ipl_cpu() from the IPL CPU, we will try to read from pcpu_devices->lowcore. However, due to prefixing, that will result in reading from absolute address 0 on that CPU. We have to go via the actual lowcore instead. This means that right now, we will read lc->nodat_stack == 0 and therfore work on a very wrong stack. This BUG essentially broke rebooting under QEMU TCG (which will report a low address protection exception). And checking under KVM, it is also broken under KVM. With 1 VCPU it can be easily triggered. :/# echo 1 > /proc/sys/kernel/sysrq :/# echo b > /proc/sysrq-trigger [ 28.476745] sysrq: SysRq : Resetting [ 28.476793] Kernel stack overflow. [ 28.476817] CPU: 0 PID: 424 Comm: sh Not tainted 5.0.0-rc1+ #13 [ 28.476820] Hardware name: IBM 2964 NE1 716 (KVM/Linux) [ 28.476826] Krnl PSW : 0400c00180000000 0000000000115c0c (pcpu_delegate+0x12c/0x140) [ 28.476861] R:0 T:1 IO:0 EX:0 Key:0 M:0 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 [ 28.476863] Krnl GPRS: ffffffffffffffff 0000000000000000 000000000010dff8 0000000000000000 [ 28.476864] 0000000000000000 0000000000000000 0000000000ab7090 000003e0006efbf0 [ 28.476864] 000000000010dff8 0000000000000000 0000000000000000 0000000000000000 [ 28.476865] 000000007fffc000 0000000000730408 000003e0006efc58 0000000000000000 [ 28.476887] Krnl Code: 0000000000115bfe: 4170f000 la %r7,0(%r15) [ 28.476887] 0000000000115c02: 41f0a000 la %r15,0(%r10) [ 28.476887] #0000000000115c06: e370f0980024 stg %r7,152(%r15) [ 28.476887] >0000000000115c0c: c0e5fffff86e brasl %r14,114ce8 [ 28.476887] 0000000000115c12: 41f07000 la %r15,0(%r7) [ 28.476887] 0000000000115c16: a7f4ffa8 brc 15,115b66 [ 28.476887] 0000000000115c1a: 0707 bcr 0,%r7 [ 28.476887] 0000000000115c1c: 0707 bcr 0,%r7 [ 28.476901] Call Trace: [ 28.476902] Last Breaking-Event-Address: [ 28.476920] [<0000000000a01c4a>] arch_call_rest_init+0x22/0x80 [ 28.476927] Kernel panic - not syncing: Corrupt kernel stack, can't continue. [ 28.476930] CPU: 0 PID: 424 Comm: sh Not tainted 5.0.0-rc1+ #13 [ 28.476932] Hardware name: IBM 2964 NE1 716 (KVM/Linux) [ 28.476932] Call Trace: Fixes: 2f859d0dad81 ("s390/smp: reduce size of struct pcpu") Cc: stable@vger.kernel.org # 4.0+ Reported-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/smp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1734cfc84cf1..27258db640d7 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -387,9 +387,13 @@ void smp_call_online_cpu(void (*func)(void *), void *data) */ void smp_call_ipl_cpu(void (*func)(void *), void *data) { + struct lowcore *lc = pcpu_devices->lowcore; + + if (pcpu_devices[0].address == stap()) + lc = &S390_lowcore; + pcpu_delegate(&pcpu_devices[0], func, data, - pcpu_devices->lowcore->panic_stack - - PANIC_FRAME_OFFSET + PAGE_SIZE); + lc->panic_stack - PANIC_FRAME_OFFSET + PAGE_SIZE); } int smp_find_processor_id(u16 address) From 5e318594de2d23569c27799c013bd9c9139963a9 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Mon, 19 Nov 2018 10:58:51 +0000 Subject: [PATCH 1592/2608] nvmet-rdma: Add unlikely for response allocated check commit ad1f824948e4ed886529219cf7cd717d078c630d upstream. Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe Cc: Raju Rangoju Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5d8140e58f6f..21e331e5eefa 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -189,7 +189,7 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) { unsigned long flags; - if (rsp->allocated) { + if (unlikely(rsp->allocated)) { kfree(rsp); return; } From 74329ba335443521ae084147a170965425625aa2 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 3 Jan 2019 23:05:31 +0530 Subject: [PATCH 1593/2608] nvmet-rdma: fix null dereference under heavy load commit 5cbab6303b4791a3e6713dfe2c5fda6a867f9adc upstream. Under heavy load if we don't have any pre-allocated rsps left, we dynamically allocate a rsp, but we are not actually allocating memory for nvme_completion (rsp->req.rsp). In such a case, accessing pointer fields (req->rsp->status) in nvmet_req_init() will result in crash. To fix this, allocate the memory for nvme_completion by calling nvmet_rdma_alloc_rsp() Fixes: 8407879c("nvmet-rdma:fix possible bogus dereference under heavy load") Cc: Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Raju Rangoju Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/rdma.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 21e331e5eefa..da56cc277b71 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -137,6 +137,10 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); +static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, + struct nvmet_rdma_rsp *r); +static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, + struct nvmet_rdma_rsp *r); static struct nvmet_fabrics_ops nvmet_rdma_ops; @@ -175,9 +179,17 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) spin_unlock_irqrestore(&queue->rsps_lock, flags); if (unlikely(!rsp)) { - rsp = kmalloc(sizeof(*rsp), GFP_KERNEL); + int ret; + + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); if (unlikely(!rsp)) return NULL; + ret = nvmet_rdma_alloc_rsp(queue->dev, rsp); + if (unlikely(ret)) { + kfree(rsp); + return NULL; + } + rsp->allocated = true; } @@ -190,6 +202,7 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) unsigned long flags; if (unlikely(rsp->allocated)) { + nvmet_rdma_free_rsp(rsp->queue->dev, rsp); kfree(rsp); return; } From a0d74de10c56a7a92871d6dd1cdcbb1add22ae9c Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 10 Jan 2019 12:39:55 -0800 Subject: [PATCH 1594/2608] usb: dwc3: gadget: Clear req->needs_extra_trb flag on cleanup commit bd6742249b9ca918565e4e3abaa06665e587f4b5 upstream. OUT endpoint requests may somtimes have this flag set when preparing to be submitted to HW indicating that there is an additional TRB chained to the request for alignment purposes. If that request is removed before the controller can execute the transfer (e.g. ep_dequeue/ep_disable), the request will not go through the dwc3_gadget_ep_cleanup_completed_request() handler and will not have its needs_extra_trb flag cleared when dwc3_gadget_giveback() is called. This same request could be later requeued for a new transfer that does not require an extra TRB and if it is successfully completed, the cleanup and TRB reclamation will incorrectly process the additional TRB which belongs to the next request, and incorrectly advances the TRB dequeue pointer, thereby messing up calculation of the next requeust's actual/remaining count when it completes. The right thing to do here is to ensure that the flag is cleared before it is given back to the function driver. A good place to do that is in dwc3_gadget_del_and_unmap_request(). Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize") Cc: stable@vger.kernel.org Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi [jackp: backport to <= 4.20: replaced 'needs_extra_trb' with 'unaligned' and 'zero' members in patch and reworded commit text] Signed-off-by: Jack Pham Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b8704c0678f9..727bf3c9f53b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -182,6 +182,8 @@ void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep, req->started = false; list_del(&req->list); req->remaining = 0; + req->unaligned = false; + req->zero = false; if (req->request.status == -EINPROGRESS) req->request.status = status; From 219dcadc75d794587ceb98d86785dac0dc4761b0 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 9 Nov 2018 17:21:17 +0200 Subject: [PATCH 1595/2608] xhci: Fix leaking USB3 shared_hcd at xhci removal commit f068090426ea8d72c408ebd42953a82a88e2282c upstream. Ensure that the shared_hcd pointer is valid when calling usb_put_hcd() The shared_hcd is removed and freed in xhci by first calling usb_remove_hcd(xhci->shared_hcd), and later usb_put_hcd(xhci->shared_hcd) Afer commit fe190ed0d602 ("xhci: Do not halt the host until both HCD have disconnected their devices.") the shared_hcd was never properly put as xhci->shared_hcd was set to NULL before usb_put_hcd(xhci->shared_hcd) was called. shared_hcd (USB3) is removed before primary hcd (USB2). While removing the primary hcd we might need to handle xhci interrupts to cleanly remove last USB2 devices, therefore we need to set xhci->shared_hcd to NULL before removing the primary hcd to let xhci interrupt handler know shared_hcd is no longer available. xhci-plat.c, xhci-histb.c and xhci-mtk first create both their hcd's before adding them. so to keep the correct reverse removal order use a temporary shared_hcd variable for them. For more details see commit 4ac53087d6d4 ("usb: xhci: plat: Create both HCDs before adding them") Fixes: fe190ed0d602 ("xhci: Do not halt the host until both HCD have disconnected their devices.") Cc: Joel Stanley Cc: Chunfeng Yun Cc: Thierry Reding Cc: Jianguo Sun Cc: Reported-by: Jack Pham Tested-by: Jack Pham Tested-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 6 ++++-- drivers/usb/host/xhci-pci.c | 1 + drivers/usb/host/xhci-plat.c | 6 ++++-- drivers/usb/host/xhci-tegra.c | 1 + drivers/usb/host/xhci.c | 2 -- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 510d28a9d190..35aecbcac6f7 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -724,14 +724,16 @@ static int xhci_mtk_remove(struct platform_device *dev) struct xhci_hcd_mtk *mtk = platform_get_drvdata(dev); struct usb_hcd *hcd = mtk->hcd; struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct usb_hcd *shared_hcd = xhci->shared_hcd; - usb_remove_hcd(xhci->shared_hcd); + usb_remove_hcd(shared_hcd); + xhci->shared_hcd = NULL; xhci_mtk_phy_power_off(mtk); xhci_mtk_phy_exit(mtk); device_init_wakeup(&dev->dev, false); usb_remove_hcd(hcd); - usb_put_hcd(xhci->shared_hcd); + usb_put_hcd(shared_hcd); usb_put_hcd(hcd); xhci_mtk_sch_exit(mtk); xhci_mtk_clks_disable(mtk); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0fbc549cc55c..1de006aebec5 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -370,6 +370,7 @@ static void xhci_pci_remove(struct pci_dev *dev) if (xhci->shared_hcd) { usb_remove_hcd(xhci->shared_hcd); usb_put_hcd(xhci->shared_hcd); + xhci->shared_hcd = NULL; } /* Workaround for spurious wakeups at shutdown with HSW */ diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 830dd0dbbce0..108a212294bf 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -332,14 +332,16 @@ static int xhci_plat_remove(struct platform_device *dev) struct usb_hcd *hcd = platform_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct clk *clk = xhci->clk; + struct usb_hcd *shared_hcd = xhci->shared_hcd; xhci->xhc_state |= XHCI_STATE_REMOVING; - usb_remove_hcd(xhci->shared_hcd); + usb_remove_hcd(shared_hcd); + xhci->shared_hcd = NULL; usb_phy_shutdown(hcd->usb_phy); usb_remove_hcd(hcd); - usb_put_hcd(xhci->shared_hcd); + usb_put_hcd(shared_hcd); if (!IS_ERR(clk)) clk_disable_unprepare(clk); diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 32ddafe7af87..28df32d85671 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1178,6 +1178,7 @@ static int tegra_xusb_remove(struct platform_device *pdev) usb_remove_hcd(xhci->shared_hcd); usb_put_hcd(xhci->shared_hcd); + xhci->shared_hcd = NULL; usb_remove_hcd(tegra->hcd); usb_put_hcd(tegra->hcd); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 930eecd86429..c78de07c4d00 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -669,8 +669,6 @@ static void xhci_stop(struct usb_hcd *hcd) /* Only halt host and free memory after both hcds are removed */ if (!usb_hcd_is_primary_hcd(hcd)) { - /* usb core will free this hcd shortly, unset pointer */ - xhci->shared_hcd = NULL; mutex_unlock(&xhci->mutex); return; } From 45c40a96b98568f4b0fc79b24d9595ae01ccad68 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 8 Nov 2017 17:19:54 +0000 Subject: [PATCH 1596/2608] ptp_kvm: probe for kvm guest availability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 001f60e1f662a6dee1630a2915401aaf5959d479 upstream. In the event of moving pvclock_pvti_cpu0_va() definition to common pvclock code, this function would return a value on non KVM guests. Later on this would fail with a GPF on ptp_kvm_init when running on a Xen guest. Therefore, ptp_kvm_init() should check whether it is running in a KVM guest. Signed-off-by: Joao Martins Acked-by: Radim Krčmář Signed-off-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_kvm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c index 2b1b212c219e..e04d7b2ecb3a 100644 --- a/drivers/ptp/ptp_kvm.c +++ b/drivers/ptp/ptp_kvm.c @@ -178,6 +178,9 @@ static int __init ptp_kvm_init(void) { long ret; + if (!kvm_para_available()) + return -ENODEV; + clock_pair_gpa = slow_virt_to_phys(&clock_pair); hv_clock = pvclock_pvti_cpu0_va(); From 9a12f78a86fc17af6f8326ffb991a931c34b82b9 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 8 Nov 2017 17:19:55 +0000 Subject: [PATCH 1597/2608] x86/pvclock: add setter for pvclock_pvti_cpu0_va commit 9f08890ab906abaf9d4c1bad8111755cbd302260 upstream. Right now there is only a pvclock_pvti_cpu0_va() which is defined on kvmclock since: commit dac16fba6fc5 ("x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap") The only user of this interface so far is kvm. This commit adds a setter function for the pvti page and moves pvclock_pvti_cpu0_va to pvclock, which is a more generic place to have it; and would allow other PV clocksources to use it, such as Xen. While moving pvclock_pvti_cpu0_va into pvclock, rename also this function to pvclock_get_pvti_cpu0_va (including its call sites) to be symmetric with the setter (pvclock_set_pvti_cpu0_va). Signed-off-by: Joao Martins Acked-by: Andy Lutomirski Acked-by: Paolo Bonzini Acked-by: Thomas Gleixner Signed-off-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/vma.c | 2 +- arch/x86/include/asm/pvclock.h | 19 ++++++++++--------- arch/x86/kernel/kvmclock.c | 7 +------ arch/x86/kernel/pvclock.c | 14 ++++++++++++++ drivers/ptp/ptp_kvm.c | 2 +- 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 1911310959f8..a77fd3c8d824 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -112,7 +112,7 @@ static int vvar_fault(const struct vm_special_mapping *sm, __pa_symbol(&__vvar_page) >> PAGE_SHIFT); } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = - pvclock_pvti_cpu0_va(); + pvclock_get_pvti_cpu0_va(); if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { ret = vm_insert_pfn( vma, diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 3e4ed8fb5f91..a7471dcd2205 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -5,15 +5,6 @@ #include #include -#ifdef CONFIG_KVM_GUEST -extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void); -#else -static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) -{ - return NULL; -} -#endif - /* some helper functions for xen and kvm pv clock sources */ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); @@ -102,4 +93,14 @@ struct pvclock_vsyscall_time_info { #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) +#ifdef CONFIG_PARAVIRT_CLOCK +void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti); +struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void); +#else +static inline struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void) +{ + return NULL; +} +#endif + #endif /* _ASM_X86_PVCLOCK_H */ diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 48703d430a2f..08806d64eacd 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -47,12 +47,6 @@ early_param("no-kvmclock", parse_no_kvmclock); static struct pvclock_vsyscall_time_info *hv_clock; static struct pvclock_wall_clock wall_clock; -struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) -{ - return hv_clock; -} -EXPORT_SYMBOL_GPL(pvclock_pvti_cpu0_va); - /* * The wallclock is the time of day when we booted. Since then, some time may * have elapsed since the hypervisor wrote the data. So we try to account for @@ -335,6 +329,7 @@ int __init kvm_setup_vsyscall_timeinfo(void) return 1; } + pvclock_set_pvti_cpu0_va(hv_clock); put_cpu(); kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 5c3f6d6a5078..761f6af6efa5 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -25,8 +25,10 @@ #include #include +#include static u8 valid_flags __read_mostly = 0; +static struct pvclock_vsyscall_time_info *pvti_cpu0_va __read_mostly; void pvclock_set_flags(u8 flags) { @@ -144,3 +146,15 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } + +void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti) +{ + WARN_ON(vclock_was_used(VCLOCK_PVCLOCK)); + pvti_cpu0_va = pvti; +} + +struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void) +{ + return pvti_cpu0_va; +} +EXPORT_SYMBOL_GPL(pvclock_get_pvti_cpu0_va); diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c index e04d7b2ecb3a..c67dd11e08b1 100644 --- a/drivers/ptp/ptp_kvm.c +++ b/drivers/ptp/ptp_kvm.c @@ -182,7 +182,7 @@ static int __init ptp_kvm_init(void) return -ENODEV; clock_pair_gpa = slow_virt_to_phys(&clock_pair); - hv_clock = pvclock_pvti_cpu0_va(); + hv_clock = pvclock_get_pvti_cpu0_va(); if (!hv_clock) return -ENODEV; From 4f3498a06330cfddc29a7876738eca7a497c81e7 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 8 Nov 2017 17:19:56 +0000 Subject: [PATCH 1598/2608] x86/xen/time: set pvclock flags on xen_time_init() commit b888808093113ae7d63d213272d01fea4b8329ed upstream. Specifically check for PVCLOCK_TSC_STABLE_BIT and if this bit is set, then set it too on pvclock flags. This allows Xen clocksource to use it and thus speeding up xen_clocksource_read() callers (i.e. sched_clock()) Signed-off-by: Joao Martins Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/time.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 80c2a4bdf230..5687be6f5ca5 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -373,6 +373,7 @@ static const struct pv_time_ops xen_time_ops __initconst = { static void __init xen_time_init(void) { + struct pvclock_vcpu_time_info *pvti; int cpu = smp_processor_id(); struct timespec tp; @@ -396,6 +397,14 @@ static void __init xen_time_init(void) setup_force_cpu_cap(X86_FEATURE_TSC); + /* + * We check ahead on the primary time info if this + * bit is supported hence speeding up Xen clocksource. + */ + pvti = &__this_cpu_read(xen_vcpu)->time; + if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); From 9fb9a5434f1c888e4cd8fd1cb573973939eddfa9 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 8 Nov 2017 17:19:57 +0000 Subject: [PATCH 1599/2608] x86/xen/time: setup vcpu 0 time info page commit 2229f70b5bbb025e1394b61007938a68060afbfb upstream. In order to support pvclock vdso on xen we need to setup the time info page for vcpu 0 and register the page with Xen using the VCPUOP_register_vcpu_time_memory_area hypercall. This hypercall will also forcefully update the pvti which will set some of the necessary flags for vdso. Afterwards we check if it supports the PVCLOCK_TSC_STABLE_BIT flag which is mandatory for having vdso/vsyscall support. And if so, it will set the cpu 0 pvti that will be later on used when mapping the vdso image. The xen headers are also updated to include the new hypercall for registering the secondary vcpu_time_info struct. Signed-off-by: Joao Martins Reviewed-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/suspend.c | 4 ++ arch/x86/xen/time.c | 90 +++++++++++++++++++++++++++++++++++- arch/x86/xen/xen-ops.h | 2 + include/xen/interface/vcpu.h | 42 +++++++++++++++++ 4 files changed, 137 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 3e3a58ea669e..1d83152c761b 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -22,6 +22,8 @@ static DEFINE_PER_CPU(u64, spec_ctrl); void xen_arch_pre_suspend(void) { + xen_save_time_memory_area(); + if (xen_pv_domain()) xen_pv_pre_suspend(); } @@ -32,6 +34,8 @@ void xen_arch_post_suspend(int cancelled) xen_pv_post_suspend(cancelled); else xen_hvm_post_suspend(cancelled); + + xen_restore_time_memory_area(); } static void xen_vcpu_notify_restore(void *data) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5687be6f5ca5..efa81e736f43 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -371,6 +371,92 @@ static const struct pv_time_ops xen_time_ops __initconst = { .steal_clock = xen_steal_clock, }; +static struct pvclock_vsyscall_time_info *xen_clock __read_mostly; + +void xen_save_time_memory_area(void) +{ + struct vcpu_register_time_memory_area t; + int ret; + + if (!xen_clock) + return; + + t.addr.v = NULL; + + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); + if (ret != 0) + pr_notice("Cannot save secondary vcpu_time_info (err %d)", + ret); + else + clear_page(xen_clock); +} + +void xen_restore_time_memory_area(void) +{ + struct vcpu_register_time_memory_area t; + int ret; + + if (!xen_clock) + return; + + t.addr.v = &xen_clock->pvti; + + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); + + /* + * We don't disable VCLOCK_PVCLOCK entirely if it fails to register the + * secondary time info with Xen or if we migrated to a host without the + * necessary flags. On both of these cases what happens is either + * process seeing a zeroed out pvti or seeing no PVCLOCK_TSC_STABLE_BIT + * bit set. Userspace checks the latter and if 0, it discards the data + * in pvti and fallbacks to a system call for a reliable timestamp. + */ + if (ret != 0) + pr_notice("Cannot restore secondary vcpu_time_info (err %d)", + ret); +} + +static void xen_setup_vsyscall_time_info(void) +{ + struct vcpu_register_time_memory_area t; + struct pvclock_vsyscall_time_info *ti; + int ret; + + ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL); + if (!ti) + return; + + t.addr.v = &ti->pvti; + + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); + if (ret) { + pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret); + free_page((unsigned long)ti); + return; + } + + /* + * If primary time info had this bit set, secondary should too since + * it's the same data on both just different memory regions. But we + * still check it in case hypervisor is buggy. + */ + if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) { + t.addr.v = NULL; + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, + 0, &t); + if (!ret) + free_page((unsigned long)ti); + + pr_notice("xen: VCLOCK_PVCLOCK not supported (tsc unstable)\n"); + return; + } + + xen_clock = ti; + pvclock_set_pvti_cpu0_va(xen_clock); + + xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK; +} + static void __init xen_time_init(void) { struct pvclock_vcpu_time_info *pvti; @@ -402,8 +488,10 @@ static void __init xen_time_init(void) * bit is supported hence speeding up Xen clocksource. */ pvti = &__this_cpu_read(xen_vcpu)->time; - if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) + if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) { pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); + xen_setup_vsyscall_time_info(); + } xen_setup_runstate_info(cpu); xen_setup_timer(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f377e1820c6c..75011b80660f 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -70,6 +70,8 @@ void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); u64 xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); +void xen_save_time_memory_area(void); +void xen_restore_time_memory_area(void); void __init xen_init_time_ops(void); void __init xen_hvm_init_time_ops(void); diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index 98188c87f5c1..504c71601511 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h @@ -178,4 +178,46 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); /* Send an NMI to the specified VCPU. @extra_arg == NULL. */ #define VCPUOP_send_nmi 11 + +/* + * Get the physical ID information for a pinned vcpu's underlying physical + * processor. The physical ID informmation is architecture-specific. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. + * This command returns -EINVAL if it is not a valid operation for this VCPU. + */ +#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ +struct vcpu_get_physid { + uint64_t phys_id; +}; +DEFINE_GUEST_HANDLE_STRUCT(vcpu_get_physid); +#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) +#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit). It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_GUEST_HANDLE_STRUCT(vcpu_time_info); +struct vcpu_register_time_memory_area { + union { + GUEST_HANDLE(vcpu_time_info) h; + struct pvclock_vcpu_time_info *v; + uint64_t p; + } addr; +}; +DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_time_memory_area); + #endif /* __XEN_PUBLIC_VCPU_H__ */ From ac0d94ff5d7a5efaf135d7d606fd0c30d16e639d Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:32 -0400 Subject: [PATCH 1600/2608] x86/xen/time: Output xen sched_clock time from 0 commit 38669ba205d178d2d38bfd194a196d65a44d5af2 upstream. It is expected for sched_clock() to output data from 0, when system boots. Add an offset xen_sched_clock_offset (similarly how it is done in other hypervisors i.e. kvm_sched_clock_offset) to count sched_clock() from 0, when time is first initialized. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-14-pasha.tatashin@oracle.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/time.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index efa81e736f43..8c0b2beb3f46 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -31,6 +31,8 @@ /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 +static u64 xen_sched_clock_offset __read_mostly; + /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) { @@ -57,6 +59,11 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs) return xen_clocksource_read(); } +static u64 xen_sched_clock(void) +{ + return xen_clocksource_read() - xen_sched_clock_offset; +} + static void xen_read_wallclock(struct timespec *ts) { struct shared_info *s = HYPERVISOR_shared_info; @@ -367,7 +374,7 @@ void xen_timer_resume(void) } static const struct pv_time_ops xen_time_ops __initconst = { - .sched_clock = xen_clocksource_read, + .sched_clock = xen_sched_clock, .steal_clock = xen_steal_clock, }; @@ -505,6 +512,7 @@ static void __init xen_time_init(void) void __ref xen_init_time_ops(void) { + xen_sched_clock_offset = xen_clocksource_read(); pv_time_ops = xen_time_ops; x86_init.timers.timer_init = xen_time_init; @@ -547,6 +555,7 @@ void __init xen_hvm_init_time_ops(void) return; } + xen_sched_clock_offset = xen_clocksource_read(); pv_time_ops = xen_time_ops; x86_init.timers.setup_percpu_clockev = xen_time_init; x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; From 49c5a805a2286adfd231475603f85bb495d99623 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 14 Jan 2019 13:44:13 +0100 Subject: [PATCH 1601/2608] xen: Fix x86 sched_clock() interface for xen commit 867cefb4cb1012f42cada1c7d1f35ac8dd276071 upstream. Commit f94c8d11699759 ("sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock() interface") broke Xen guest time handling across migration: [ 187.249951] Freezing user space processes ... (elapsed 0.001 seconds) done. [ 187.251137] OOM killer disabled. [ 187.251137] Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done. [ 187.252299] suspending xenstore... [ 187.266987] xen:grant_table: Grant tables using version 1 layout [18446743811.706476] OOM killer enabled. [18446743811.706478] Restarting tasks ... done. [18446743811.720505] Setting capacity to 16777216 Fix that by setting xen_sched_clock_offset at resume time to ensure a monotonic clock value. [boris: replaced pr_info() with pr_info_once() in xen_callback_vector() to avoid printing with incorrect timestamp during resume (as we haven't re-adjusted the clock yet)] Fixes: f94c8d11699759 ("sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock() interface") Cc: # 4.11 Reported-by: Hans van Kranenburg Signed-off-by: Juergen Gross Tested-by: Hans van Kranenburg Signed-off-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/time.c | 12 +++++++++--- drivers/xen/events/events_base.c | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 8c0b2beb3f46..03706331f567 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -361,8 +361,6 @@ void xen_timer_resume(void) { int cpu; - pvclock_resume(); - if (xen_clockevent != &xen_vcpuop_clockevent) return; @@ -379,12 +377,15 @@ static const struct pv_time_ops xen_time_ops __initconst = { }; static struct pvclock_vsyscall_time_info *xen_clock __read_mostly; +static u64 xen_clock_value_saved; void xen_save_time_memory_area(void) { struct vcpu_register_time_memory_area t; int ret; + xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset; + if (!xen_clock) return; @@ -404,7 +405,7 @@ void xen_restore_time_memory_area(void) int ret; if (!xen_clock) - return; + goto out; t.addr.v = &xen_clock->pvti; @@ -421,6 +422,11 @@ void xen_restore_time_memory_area(void) if (ret != 0) pr_notice("Cannot restore secondary vcpu_time_info (err %d)", ret); + +out: + /* Need pvclock_resume() before using xen_clocksource_read(). */ + pvclock_resume(); + xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved; } static void xen_setup_vsyscall_time_info(void) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index e6c1934734b7..fe1f16351f94 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1650,7 +1650,7 @@ void xen_callback_vector(void) xen_have_vector_callback = 0; return; } - pr_info("Xen HVM callback vector for event delivery is enabled\n"); + pr_info_once("Xen HVM callback vector for event delivery is enabled\n"); alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, xen_hvm_callback_vector); } From d33655dbae7d3c446612f799adc78e53cd142d44 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 22 Nov 2018 18:58:46 +0800 Subject: [PATCH 1602/2608] f2fs: read page index before freeing commit 0ea295dd853e0879a9a30ab61f923c26be35b902 upstream. The function truncate_node frees the page with f2fs_put_page. However, the page index is read after that. So, the patch reads the index before freeing the page. Fixes: bf39c00a9a7f ("f2fs: drop obsolete node page when it is truncated") Cc: Signed-off-by: Pan Bian Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 65de72d65562..12060fbfbb05 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -694,6 +694,7 @@ static void truncate_node(struct dnode_of_data *dn) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info ni; + pgoff_t index; get_node_info(sbi, dn->nid, &ni); f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); @@ -712,10 +713,11 @@ static void truncate_node(struct dnode_of_data *dn) clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); + index = dn->node_page->index; f2fs_put_page(dn->node_page, 1); invalidate_mapping_pages(NODE_MAPPING(sbi), - dn->node_page->index, dn->node_page->index); + index, index); dn->node_page = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); From b682b805251ed247da3b1d2eb8ad1287509f2555 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 15:52:17 -0400 Subject: [PATCH 1603/2608] btrfs: fix error handling in btrfs_dev_replace_start commit 5c06147128fbbdf7a84232c5f0d808f53153defe upstream. When we fail to start a transaction in btrfs_dev_replace_start, we leave dev_replace->replace_start set to STARTED but clear ->srcdev and ->tgtdev. Later, that can result in an Oops in btrfs_dev_replace_progress when having state set to STARTED or SUSPENDED implies that ->srcdev is valid. Also fix error handling when the state is already STARTED or SUSPENDED while starting. That, too, will clear ->srcdev and ->tgtdev even though it doesn't own them. This should be an impossible case to hit since we should be protected by the BTRFS_FS_EXCL_OP bit being set. Let's add an ASSERT there while we're at it. Fixes: e93c89c1aaaaa (Btrfs: add new sources for device replace code) CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/dev-replace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index dd80a1bdf9e2..68362eddaea2 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -351,6 +351,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: + ASSERT(0); ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; goto leave; } @@ -395,6 +396,10 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_dev_replace_lock(dev_replace, 1); + dev_replace->replace_state = + BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED; + dev_replace->srcdev = NULL; + dev_replace->tgtdev = NULL; goto leave; } @@ -416,8 +421,6 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, return ret; leave: - dev_replace->srcdev = NULL; - dev_replace->tgtdev = NULL; btrfs_dev_replace_unlock(dev_replace, 1); btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); return ret; From f4f4ac411eab2c15f25acb7847f310ccdaaf299a Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sun, 11 Nov 2018 22:22:17 +0800 Subject: [PATCH 1604/2608] btrfs: dev-replace: go back to suspended state if target device is missing commit 0d228ece59a35a9b9e8ff0d40653234a6d90f61e upstream. At the time of forced unmount we place the running replace to BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED state, so when the system comes back and expect the target device is missing. Then let the replace state continue to be in BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED state instead of BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED as there isn't any matching scrub running as part of replace. Fixes: e93c89c1aaaa ("Btrfs: add new sources for device replace code") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/dev-replace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 68362eddaea2..f86457713e60 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -804,6 +804,8 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) "cannot continue dev_replace, tgtdev is missing"); btrfs_info(fs_info, "you may cancel the operation after 'mount -o degraded'"); + dev_replace->replace_state = + BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; btrfs_dev_replace_unlock(dev_replace, 1); return 0; } From e1e364bf09d92018d35f20a004ffcfd4cbeffa34 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 08:13:48 +0100 Subject: [PATCH 1605/2608] Linux 4.14.97 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 57b45169ed85..485afde0f1f1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 96 +SUBLEVEL = 97 EXTRAVERSION = NAME = Petit Gorille From dc489ad6a2f2fcebdaecb7b8532e0d02d272ac6a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 15:59:51 +0100 Subject: [PATCH 1606/2608] Fix "net: ipv4: do not handle duplicate fragments as overlapping" ade446403bfb ("net: ipv4: do not handle duplicate fragments as overlapping") was backported to many stable trees, but it had a problem that was "accidentally" fixed by the upstream commit 0ff89efb5246 ("ip: fail fast on IP defrag errors") This is the fixup for that problem as we do not want the larger patch in the older stable trees. Fixes: ade446403bfb ("net: ipv4: do not handle duplicate fragments as overlapping") Reported-by: Ivan Babrou Reported-by: Eric Dumazet Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index f8bbd693c19c..d95b32af4a0e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -425,6 +425,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * fragment. */ + err = -EINVAL; /* Find out where to put this fragment. */ prev_tail = qp->q.fragments_tail; if (!prev_tail) @@ -501,7 +502,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) discard_qp: inet_frag_kill(&qp->q); - err = -EINVAL; __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); err: kfree_skb(skb); From b99abc70ea8f79fda83f8a795cbc717c47311ae8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Jan 2019 18:57:09 -0800 Subject: [PATCH 1607/2608] ipv6: Consider sk_bound_dev_if when binding a socket to an address [ Upstream commit c5ee066333ebc322a24a00a743ed941a0c68617e ] IPv6 does not consider if the socket is bound to a device when binding to an address. The result is that a socket can be bound to eth0 and then bound to the address of eth1. If the device is a VRF, the result is that a socket can only be bound to an address in the default VRF. Resolve by considering the device if sk_bound_dev_if is set. This problem exists from the beginning of git history. Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/af_inet6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b1ed9254a4b6..9552e0b08f45 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -370,6 +370,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EINVAL; goto out_unlock; } + } + + if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; From c630ec8bdadae9d557b1ceb9d6c06e149108a0d4 Mon Sep 17 00:00:00 2001 From: Yohei Kanemaru Date: Tue, 29 Jan 2019 15:52:34 +0900 Subject: [PATCH 1608/2608] ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation [ Upstream commit ef489749aae508e6f17886775c075f12ff919fb1 ] skb->cb may contain data from previous layers (in an observed case IPv4 with L3 Master Device). In the observed scenario, the data in IPCB(skb)->frags was misinterpreted as IP6CB(skb)->frag_max_size, eventually caused an unexpected IPv6 fragmentation in ip6_fragment() through ip6_finish_output(). This patch clears IP6CB(skb), which potentially contains garbage data, on the SRH ip4ip6 encapsulation. Fixes: 32d99d0b6702 ("ipv6: sr: add support for ip4ip6 encapsulation") Signed-off-by: Yohei Kanemaru Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/seg6_iptunnel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index cf9342bfe95a..de4c9826c1ce 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -126,6 +126,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } else { ip6_flow_hdr(hdr, 0, 0); hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); } hdr->nexthdr = NEXTHDR_ROUTING; From 6dac61bfd6db96ee60ffa9d96d295bbc91bd2bba Mon Sep 17 00:00:00 2001 From: Jacob Wen Date: Thu, 31 Jan 2019 15:18:56 +0800 Subject: [PATCH 1609/2608] l2tp: copy 4 more bytes to linear part if necessary [ Upstream commit 91c524708de6207f59dd3512518d8a1c7b434ee3 ] The size of L2TPv2 header with all optional fields is 14 bytes. l2tp_udp_recv_core only moves 10 bytes to the linear part of a skb. This may lead to l2tp_recv_common read data outside of a skb. This patch make sure that there is at least 14 bytes in the linear part of a skb to meet the maximum need of l2tp_udp_recv_core and l2tp_recv_common. The minimum size of both PPP HDLC-like frame and Ethernet frame is larger than 14 bytes, so we are safe to do so. Also remove L2TP_HDR_SIZE_NOSEQ, it is unused now. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Suggested-by: Guillaume Nault Signed-off-by: Jacob Wen Acked-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 33ea389ee015..c49a2dc33f68 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -83,8 +83,7 @@ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff -#define L2TP_HDR_SIZE_SEQ 10 -#define L2TP_HDR_SIZE_NOSEQ 6 +#define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 @@ -907,7 +906,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { l2tp_info(tunnel, L2TP_MSG_DATA, "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); From f6ed795983fca6223324742426a89925f0de783a Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 22 Jan 2019 15:19:44 +0200 Subject: [PATCH 1610/2608] net/mlx4_core: Add masking for a few queries on HCA caps [ Upstream commit a40ded6043658444ee4dd6ee374119e4e98b33fc ] Driver reads the query HCA capabilities without the corresponding masks. Without the correct masks, the base addresses of the queues are unaligned. In addition some reserved bits were wrongly read. Using the correct masks, ensures alignment of the base addresses and allows future firmware versions safe use of the reserved bits. Fixes: ab9c17a009ee ("mlx4_core: Modify driver initialization flow to accommodate SRIOV for Ethernet") Fixes: 0ff1fb654bec ("{NET, IB}/mlx4: Add device managed flow steering firmware API") Signed-off-by: Aya Levin Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/fw.c | 75 +++++++++++++++---------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 16c09949afd5..7440c769b30f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -2048,9 +2048,11 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; + u64 qword_field; u32 dword_field; - int err; + u16 word_field; u8 byte_field; + int err; static const u8 a0_dmfs_query_hw_steering[] = { [0] = MLX4_STEERING_DMFS_A0_DEFAULT, [1] = MLX4_STEERING_DMFS_A0_DYNAMIC, @@ -2078,19 +2080,32 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* QPC/EEC/CQC/EQC/RDMARC attributes */ - MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET); - MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET); - MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET); - MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET); - MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET); - MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET); - MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET); - MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); - MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); - MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); - MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); - MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); - MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); + MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET); + param->qpc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET); + param->log_num_qps = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET); + param->srqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET); + param->log_num_srqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET); + param->cqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET); + param->log_num_cqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET); + param->altc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET); + param->auxc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET); + param->eqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET); + param->log_num_eqs = byte_field & 0x1f; + MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); + param->num_sys_eqs = word_field & 0xfff; + MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET); + param->rdmarc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET); + param->log_rd_per_qp = byte_field & 0x7; MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { @@ -2109,22 +2124,21 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* steering attributes */ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); - MLX4_GET(byte_field, outbox, - INIT_HCA_FS_A0_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET); param->dmfs_high_steer_mode = a0_dmfs_query_hw_steering[(byte_field >> 6) & 3]; } else { MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_hash_sz, outbox, - INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + param->log_mc_hash_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; } /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ @@ -2148,15 +2162,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); - MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET); - MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET); + param->mw_enabled = byte_field >> 7; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + param->log_mpt_sz = byte_field & 0x3f; MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); - MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + param->log_uar_sz = byte_field & 0xf; /* phv_check enable */ MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); From 0adf571fa34b27bd0b97b408cc0f0dc54b72f0eb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 24 Jan 2019 14:18:18 -0800 Subject: [PATCH 1611/2608] netrom: switch to sock timer API [ Upstream commit 63346650c1a94a92be61a57416ac88c0a47c4327 ] sk_reset_timer() and sk_stop_timer() properly handle sock refcnt for timer function. Switching to them could fix a refcounting bug reported by syzbot. Reported-and-tested-by: syzbot+defa700d16f1bd1b9a05@syzkaller.appspotmail.com Cc: Ralf Baechle Cc: linux-hams@vger.kernel.org Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netrom/nr_timer.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 94d05806a9a2..f0ecaec1ff3d 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -53,21 +53,21 @@ void nr_start_t1timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t1timer, jiffies + nr->t1); + sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); } void nr_start_t2timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t2timer, jiffies + nr->t2); + sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); } void nr_start_t4timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t4timer, jiffies + nr->t4); + sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); } void nr_start_idletimer(struct sock *sk) @@ -75,37 +75,37 @@ void nr_start_idletimer(struct sock *sk) struct nr_sock *nr = nr_sk(sk); if (nr->idle > 0) - mod_timer(&nr->idletimer, jiffies + nr->idle); + sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); } void nr_start_heartbeat(struct sock *sk) { - mod_timer(&sk->sk_timer, jiffies + 5 * HZ); + sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); } void nr_stop_t1timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t1timer); + sk_stop_timer(sk, &nr_sk(sk)->t1timer); } void nr_stop_t2timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t2timer); + sk_stop_timer(sk, &nr_sk(sk)->t2timer); } void nr_stop_t4timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t4timer); + sk_stop_timer(sk, &nr_sk(sk)->t4timer); } void nr_stop_idletimer(struct sock *sk) { - del_timer(&nr_sk(sk)->idletimer); + sk_stop_timer(sk, &nr_sk(sk)->idletimer); } void nr_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } int nr_t1timer_running(struct sock *sk) From 70b9069d45cd870a9c7235efaf96df2965db8eac Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Fri, 25 Jan 2019 11:46:40 +0100 Subject: [PATCH 1612/2608] net/rose: fix NULL ax25_cb kernel panic [ Upstream commit b0cf029234f9b18e10703ba5147f0389c382bccc ] When an internally generated frame is handled by rose_xmit(), rose_route_frame() is called: if (!rose_route_frame(skb, NULL)) { dev_kfree_skb(skb); stats->tx_errors++; return NETDEV_TX_OK; } We have the same code sequence in Net/Rom where an internally generated frame is handled by nr_xmit() calling nr_route_frame(skb, NULL). However, in this function NULL argument is tested while it is not in rose_route_frame(). Then kernel panic occurs later on when calling ax25cmp() with a NULL ax25_cb argument as reported many times and recently with syzbot. We need to test if ax25 is NULL before using it. Testing: Built kernel with CONFIG_ROSE=y. Signed-off-by: Bernard Pidoux Acked-by: Dmitry Vyukov Reported-by: syzbot+1a2c456a1ea08fa5b5f7@syzkaller.appspotmail.com Cc: "David S. Miller" Cc: Ralf Baechle Cc: Bernard Pidoux Cc: linux-hams@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rose/rose_route.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 452bbb38d943..2741abec7ee7 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -848,6 +848,7 @@ void rose_link_device_down(struct net_device *dev) /* * Route a frame to an appropriate AX.25 connection. + * A NULL ax25_cb indicates an internally generated frame. */ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) { @@ -865,6 +866,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (skb->len < ROSE_MIN_LEN) return res; + + if (!ax25) + return rose_loopback_queue(skb, NULL); + frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); if (frametype == ROSE_CALL_REQUEST && From ee092be36582a5f7420b58cae6e980c08c0f38ee Mon Sep 17 00:00:00 2001 From: Josh Elsasser Date: Sat, 26 Jan 2019 14:38:33 -0800 Subject: [PATCH 1613/2608] net: set default network namespace in init_dummy_netdev() [ Upstream commit 35edfdc77f683c8fd27d7732af06cf6489af60a5 ] Assign a default net namespace to netdevs created by init_dummy_netdev(). Fixes a NULL pointer dereference caused by busy-polling a socket bound to an iwlwifi wireless device, which bumps the per-net BUSYPOLLRXPACKETS stat if napi_poll() received packets: BUG: unable to handle kernel NULL pointer dereference at 0000000000000190 IP: napi_busy_loop+0xd6/0x200 Call Trace: sock_poll+0x5e/0x80 do_sys_poll+0x324/0x5a0 SyS_poll+0x6c/0xf0 do_syscall_64+0x6b/0x1f0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 7db6b048da3b ("net: Commonize busy polling code to focus on napi_id instead of socket") Signed-off-by: Josh Elsasser Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 4337450a5fdb..54ba5b5bc55c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7748,6 +7748,9 @@ int init_dummy_netdev(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); set_bit(__LINK_STATE_START, &dev->state); + /* napi_busy_loop stats accounting wants this */ + dev_net_set(dev, &init_net); + /* Note : We dont allocate pcpu_refcnt for dummy devices, * because users of this 'device' dont need to change * its refcount. From 046b1b9efe35e9873d7cf35a3acefb7889dd10a6 Mon Sep 17 00:00:00 2001 From: Mathias Thore Date: Mon, 28 Jan 2019 10:07:47 +0100 Subject: [PATCH 1614/2608] ucc_geth: Reset BQL queue when stopping device [ Upstream commit e15aa3b2b1388c399c1a2ce08550d2cc4f7e3e14 ] After a timeout event caused by for example a broadcast storm, when the MAC and PHY are reset, the BQL TX queue needs to be reset as well. Otherwise, the device will exhibit severe performance issues even after the storm has ended. Co-authored-by: David Gounaris Signed-off-by: Mathias Thore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/ucc_geth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f77ba9fa257b..94df1ddc5dcb 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1888,6 +1888,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) u16 i, j; u8 __iomem *bd; + netdev_reset_queue(ugeth->ndev); + ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; From d3ee1e685f4ba7f9e2dabc97ad979bcd27a03292 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 24 Dec 2018 09:48:42 +0200 Subject: [PATCH 1615/2608] net/mlx5e: Allow MAC invalidation while spoofchk is ON [ Upstream commit 9d2cbdc5d334967c35b5f58c7bf3208e17325647 ] Prior to this patch the driver prohibited spoof checking on invalid MAC. Now the user can set this configuration if it wishes to. This is required since libvirt might invalidate the VF Mac by setting it to zero, while spoofcheck is ON. Fixes: 1ab2068a4c66 ("net/mlx5: Implement vports admin state backup/restore") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1af9894abd95..49a1f8f99c36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1126,13 +1126,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, int err = 0; u8 *smac_v; - if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { - mlx5_core_warn(esw->dev, - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", - vport->vport); - return -EPERM; - } - esw_vport_cleanup_ingress_rules(esw, vport); if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { @@ -1734,13 +1727,10 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) mlx5_core_warn(esw->dev, - "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", + "Set invalid MAC while spoofchk is on, vport(%d)\n", vport); - err = -EPERM; - goto unlock; - } err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); if (err) { @@ -1886,6 +1876,10 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, evport = &esw->vports[vport]; pschk = evport->info.spoofchk; evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk in set while MAC is invalid, vport(%d)\n", + evport->vport); if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); if (err) From eb31b906dcac50ce1bcf40e82689414f7051925d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Sun, 13 Jan 2019 22:47:26 -0600 Subject: [PATCH 1616/2608] Revert "net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager" [ Upstream commit 4e046de0f50e04acd48eb373d6a9061ddf014e0c ] This reverts commit 5f5991f36dce1e69dd8bd7495763eec2e28f08e7. With the original commit, eswitch instance will not be initialized for a function which is vport group manager but not eswitch manager such as host PF on SmartNIC (BlueField) card. This will result in a kernel crash when such a vport group manager is trying to access vports in its group. E.g, PF vport manager (not eswitch manager) tries to configure the MAC of its VF vport, a kernel trace will happen similar as bellow: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... RIP: 0010:mlx5_eswitch_get_vport_config+0xc/0x180 [mlx5_core] ... Fixes: 5f5991f36dce ("net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager") Signed-off-by: Bodong Wang Reported-by: Yuval Avnery Reviewed-by: Daniel Jurgens Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 49a1f8f99c36..2f93e6e9dc9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1607,7 +1607,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) int vport_num; int err; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!MLX5_VPORT_MANAGER(dev)) return 0; esw_info(dev, @@ -1680,7 +1680,7 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev)) + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) return; esw_info(esw->dev, "cleanup\n"); From e16d506fda2cd06925ae932cdd910f4072831c8c Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:53 +0900 Subject: [PATCH 1617/2608] virtio_net: Don't enable NAPI when interface is down [ Upstream commit 8be4d9a492f88b96d4d3a06c6cbedbc40ca14c83 ] Commit 4e09ff536284 ("virtio-net: disable NAPI only when enabled during XDP set") tried to fix inappropriate NAPI enabling/disabling when !netif_running(), but was not complete. On error path virtio_net could enable NAPI even when !netif_running(). This can cause enabling NAPI twice on virtnet_open(), which would trigger BUG_ON() in napi_enable(). Fixes: 4941d472bf95b ("virtio-net: do not reset during XDP set") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 215696f21d67..c504487c8245 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2034,8 +2034,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return 0; err: - for (i = 0; i < vi->max_queue_pairs; i++) - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; From 4dc715b218b3747bc236c18841812ca93176d003 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:54 +0900 Subject: [PATCH 1618/2608] virtio_net: Don't call free_old_xmit_skbs for xdp_frames [ Upstream commit 534da5e856334fb54cb0272a9fb3afec28ea3aed ] When napi_tx is enabled, virtnet_poll_cleantx() called free_old_xmit_skbs() even for xdp send queue. This is bogus since the queue has xdp_frames, not sk_buffs, thus mangled device tx bytes counters because skb->len is meaningless value, and even triggered oops due to general protection fault on freeing them. Since xdp send queues do not aquire locks, old xdp_frames should be freed only in virtnet_xdp_xmit(), so just skip free_old_xmit_skbs() for xdp send queues. Similarly virtnet_poll_tx() called free_old_xmit_skbs(). This NAPI handler is called even without calling start_xmit() because cb for tx is by default enabled. Once the handler is called, it enabled the cb again, and then the handler would be called again. We don't need this handler for XDP, so don't enable cb as well as not calling free_old_xmit_skbs(). Also, we need to disable tx NAPI when disabling XDP, so virtnet_poll_tx() can safely access curr_queue_pairs and xdp_queue_pairs, which are not atomically updated while disabling XDP. Fixes: b92f1e6751a6 ("virtio-net: transmit napi") Fixes: 7b0411ef4aa6 ("virtio-net: clean tx descriptors from rx napi") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 49 +++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c504487c8245..50c0ae772b5a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1149,6 +1149,16 @@ static void free_old_xmit_skbs(struct send_queue *sq) u64_stats_update_end(&stats->tx_syncp); } +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +{ + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) + return false; + else if (q < vi->curr_queue_pairs) + return true; + else + return false; +} + static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; @@ -1156,7 +1166,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) struct send_queue *sq = &vi->sq[index]; struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); - if (!sq->napi.weight) + if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) return; if (__netif_tx_trylock(txq)) { @@ -1206,8 +1216,16 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; - struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); + unsigned int index = vq2txq(sq->vq); + struct netdev_queue *txq; + if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { + /* We don't need to enable cb for XDP */ + napi_complete_done(napi, 0); + return 0; + } + + txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); free_old_xmit_skbs(sq); __netif_tx_unlock(txq); @@ -2006,9 +2024,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } /* Make sure NAPI is not using any XDP TX queues for RX. */ - if (netif_running(dev)) - for (i = 0; i < vi->max_queue_pairs; i++) + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { napi_disable(&vi->rq[i].napi); + virtnet_napi_tx_disable(&vi->sq[i].napi); + } + } netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); @@ -2027,16 +2048,22 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) + if (netif_running(dev)) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } } return 0; err: if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) + for (i = 0; i < vi->max_queue_pairs; i++) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); @@ -2178,16 +2205,6 @@ static void free_receive_page_frags(struct virtnet_info *vi) put_page(vi->rq[i].alloc_frag.page); } -static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; -} - static void free_unused_bufs(struct virtnet_info *vi) { void *buf; From 25d497c38cd3a96d8a20b04c0b1a643951628a92 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:55 +0900 Subject: [PATCH 1619/2608] virtio_net: Fix not restoring real_num_rx_queues [ Upstream commit 188313c137c4f76afd0862f50dbc185b198b9e2a ] When _virtnet_set_queues() failed we did not restore real_num_rx_queues. Fix this by placing the change of real_num_rx_queues after _virtnet_set_queues(). This order is also in line with virtnet_set_channels(). Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 50c0ae772b5a..0b457c81c448 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2031,10 +2031,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } } - netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; + netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp; for (i = 0; i < vi->max_queue_pairs; i++) { From f97e785670005f78188550ad21265edee03526d6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 22 Jan 2019 02:40:12 +0800 Subject: [PATCH 1620/2608] sctp: improve the events for sctp stream adding [ Upstream commit 8220c870cb0f4eaa4e335c9645dbd9a1c461c1dd ] This patch is to improve sctp stream adding events in 2 places: 1. In sctp_process_strreset_addstrm_out(), move up SCTP_MAX_STREAM and in stream allocation failure checks, as the adding has to succeed after reconf_timer stops for the in stream adding request retransmission. 3. In sctp_process_strreset_addstrm_in(), no event should be sent, as no in or out stream is added here. Fixes: 50a41591f110 ("sctp: implement receiver-side procedures for the Add Outgoing Streams Request Parameter") Fixes: c5c4ebb3ab87 ("sctp: implement receiver-side procedures for the Add Incoming Streams Request Parameter") Reported-by: Ying Xu Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/stream.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 61273534ae10..33fc0a651d33 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -642,6 +642,16 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) goto out; + in = ntohs(addstrm->number_of_streams); + incnt = stream->incnt + in; + if (!in || incnt > SCTP_MAX_STREAM) + goto out; + + streamin = krealloc(stream->in, incnt * sizeof(*streamin), + GFP_ATOMIC); + if (!streamin) + goto out; + if (asoc->strreset_chunk) { if (!sctp_chunk_lookup_strreset_param( asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) { @@ -665,16 +675,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( } } - in = ntohs(addstrm->number_of_streams); - incnt = stream->incnt + in; - if (!in || incnt > SCTP_MAX_STREAM) - goto out; - - streamin = krealloc(stream->in, incnt * sizeof(*streamin), - GFP_ATOMIC); - if (!streamin) - goto out; - memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); stream->in = streamin; stream->incnt = incnt; @@ -750,9 +750,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( result = SCTP_STRRESET_PERFORMED; - *evp = sctp_ulpevent_make_stream_change_event(asoc, - 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC); - out: sctp_update_strreset_result(asoc, result); err: From 556e554a3b7478dba08585b5393384c973f36408 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 22 Jan 2019 02:39:34 +0800 Subject: [PATCH 1621/2608] sctp: improve the events for sctp stream reset [ Upstream commit 2e6dc4d95110becfe0ff4c3d4749c33ea166e9e7 ] This patch is to improve sctp stream reset events in 4 places: 1. In sctp_process_strreset_outreq(), the flag should always be set with SCTP_STREAM_RESET_INCOMING_SSN instead of OUTGOING, as receiver's in stream is reset here. 2. In sctp_process_strreset_outreq(), move up SCTP_STRRESET_ERR_WRONG_SSN check, as the reset has to succeed after reconf_timer stops for the in stream reset request retransmission. 3. In sctp_process_strreset_inreq(), no event should be sent, as no in or out stream is reset here. 4. In sctp_process_strreset_resp(), SCTP_STREAM_RESET_INCOMING_SSN or OUTGOING event should always be sent for stream reset requests, no matter it fails or succeeds to process the request. Fixes: 810544764536 ("sctp: implement receiver-side procedures for the Outgoing SSN Reset Request Parameter") Fixes: 16e1a91965b0 ("sctp: implement receiver-side procedures for the Incoming SSN Reset Request Parameter") Fixes: 11ae76e67a17 ("sctp: implement receiver-side procedures for the Reconf Response Parameter") Reported-by: Ying Xu Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/stream.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 33fc0a651d33..71534bd4e77c 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -360,9 +360,9 @@ struct sctp_chunk *sctp_process_strreset_outreq( struct sctp_strreset_outreq *outreq = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; - __u16 i, nums, flags = 0; __be16 *str_p = NULL; __u32 request_seq; + __u16 i, nums; request_seq = ntohl(outreq->request_seq); @@ -390,6 +390,15 @@ struct sctp_chunk *sctp_process_strreset_outreq( if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) goto out; + nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); + str_p = outreq->list_of_streams; + for (i = 0; i < nums; i++) { + if (ntohs(str_p[i]) >= stream->incnt) { + result = SCTP_STRRESET_ERR_WRONG_SSN; + goto out; + } + } + if (asoc->strreset_chunk) { if (!sctp_chunk_lookup_strreset_param( asoc, outreq->response_seq, @@ -412,32 +421,19 @@ struct sctp_chunk *sctp_process_strreset_outreq( sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; } - - flags = SCTP_STREAM_RESET_INCOMING_SSN; } - nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2; - if (nums) { - str_p = outreq->list_of_streams; - for (i = 0; i < nums; i++) { - if (ntohs(str_p[i]) >= stream->incnt) { - result = SCTP_STRRESET_ERR_WRONG_SSN; - goto out; - } - } - + if (nums) for (i = 0; i < nums; i++) stream->in[ntohs(str_p[i])].ssn = 0; - } else { + else for (i = 0; i < stream->incnt; i++) stream->in[i].ssn = 0; - } result = SCTP_STRRESET_PERFORMED; *evp = sctp_ulpevent_make_stream_reset_event(asoc, - flags | SCTP_STREAM_RESET_OUTGOING_SSN, nums, str_p, - GFP_ATOMIC); + SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); out: sctp_update_strreset_result(asoc, result); @@ -507,9 +503,6 @@ struct sctp_chunk *sctp_process_strreset_inreq( result = SCTP_STRRESET_PERFORMED; - *evp = sctp_ulpevent_make_stream_reset_event(asoc, - SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); - out: sctp_update_strreset_result(asoc, result); err: @@ -802,10 +795,10 @@ struct sctp_chunk *sctp_process_strreset_resp( for (i = 0; i < stream->outcnt; i++) stream->out[i].ssn = 0; } - - flags = SCTP_STREAM_RESET_OUTGOING_SSN; } + flags |= SCTP_STREAM_RESET_OUTGOING_SSN; + for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; @@ -823,6 +816,8 @@ struct sctp_chunk *sctp_process_strreset_resp( str_p = inreq->list_of_streams; nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; + flags |= SCTP_STREAM_RESET_INCOMING_SSN; + *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, nums, str_p, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) { From a12098dd9f84ad7d94f2e4a36c2f73bb9eaa985c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 16 Jan 2018 23:01:55 +0100 Subject: [PATCH 1622/2608] l2tp: remove l2specific_len dependency in l2tp_core commit 62e7b6a57c7b9bf3c6fd99418eeec05b08a85c38 upstream. Remove l2specific_len dependency while building l2tpv3 header or parsing the received frame since default L2-Specific Sublayer is always four bytes long and we don't need to rely on a user supplied value. Moreover in l2tp netlink code there are no sanity checks to enforce the relation between l2specific_len and l2specific_type, so sending a malformed netlink message is possible to set l2specific_type to L2TP_L2SPECTYPE_DEFAULT (or even L2TP_L2SPECTYPE_NONE) and set l2specific_len to a value greater than 4 leaking memory on the wire and sending corrupted frames. Reviewed-by: Guillaume Nault Tested-by: Guillaume Nault Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 34 ++++++++++++++++------------------ net/l2tp/l2tp_core.h | 11 +++++++++++ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c49a2dc33f68..847e252d094d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -758,11 +758,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: recv data ns=%u, session nr=%u\n", session->name, ns, session->nr); } + ptr += 4; } - /* Advance past L2-specific header, if present */ - ptr += session->l2specific_len; - if (L2TP_SKB_CB(skb)->has_seq) { /* Received a packet with sequence numbers. If we're the LNS, * check if we sre sending sequence numbers and if not, @@ -1084,21 +1082,20 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) memcpy(bufp, &session->cookie[0], session->cookie_len); bufp += session->cookie_len; } - if (session->l2specific_len) { - if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { - u32 l2h = 0; - if (session->send_seq) { - l2h = 0x40000000 | session->ns; - session->ns++; - session->ns &= 0xffffff; - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: updated ns to %u\n", - session->name, session->ns); - } + if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { + u32 l2h = 0; - *((__be32 *) bufp) = htonl(l2h); + if (session->send_seq) { + l2h = 0x40000000 | session->ns; + session->ns++; + session->ns &= 0xffffff; + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: updated ns to %u\n", + session->name, session->ns); } - bufp += session->l2specific_len; + + *((__be32 *)bufp) = htonl(l2h); + bufp += 4; } return bufp - optr; @@ -1764,7 +1761,7 @@ int l2tp_session_delete(struct l2tp_session *session) EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or - * l2specific_len parameters are set. + * l2specific_type parameters are set. */ void l2tp_session_set_header_len(struct l2tp_session *session, int version) { @@ -1773,7 +1770,8 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) if (session->send_seq) session->hdr_len += 4; } else { - session->hdr_len = 4 + session->cookie_len + session->l2specific_len; + session->hdr_len = 4 + session->cookie_len; + session->hdr_len += l2tp_get_l2specific_len(session); if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 0a58c0754526..c9429d5f9a2e 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -320,6 +320,17 @@ do { \ #define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s) #endif +static inline int l2tp_get_l2specific_len(struct l2tp_session *session) +{ + switch (session->l2specific_type) { + case L2TP_L2SPECTYPE_DEFAULT: + return 4; + case L2TP_L2SPECTYPE_NONE: + default: + return 0; + } +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ From 3924ebe54f0c0750436f69cb185e290a16499b9b Mon Sep 17 00:00:00 2001 From: Jacob Wen Date: Wed, 30 Jan 2019 14:55:14 +0800 Subject: [PATCH 1623/2608] l2tp: fix reading optional fields of L2TPv3 [ Upstream commit 4522a70db7aa5e77526a4079628578599821b193 ] Use pskb_may_pull() to make sure the optional fields are in skb linear parts, so we can safely read them later. It's easy to reproduce the issue with a net driver that supports paged skb data. Just create a L2TPv3 over IP tunnel and then generates some network traffic. Once reproduced, rx err in /sys/kernel/debug/l2tp/tunnels will increase. Changes in v4: 1. s/l2tp_v3_pull_opt/l2tp_v3_ensure_opt_in_linear/ 2. s/tunnel->version != L2TP_HDR_VER_2/tunnel->version == L2TP_HDR_VER_3/ 3. Add 'Fixes' in commit messages. Changes in v3: 1. To keep consistency, move the code out of l2tp_recv_common. 2. Use "net" instead of "net-next", since this is a bug fix. Changes in v2: 1. Only fix L2TPv3 to make code simple. To fix both L2TPv3 and L2TPv2, we'd better refactor l2tp_recv_common. It's complicated to do so. 2. Reloading pointers after pskb_may_pull Fixes: f7faffa3ff8e ("l2tp: Add L2TPv3 protocol support") Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support") Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Jacob Wen Acked-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 4 ++++ net/l2tp/l2tp_core.h | 20 ++++++++++++++++++++ net/l2tp/l2tp_ip.c | 3 +++ net/l2tp/l2tp_ip6.c | 3 +++ 4 files changed, 30 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 847e252d094d..e494f04819e9 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -983,6 +983,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, goto error; } + if (tunnel->version == L2TP_HDR_VER_3 && + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto error; + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index c9429d5f9a2e..62598ee7b2e7 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -331,6 +331,26 @@ static inline int l2tp_get_l2specific_len(struct l2tp_session *session) } } +static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, + unsigned char **ptr, unsigned char **optr) +{ + int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); + + if (opt_len > 0) { + int off = *ptr - *optr; + + if (!pskb_may_pull(skb, off + opt_len)) + return -1; + + if (skb->data != *optr) { + *optr = skb->data; + *ptr = skb->data + off; + } + } + + return 0; +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index e4280b6568b4..f7880becc165 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8bcaa975b432..3c77507601c7 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); l2tp_session_dec_refcount(session); From efa83c74f83b26ebc01461662632a16ee1eae227 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Jan 2019 12:49:48 +0100 Subject: [PATCH 1624/2608] ipvlan, l3mdev: fix broken l3s mode wrt local routes [ Upstream commit d5256083f62e2720f75bb3c5a928a0afe47d6bc3 ] While implementing ipvlan l3 and l3s mode for kubernetes CNI plugin, I ran into the issue that while l3 mode is working fine, l3s mode does not have any connectivity to kube-apiserver and hence all pods end up in Error state as well. The ipvlan master device sits on top of a bond device and hostns traffic to kube-apiserver (also running in hostns) is DNATed from 10.152.183.1:443 to 139.178.29.207:37573 where the latter is the address of the bond0. While in l3 mode, a curl to https://10.152.183.1:443 or to https://139.178.29.207:37573 works fine from hostns, neither of them do in case of l3s. In the latter only a curl to https://127.0.0.1:37573 appeared to work where for local addresses of bond0 I saw kernel suddenly starting to emit ARP requests to query HW address of bond0 which remained unanswered and neighbor entries in INCOMPLETE state. These ARP requests only happen while in l3s. Debugging this further, I found the issue is that l3s mode is piggy- backing on l3 master device, and in this case local routes are using l3mdev_master_dev_rcu(dev) instead of net->loopback_dev as per commit f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") and 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback"). I found that reverting them back into using the net->loopback_dev fixed ipvlan l3s connectivity and got everything working for the CNI. Now judging from 4fbae7d83c98 ("ipvlan: Introduce l3s mode") and the l3mdev paper in [0] the only sole reason why ipvlan l3s is relying on l3 master device is to get the l3mdev_ip_rcv() receive hook for setting the dst entry of the input route without adding its own ipvlan specific hacks into the receive path, however, any l3 domain semantics beyond just that are breaking l3s operation. Note that ipvlan also has the ability to dynamically switch its internal operation from l3 to l3s for all ports via ipvlan_set_port_mode() at runtime. In any case, l3 vs l3s soley distinguishes itself by 'de-confusing' netfilter through switching skb->dev to ipvlan slave device late in NF_INET_LOCAL_IN before handing the skb to L4. Minimal fix taken here is to add a IFF_L3MDEV_RX_HANDLER flag which, if set from ipvlan setup, gets us only the wanted l3mdev_l3_rcv() hook without any additional l3mdev semantics on top. This should also have minimal impact since dev->priv_flags is already hot in cache. With this set, l3s mode is working fine and I also get things like masquerading pod traffic on the ipvlan master properly working. [0] https://netdevconf.org/1.2/papers/ahern-what-is-l3mdev-paper.pdf Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") Fixes: 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback") Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") Signed-off-by: Daniel Borkmann Cc: Mahesh Bandewar Cc: David Ahern Cc: Florian Westphal Cc: Martynas Pumputis Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_main.c | 6 +++--- include/linux/netdevice.h | 8 ++++++++ include/net/l3mdev.h | 3 ++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 58133c9f701b..2222ed63d055 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -95,12 +95,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); if (!err) { mdev->l3mdev_ops = &ipvl_l3mdev_ops; - mdev->priv_flags |= IFF_L3MDEV_MASTER; + mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER; } else goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ - mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); mdev->l3mdev_ops = NULL; } @@ -172,7 +172,7 @@ static void ipvlan_port_destroy(struct net_device *dev) dev->priv_flags &= ~IFF_IPVLAN_MASTER; if (port->mode == IPVLAN_MODE_L3S) { - dev->priv_flags &= ~IFF_L3MDEV_MASTER; + dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(dev_net(dev)); dev->l3mdev_ops = NULL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a516dbe5869f..40b830d55fe5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1356,6 +1356,7 @@ struct net_device_ops { * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) * @IFF_MACSEC: device is a MACsec device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1386,6 +1387,7 @@ enum netdev_priv_flags { IFF_RXFH_CONFIGURED = 1<<25, IFF_PHONY_HEADROOM = 1<<26, IFF_MACSEC = 1<<27, + IFF_L3MDEV_RX_HANDLER = 1<<28, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1415,6 +1417,7 @@ enum netdev_priv_flags { #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED #define IFF_MACSEC IFF_MACSEC +#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER /** * struct net_device - The DEVICE structure. @@ -4206,6 +4209,11 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } +static inline bool netif_has_l3_rx_handler(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; +} + static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3832099289c5..128487658ff7 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -142,7 +142,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); - else if (netif_is_l3_master(skb->dev)) + else if (netif_is_l3_master(skb->dev) || + netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) From 1e6bb03a0b87addc736cfa78e6b5ed32a0a8baa8 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sat, 26 Jan 2019 12:21:32 -0800 Subject: [PATCH 1625/2608] CIFS: Do not count -ENODATA as failure for query directory commit 8e6e72aeceaaed5aeeb1cb43d3085de7ceb14f79 upstream. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c0f8087d9819..fd2d199dd413 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3071,8 +3071,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, rsp->hdr.sync_hdr.Status == STATUS_NO_MORE_FILES) { srch_inf->endOfSearch = true; rc = 0; - } - cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); + } else + cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); goto qdir_exit; } From b30b0fdaae3942329788dd82fd1bd3865469c053 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 30 Jan 2019 13:52:36 -0500 Subject: [PATCH 1626/2608] fs/dcache: Fix incorrect nr_dentry_unused accounting in shrink_dcache_sb() commit 1dbd449c9943e3145148cc893c2461b72ba6fef0 upstream. The nr_dentry_unused per-cpu counter tracks dentries in both the LRU lists and the shrink lists where the DCACHE_LRU_LIST bit is set. The shrink_dcache_sb() function moves dentries from the LRU list to a shrink list and subtracts the dentry count from nr_dentry_unused. This is incorrect as the nr_dentry_unused count will also be decremented in shrink_dentry_list() via d_shrink_del(). To fix this double decrement, the decrement in the shrink_dcache_sb() function is taken out. Fixes: 4e717f5c1083 ("list_lru: remove special case function list_lru_dispose_all." Cc: stable@kernel.org Signed-off-by: Waiman Long Reviewed-by: Dave Chinner Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 28b2e770bb69..9ac1290ae44f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1183,15 +1183,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, */ void shrink_dcache_sb(struct super_block *sb) { - long freed; - do { LIST_HEAD(dispose); - freed = list_lru_walk(&sb->s_dentry_lru, + list_lru_walk(&sb->s_dentry_lru, dentry_lru_isolate_shrink, &dispose, 1024); - - this_cpu_sub(nr_dentry_unused, freed); shrink_dentry_list(&dispose); cond_resched(); } while (list_lru_count(&sb->s_dentry_lru) > 0); From b48b0fe5be40d5ed3a6172208812bfa5aa44538f Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 16 Jan 2019 20:11:44 +0100 Subject: [PATCH 1627/2608] iommu/vt-d: Fix memory leak in intel_iommu_put_resv_regions() commit 198bc3252ea3a45b0c5d500e6a5b91cfdd08f001 upstream. Commit 9d3a4de4cb8d ("iommu: Disambiguate MSI region types") changed the reserved region type in intel_iommu_get_resv_regions() from IOMMU_RESV_RESERVED to IOMMU_RESV_MSI, but it forgot to also change the type in intel_iommu_put_resv_regions(). This leads to a memory leak, because now the check in intel_iommu_put_resv_regions() for IOMMU_RESV_RESERVED will never be true, and no allocated regions will be freed. Fix this by changing the region type in intel_iommu_put_resv_regions() to IOMMU_RESV_MSI, matching the type of the allocated regions. Fixes: 9d3a4de4cb8d ("iommu: Disambiguate MSI region types") Cc: # v4.11+ Signed-off-by: Gerald Schaefer Reviewed-by: Eric Auger Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 34006354d2eb..802ba7b16e09 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5210,7 +5210,7 @@ static void intel_iommu_put_resv_regions(struct device *dev, struct iommu_resv_region *entry, *next; list_for_each_entry_safe(entry, next, head, list) { - if (entry->type == IOMMU_RESV_RESERVED) + if (entry->type == IOMMU_RESV_MSI) kfree(entry); } } From 2b1ee7daf46dff7a7c86cda4b41c892ec2516365 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Jan 2019 10:33:59 -0800 Subject: [PATCH 1628/2608] selftests/seccomp: Enhance per-arch ptrace syscall skip tests commit ed5f13261cb65b02c611ae9971677f33581d4286 upstream. Passing EPERM during syscall skipping was confusing since the test wasn't actually exercising the errno evaluation -- it was just passing a literal "1" (EPERM). Instead, expand the tests to check both direct value returns (positive, 45000 in this case), and errno values (negative, -ESRCH in this case) to check both fake success and fake failure during syscall skipping. Reported-by: Colin Ian King Fixes: a33b2d0359a0 ("selftests/seccomp: Add tests for basic ptrace actions") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 72 +++++++++++++++---- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 194759ec9e70..ba15baa2061b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1554,7 +1554,16 @@ TEST_F(TRACE_poke, getpid_runs_normally) #ifdef SYSCALL_NUM_RET_SHARE_REG # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else -# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action) +# define EXPECT_SYSCALL_RETURN(val, action) \ + do { \ + errno = 0; \ + if (val < 0) { \ + EXPECT_EQ(-1, action); \ + EXPECT_EQ(-(val), errno); \ + } else { \ + EXPECT_EQ(val, action); \ + } \ + } while (0) #endif /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for @@ -1593,7 +1602,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) /* Architecture-specific syscall changing routine. */ void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall) + pid_t tracee, int syscall, int result) { int ret; ARCH_REGS regs; @@ -1652,7 +1661,7 @@ void change_syscall(struct __test_metadata *_metadata, #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); #else - regs.SYSCALL_RET = EPERM; + regs.SYSCALL_RET = result; #endif #ifdef HAVE_GETREGS @@ -1680,14 +1689,19 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); break; case 0x1003: - /* skip gettid. */ + /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, 45000); break; case 0x1004: + /* skip openat with error. */ + EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, -1, -ESRCH); + break; + case 0x1005: /* do nothing (allow getppid) */ EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); break; @@ -1720,9 +1734,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, nr = get_syscall(_metadata, tracee); if (nr == __NR_getpid) - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); + if (nr == __NR_gettid) + change_syscall(_metadata, tracee, -1, 45000); if (nr == __NR_openat) - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, -ESRCH); } FIXTURE_DATA(TRACE_syscall) { @@ -1739,8 +1755,10 @@ FIXTURE_SETUP(TRACE_syscall) BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; @@ -1788,15 +1806,26 @@ TEST_F(TRACE_syscall, ptrace_syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, ptrace_syscall_dropped) +TEST_F(TRACE_syscall, ptrace_syscall_errno) { /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ teardown_trace_fixture(_metadata, self->tracer); self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, true); - /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); + /* Tracer should skip the open syscall, resulting in ESRCH. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, ptrace_syscall_faked) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer should skip the gettid syscall, resulting fake pid. */ + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, syscall_allowed) @@ -1829,7 +1858,21 @@ TEST_F(TRACE_syscall, syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, syscall_dropped) +TEST_F(TRACE_syscall, syscall_errno) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* openat has been skipped and an errno return. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, syscall_faked) { long ret; @@ -1840,8 +1883,7 @@ TEST_F(TRACE_syscall, syscall_dropped) ASSERT_EQ(0, ret); /* gettid has been skipped and an altered return value stored. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid)); - EXPECT_NE(self->mytid, syscall(__NR_gettid)); + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, skip_after_RET_TRACE) From f3f6e284de989d5f1b831bdd09835957dc12eaad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jan 2019 15:52:55 -0500 Subject: [PATCH 1629/2608] NFS: Fix up return value on fatal errors in nfs_page_async_flush() commit 8fc75bed96bb94e23ca51bd9be4daf65c57697bf upstream. Ensure that we return the fatal error value that caused us to exit nfs_page_async_flush(). Fixes: c373fff7bd25 ("NFSv4: Don't special case "launder"") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.12+ Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 19e6ea89ad26..2d956a7d5378 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -618,11 +618,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, nfs_set_page_writeback(page); WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); - ret = 0; + ret = req->wb_context->error; /* If there is a fatal error that covers this write, just exit */ - if (nfs_error_is_fatal_on_server(req->wb_context->error)) + if (nfs_error_is_fatal_on_server(ret)) goto out_launder; + ret = 0; if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* @@ -632,9 +633,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, nfs_context_set_write_error(req->wb_context, ret); if (nfs_error_is_fatal_on_server(ret)) goto out_launder; - } + } else + ret = -EAGAIN; nfs_redirty_request(req); - ret = -EAGAIN; } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); From 17aa9d04ba62be77b4aa2be7fa86e1b1dded4cad Mon Sep 17 00:00:00 2001 From: Koen Vandeputte Date: Thu, 31 Jan 2019 15:00:01 -0600 Subject: [PATCH 1630/2608] ARM: cns3xxx: Fix writing to wrong PCI config registers after alignment commit 65dbb423cf28232fed1732b779249d6164c5999b upstream. Originally, cns3xxx used its own functions for mapping, reading and writing config registers. Commit 802b7c06adc7 ("ARM: cns3xxx: Convert PCI to use generic config accessors") removed the internal PCI config write function in favor of the generic one: cns3xxx_pci_write_config() --> pci_generic_config_write() cns3xxx_pci_write_config() expected aligned addresses, being produced by cns3xxx_pci_map_bus() while the generic one pci_generic_config_write() actually expects the real address as both the function and hardware are capable of byte-aligned writes. This currently leads to pci_generic_config_write() writing to the wrong registers. For instance, upon ath9k module loading: - driver ath9k gets loaded - The driver wants to write value 0xA8 to register PCI_LATENCY_TIMER, located at 0x0D - cns3xxx_pci_map_bus() aligns the address to 0x0C - pci_generic_config_write() effectively writes 0xA8 into register 0x0C (CACHE_LINE_SIZE) Fix the bug by removing the alignment in the cns3xxx mapping function. Fixes: 802b7c06adc7 ("ARM: cns3xxx: Convert PCI to use generic config accessors") Signed-off-by: Koen Vandeputte [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Acked-by: Krzysztof Halasa Acked-by: Tim Harvey Acked-by: Arnd Bergmann CC: stable@vger.kernel.org # v4.0+ CC: Bjorn Helgaas CC: Olof Johansson CC: Robin Leblon CC: Rob Herring CC: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-cns3xxx/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c index 318394ed5c7a..5e11ad3164e0 100644 --- a/arch/arm/mach-cns3xxx/pcie.c +++ b/arch/arm/mach-cns3xxx/pcie.c @@ -83,7 +83,7 @@ static void __iomem *cns3xxx_pci_map_bus(struct pci_bus *bus, } else /* remote PCI bus */ base = cnspci->cfg1_regs + ((busno & 0xf) << 20); - return base + (where & 0xffc) + (devfn << 12); + return base + where + (devfn << 12); } static int cns3xxx_pci_read_config(struct pci_bus *bus, unsigned int devfn, From ab4ece4e9b1302bfb8453aa4fe4b2a3388bf476f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 27 Jan 2019 09:29:42 +0100 Subject: [PATCH 1631/2608] arm64: kaslr: ensure randomized quantities are clean also when kaslr is off commit 8ea235932314311f15ea6cf65c1393ed7e31af70 upstream. Commit 1598ecda7b23 ("arm64: kaslr: ensure randomized quantities are clean to the PoC") added cache maintenance to ensure that global variables set by the kaslr init routine are not wiped clean due to cache invalidation occurring during the second round of page table creation. However, if kaslr_early_init() exits early with no randomization being applied (either due to the lack of a seed, or because the user has disabled kaslr explicitly), no cache maintenance is performed, leading to the same issue we attempted to fix earlier, as far as the module_alloc_base variable is concerned. Note that module_alloc_base cannot be initialized statically, because that would cause it to be subject to a R_AARCH64_RELATIVE relocation, causing it to be overwritten by the second round of KASLR relocation processing. Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Cc: # v4.6+ Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/kaslr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 2bda224e8e71..ae7278286094 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -88,6 +88,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; + __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); /* * Try to map the FDT early. If this fails, we simply bail, From 6c100d927f3fde2a8ee3eed76316ffea83473149 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Jan 2019 16:32:56 +0000 Subject: [PATCH 1632/2608] arm64: hyp-stub: Forbid kprobing of the hyp-stub commit 8fac5cbdfe0f01254d9d265c6aa1a95f94f58595 upstream. The hyp-stub is loaded by the kernel's early startup code at EL2 during boot, before KVM takes ownership later. The hyp-stub's text is part of the regular kernel text, meaning it can be kprobed. A breakpoint in the hyp-stub causes the CPU to spin in el2_sync_invalid. Add it to the __hyp_text. Signed-off-by: James Morse Cc: stable@vger.kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/hyp-stub.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index e1261fbaa374..17f325ba831e 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -28,6 +28,8 @@ #include .text + .pushsection .hyp.text, "ax" + .align 11 ENTRY(__hyp_stub_vectors) From 8703298b9d803b064ed1e1d50338c7a37f73bef9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Jan 2019 16:32:57 +0000 Subject: [PATCH 1633/2608] arm64: hibernate: Clean the __hyp_text to PoC after resume commit f7daa9c8fd191724b9ab9580a7be55cd1a67d799 upstream. During resume hibernate restores all physical memory. Any memory that is accessed with the MMU disabled needs to be cleaned to the PoC. KVMs __hyp_text was previously ommitted as it runs with the MMU enabled, but now that the hyp-stub is located in this section, we must clean __hyp_text too. This ensures secondary CPUs that come online after hibernate has finished resuming, and load KVM via the freshly written hyp-stub see the correct instructions. Signed-off-by: James Morse Cc: stable@vger.kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/hibernate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index a028cc95afe1..bb444c693796 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -299,8 +299,10 @@ int swsusp_arch_suspend(void) dcache_clean_range(__idmap_text_start, __idmap_text_end); /* Clean kvm setup code to PoC? */ - if (el2_reset_needed()) + if (el2_reset_needed()) { dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); + dcache_clean_range(__hyp_text_start, __hyp_text_end); + } /* make the crash dump kernel image protected again */ crash_post_resume(); From 0e68ff516346d37496ed4b82efe470db79d5135e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 23 Jan 2019 08:00:57 +0800 Subject: [PATCH 1634/2608] gpio: altera-a10sr: Set proper output level for direction_output commit 2095a45e345e669ea77a9b34bdd7de5ceb422f93 upstream. The altr_a10sr_gpio_direction_output should set proper output level based on the value argument. Fixes: 26a48c4cc2f1 ("gpio: altera-a10sr: Add A10 System Resource Chip GPIO support.") Cc: Signed-off-by: Axel Lin Tested by: Thor Thayer Reviewed by: Thor Thayer Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-altera-a10sr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 6b11f1314248..7f9e0304b510 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c @@ -66,8 +66,10 @@ static int altr_a10sr_gpio_direction_input(struct gpio_chip *gc, static int altr_a10sr_gpio_direction_output(struct gpio_chip *gc, unsigned int nr, int value) { - if (nr <= (ALTR_A10SR_OUT_VALID_RANGE_HI - ALTR_A10SR_LED_VALID_SHIFT)) + if (nr <= (ALTR_A10SR_OUT_VALID_RANGE_HI - ALTR_A10SR_LED_VALID_SHIFT)) { + altr_a10sr_gpio_set(gc, nr, value); return 0; + } return -EINVAL; } From 25f13440ddbcc27c5bcad54382be9b1ac7ea19f6 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 9 Jan 2019 11:11:24 +0200 Subject: [PATCH 1635/2608] gpio: pcf857x: Fix interrupts on multiple instances commit 2486e67374aa8b7854c2de32869642c2873b3d53 upstream. When multiple instances of pcf857x chips are present, a fix up message [1] is printed during the probe of the 2nd and later instances. The issue is that the driver is using the same irq_chip data structure between multiple instances. Fix this by allocating the irq_chip data structure per instance. [1] fix up message addressed by this patch [ 1.212100] gpio gpiochip9: (pcf8575): detected irqchip that is shared with multiple gpiochips: please fix the driver. Cc: Stable Signed-off-by: Roger Quadros Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-pcf857x.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index a4fd78b9c0e4..e94c34920241 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -84,6 +84,7 @@ MODULE_DEVICE_TABLE(of, pcf857x_of_table); */ struct pcf857x { struct gpio_chip chip; + struct irq_chip irqchip; struct i2c_client *client; struct mutex lock; /* protect 'out' */ unsigned out; /* software latch */ @@ -252,18 +253,6 @@ static void pcf857x_irq_bus_sync_unlock(struct irq_data *data) mutex_unlock(&gpio->lock); } -static struct irq_chip pcf857x_irq_chip = { - .name = "pcf857x", - .irq_enable = pcf857x_irq_enable, - .irq_disable = pcf857x_irq_disable, - .irq_ack = noop, - .irq_mask = noop, - .irq_unmask = noop, - .irq_set_wake = pcf857x_irq_set_wake, - .irq_bus_lock = pcf857x_irq_bus_lock, - .irq_bus_sync_unlock = pcf857x_irq_bus_sync_unlock, -}; - /*-------------------------------------------------------------------------*/ static int pcf857x_probe(struct i2c_client *client, @@ -376,8 +365,17 @@ static int pcf857x_probe(struct i2c_client *client, /* Enable irqchip if we have an interrupt */ if (client->irq) { + gpio->irqchip.name = "pcf857x", + gpio->irqchip.irq_enable = pcf857x_irq_enable, + gpio->irqchip.irq_disable = pcf857x_irq_disable, + gpio->irqchip.irq_ack = noop, + gpio->irqchip.irq_mask = noop, + gpio->irqchip.irq_unmask = noop, + gpio->irqchip.irq_set_wake = pcf857x_irq_set_wake, + gpio->irqchip.irq_bus_lock = pcf857x_irq_bus_lock, + gpio->irqchip.irq_bus_sync_unlock = pcf857x_irq_bus_sync_unlock, status = gpiochip_irqchip_add_nested(&gpio->chip, - &pcf857x_irq_chip, + &gpio->irqchip, 0, handle_level_irq, IRQ_TYPE_NONE); if (status) { @@ -392,7 +390,7 @@ static int pcf857x_probe(struct i2c_client *client, if (status) goto fail; - gpiochip_set_nested_irqchip(&gpio->chip, &pcf857x_irq_chip, + gpiochip_set_nested_irqchip(&gpio->chip, &gpio->irqchip, client->irq); gpio->irq_parent = client->irq; } From 5cb3a9220995cfb6234e664a5fa0f001e5ad2256 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Jan 2019 21:30:36 +0100 Subject: [PATCH 1636/2608] gfs2: Revert "Fix loop in gfs2_rbm_find" commit e74c98ca2d6ae4376cc15fa2a22483430909d96b upstream. This reverts commit 2d29f6b96d8f80322ed2dd895bca590491c38d34. It turns out that the fix can lead to a ~20 percent performance regression in initial writes to the page cache according to iozone. Let's revert this for now to have more time for a proper fix. Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 914cb3d72ddf..b0eee90738ff 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1695,9 +1695,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, goto next_iter; } if (ret == -E2BIG) { - n += rbm->bii - initial_bii; rbm->bii = 0; rbm->offset = 0; + n += (rbm->bii - initial_bii); goto res_covered_end_of_rgrp; } return ret; From 6af069a10b9513b338582e310802225da145f7d8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 19 Jan 2019 16:31:00 +0100 Subject: [PATCH 1637/2608] mmc: bcm2835: Fix DMA channel leak on probe error commit 8c9620b1cc9b69e82fa8d4081d646d0016b602e7 upstream. The BCM2835 MMC host driver requests a DMA channel on probe but neglects to release the channel in the probe error path. The channel may therefore be leaked, in particular if devm_clk_get() causes probe deferral. Fix it. Fixes: 660fc733bd74 ("mmc: bcm2835: Add new driver for the sdhost controller.") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v4.12+ Cc: Frank Pavlic Tested-by: Stefan Wahren Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/bcm2835.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index 768972af8b85..0d3b7473bc21 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -1427,6 +1427,8 @@ static int bcm2835_probe(struct platform_device *pdev) err: dev_dbg(dev, "%s -> err %d\n", __func__, ret); + if (host->dma_chan_rxtx) + dma_release_channel(host->dma_chan_rxtx); mmc_free_host(mmc); return ret; From a93404876d977150b37854b7c49253d9bcf6e726 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 29 Jan 2019 15:38:21 +0800 Subject: [PATCH 1638/2608] ALSA: hda/realtek - Fixed hp_pin no value commit 693abe11aa6b27aed6eb8222162f8fb986325cef upstream. Fix hp_pin always no value. [More notes on the changes: The hp_pin value that is referred in alc294_hp_init() is always zero at the moment the function gets called, hence this is actually useless as in the current code. And, this kind of init sequence should be called from the codec init callback, instead of the parser function. So, the first fix in this patch to move the call call into its own init_hook. OTOH, this function is needed to be called only once after the boot, and it'd take too long for invoking at each resume (where the init callback gets called). So we add a new flag and invoke this only once as an additional fix. The one case is still not covered, though: S4 resume. But this change itself won't lead to any regression in that regard, so we leave S4 issue as is for now and fix it later. -- tiwai ] Fixes: bde1a7459623 ("ALSA: hda/realtek - Fixed headphone issue for ALC700") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 78 ++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1191d8925c44..972fd95f08ca 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -118,6 +118,7 @@ struct alc_spec { int codec_variant; /* flag for other variants */ unsigned int has_alc5505_dsp:1; unsigned int no_depop_delay:1; + unsigned int done_hp_init:1; /* for PLL fix */ hda_nid_t pll_nid; @@ -3213,6 +3214,48 @@ static void alc_default_shutup(struct hda_codec *codec) snd_hda_shutup_pins(codec); } +static void alc294_hp_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + int i, val; + + if (!hp_pin) + return; + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + msleep(100); + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */ + alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */ + + /* Wait for depop procedure finish */ + val = alc_read_coefex_idx(codec, 0x58, 0x01); + for (i = 0; i < 20 && val & 0x0080; i++) { + msleep(50); + val = alc_read_coefex_idx(codec, 0x58, 0x01); + } + /* Set HP depop to auto mode */ + alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b); + msleep(50); +} + +static void alc294_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + if (!spec->done_hp_init) { + alc294_hp_init(codec); + spec->done_hp_init = true; + } + alc_default_init(codec); +} + static void alc5505_coef_set(struct hda_codec *codec, unsigned int index_reg, unsigned int val) { @@ -6981,37 +7024,6 @@ static void alc269_fill_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x4, 0, 1<<11); } -static void alc294_hp_init(struct hda_codec *codec) -{ - struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; - int i, val; - - if (!hp_pin) - return; - - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); - - msleep(100); - - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); - - alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */ - alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */ - - /* Wait for depop procedure finish */ - val = alc_read_coefex_idx(codec, 0x58, 0x01); - for (i = 0; i < 20 && val & 0x0080; i++) { - msleep(50); - val = alc_read_coefex_idx(codec, 0x58, 0x01); - } - /* Set HP depop to auto mode */ - alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b); - msleep(50); -} - /* */ static int patch_alc269(struct hda_codec *codec) @@ -7148,7 +7160,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC294; spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */ - alc294_hp_init(codec); + spec->init_hook = alc294_init; break; case 0x10ec0300: spec->codec_variant = ALC269_TYPE_ALC300; @@ -7160,7 +7172,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ - alc294_hp_init(codec); + spec->init_hook = alc294_init; break; } From 31ba885621f23a53c8651665ae04e716c0a30694 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 17 Jan 2019 12:42:04 -0800 Subject: [PATCH 1639/2608] IB/hfi1: Remove overly conservative VM_EXEC flag check commit 7709b0dc265f28695487712c45f02bbd1f98415d upstream. Applications that use the stack for execution purposes cause userspace PSM jobs to fail during mmap(). Both Fortran (non-standard format parsing) and C (callback functions located in the stack) applications can be written such that stack execution is required. The linker notes this via the gnu_stack ELF flag. This causes READ_IMPLIES_EXEC to be set which forces all PROT_READ mmaps to have PROT_EXEC for the process. Checking for VM_EXEC bit and failing the request with EPERM is overly conservative and will break any PSM application using executable stacks. Cc: #v4.14+ Fixes: 12220267645c ("IB/hfi: Protect against writable mmap") Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 9abc5a9c47a0..76861a8b5c1e 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -605,7 +605,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vmf = 1; break; case STATUS: - if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) { + if (flags & VM_WRITE) { ret = -EPERM; goto done; } From 420c41ca488ba068c31c40537b4938f409277986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Wed, 31 Oct 2018 17:21:27 -0700 Subject: [PATCH 1640/2608] platform/x86: asus-nb-wmi: Map 0x35 to KEY_SCREENLOCK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b3f2f3799a972d3863d0fdc2ab6287aef6ca631f ] When the OS registers to handle events from the display off hotkey the EC will send a notification with 0x35 for every key press, independent of the backlight state. The behavior of this key on Windows, with the ATKACPI driver from Asus installed, is turning off the backlight of all connected displays with a fading effect, and any cursor input or key press turning the backlight back on. The key press or cursor input that wakes up the display is also passed through to the application under the cursor or under focus. The key that matches this behavior the closest is KEY_SCREENLOCK. Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/asus-nb-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index a6a33327f5e7..2a697bf572c9 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -435,6 +435,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x32, { KEY_MUTE } }, { KE_KEY, 0x33, { KEY_DISPLAYTOGGLE } }, /* LCD on */ { KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */ + { KE_KEY, 0x35, { KEY_SCREENLOCK } }, { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, { KE_KEY, 0x41, { KEY_NEXTSONG } }, { KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */ From 3b3dba9ba8b2717229489ba45943b46ad75da003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Wed, 31 Oct 2018 17:21:28 -0700 Subject: [PATCH 1641/2608] platform/x86: asus-nb-wmi: Drop mapping of 0x33 and 0x34 scan codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 71b12beaf12f21a53bfe100795d0797f1035b570 ] According to Asus firmware engineers, the meaning of these codes is only to notify the OS that the screen brightness has been turned on/off by the EC. This does not match the meaning of KEY_DISPLAYTOGGLE / KEY_DISPLAY_OFF, where userspace is expected to change the display brightness. Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/asus-nb-wmi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 2a697bf572c9..9c4b0d7f15c3 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -433,8 +433,6 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, - { KE_KEY, 0x33, { KEY_DISPLAYTOGGLE } }, /* LCD on */ - { KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */ { KE_KEY, 0x35, { KEY_SCREENLOCK } }, { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, { KE_KEY, 0x41, { KEY_NEXTSONG } }, From 28f4952ab7ff4f9494eae1fa4ee72a98ecd384b2 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 23 Dec 2018 21:59:17 +0100 Subject: [PATCH 1642/2608] mmc: sdhci-iproc: handle mmc_of_parse() errors during probe commit 2bd44dadd5bfb4135162322fd0b45a174d4ad5bf upstream. We need to handle mmc_of_parse() errors during probe. This finally fixes the wifi regression on Raspberry Pi 3 series. In error case the wifi chip was permanently in reset because of the power sequence depending on the deferred probe of the GPIO expander. Fixes: b580c52d58d9 ("mmc: sdhci-iproc: add IPROC SDHCI driver") Cc: stable@vger.kernel.org Signed-off-by: Stefan Wahren Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-iproc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 0cfbdb3ab68a..cecffcbd3ca8 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -278,7 +278,10 @@ static int sdhci_iproc_probe(struct platform_device *pdev) iproc_host->data = iproc_data; - mmc_of_parse(host->mmc); + ret = mmc_of_parse(host->mmc); + if (ret) + goto err; + sdhci_get_of_property(pdev); host->mmc->caps |= iproc_host->data->mmc_caps; From 3d98fc4d190f4e02f71ac4a52413fee028acaa8c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 1 Feb 2019 14:20:24 -0800 Subject: [PATCH 1643/2608] kernel/exit.c: release ptraced tasks before zap_pid_ns_processes commit 8fb335e078378c8426fabeed1ebee1fbf915690c upstream. Currently, exit_ptrace() adds all ptraced tasks in a dead list, then zap_pid_ns_processes() waits on all tasks in a current pidns, and only then are tasks from the dead list released. zap_pid_ns_processes() can get stuck on waiting tasks from the dead list. In this case, we will have one unkillable process with one or more dead children. Thanks to Oleg for the advice to release tasks in find_child_reaper(). Link: http://lkml.kernel.org/r/20190110175200.12442-1-avagin@gmail.com Fixes: 7c8bd2322c7f ("exit: ptrace: shift "reap dead" code from exit_ptrace() to forget_original_parent()") Signed-off-by: Andrei Vagin Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index e3a08761eb40..3aa01b74c1e3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -557,12 +557,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p) return NULL; } -static struct task_struct *find_child_reaper(struct task_struct *father) +static struct task_struct *find_child_reaper(struct task_struct *father, + struct list_head *dead) __releases(&tasklist_lock) __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *reaper = pid_ns->child_reaper; + struct task_struct *p, *n; if (likely(reaper != father)) return reaper; @@ -578,6 +580,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father) panic("Attempted to kill init! exitcode=0x%08x\n", father->signal->group_exit_code ?: father->exit_code); } + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } + zap_pid_ns_processes(pid_ns); write_lock_irq(&tasklist_lock); @@ -667,7 +675,7 @@ static void forget_original_parent(struct task_struct *father, exit_ptrace(father, dead); /* Can drop and reacquire tasklist_lock */ - reaper = find_child_reaper(father); + reaper = find_child_reaper(father, dead); if (list_empty(&father->children)) return; From 731785481a5af2475e0f0b2d6935718800653325 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 1 Feb 2019 14:20:31 -0800 Subject: [PATCH 1644/2608] oom, oom_reaper: do not enqueue same task twice commit 9bcdeb51bd7d2ae9fe65ea4d60643d2aeef5bfe3 upstream. Arkadiusz reported that enabling memcg's group oom killing causes strange memcg statistics where there is no task in a memcg despite the number of tasks in that memcg is not 0. It turned out that there is a bug in wake_oom_reaper() which allows enqueuing same task twice which makes impossible to decrease the number of tasks in that memcg due to a refcount leak. This bug existed since the OOM reaper became invokable from task_will_free_mem(current) path in out_of_memory() in Linux 4.7, T1@P1 |T2@P1 |T3@P1 |OOM reaper ----------+----------+----------+------------ # Processing an OOM victim in a different memcg domain. try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) out_of_memory() oom_kill_process(P1) do_send_sig_info(SIGKILL, @P1) mark_oom_victim(T1@P1) wake_oom_reaper(T1@P1) # T1@P1 is enqueued. mutex_unlock(&oom_lock) out_of_memory() mark_oom_victim(T2@P1) wake_oom_reaper(T2@P1) # T2@P1 is enqueued. mutex_unlock(&oom_lock) out_of_memory() mark_oom_victim(T1@P1) wake_oom_reaper(T1@P1) # T1@P1 is enqueued again due to oom_reaper_list == T2@P1 && T1@P1->oom_reaper_list == NULL. mutex_unlock(&oom_lock) # Completed processing an OOM victim in a different memcg domain. spin_lock(&oom_reaper_lock) # T1P1 is dequeued. spin_unlock(&oom_reaper_lock) but memcg's group oom killing made it easier to trigger this bug by calling wake_oom_reaper() on the same task from one out_of_memory() request. Fix this bug using an approach used by commit 855b018325737f76 ("oom, oom_reaper: disable oom_reaper for oom_kill_allocating_task"). As a side effect of this patch, this patch also avoids enqueuing multiple threads sharing memory via task_will_free_mem(current) path. Link: http://lkml.kernel.org/r/e865a044-2c10-9858-f4ef-254bc71d6cc2@i-love.sakura.ne.jp Link: http://lkml.kernel.org/r/5ee34fc6-1485-34f8-8790-903ddabaa809@i-love.sakura.ne.jp Fixes: af8e15cc85a25315 ("oom, oom_reaper: do not enqueue task if it is on the oom_reaper_list head") Signed-off-by: Tetsuo Handa Reported-by: Arkadiusz Miskiewicz Tested-by: Arkadiusz Miskiewicz Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Tejun Heo Cc: Aleksa Sarai Cc: Jay Kamat Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/coredump.h | 1 + mm/oom_kill.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ec912d01126f..ecdc6542070f 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ +#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 58977f634ced..cd15e46b8b80 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -628,8 +628,8 @@ static void wake_oom_reaper(struct task_struct *tsk) if (!oom_reaper_th) return; - /* tsk is already queued? */ - if (tsk == oom_reaper_list || tsk->oom_reaper_list) + /* mm is already queued? */ + if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) return; get_task_struct(tsk); From 43f7e8bea6ea0b5d340badf4f608642c670e696e Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 1 Feb 2019 14:20:54 -0800 Subject: [PATCH 1645/2608] mm, oom: fix use-after-free in oom_kill_process commit cefc7ef3c87d02fc9307835868ff721ea12cc597 upstream. Syzbot instance running on upstream kernel found a use-after-free bug in oom_kill_process. On further inspection it seems like the process selected to be oom-killed has exited even before reaching read_lock(&tasklist_lock) in oom_kill_process(). More specifically the tsk->usage is 1 which is due to get_task_struct() in oom_evaluate_task() and the put_task_struct within for_each_thread() frees the tsk and for_each_thread() tries to access the tsk. The easiest fix is to do get/put across the for_each_thread() on the selected task. Now the next question is should we continue with the oom-kill as the previously selected task has exited? However before adding more complexity and heuristics, let's answer why we even look at the children of oom-kill selected task? The select_bad_process() has already selected the worst process in the system/memcg. Due to race, the selected process might not be the worst at the kill time but does that matter? The userspace can use the oom_score_adj interface to prefer children to be killed before the parent. I looked at the history but it seems like this is there before git history. Link: http://lkml.kernel.org/r/20190121215850.221745-1-shakeelb@google.com Reported-by: syzbot+7fbbfa368521945f0e3d@syzkaller.appspotmail.com Fixes: 6b0c81b3be11 ("mm, oom: reduce dependency on tasklist_lock") Signed-off-by: Shakeel Butt Reviewed-by: Roman Gushchin Acked-by: Michal Hocko Cc: David Rientjes Cc: Johannes Weiner Cc: Tetsuo Handa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/oom_kill.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index cd15e46b8b80..fe0aac2348e5 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -870,6 +870,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message) * still freeing memory. */ read_lock(&tasklist_lock); + + /* + * The task 'p' might have already exited before reaching here. The + * put_task_struct() will free task_struct 'p' while the loop still try + * to access the field of 'p', so, get an extra reference. + */ + get_task_struct(p); for_each_thread(p, t) { list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; @@ -889,6 +896,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) } } } + put_task_struct(p); read_unlock(&tasklist_lock); p = find_lock_task_mm(victim); From 0783205e1fb651f405e0421a7972011b05cc3460 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 1 Feb 2019 14:21:08 -0800 Subject: [PATCH 1646/2608] mm: hwpoison: use do_send_sig_info() instead of force_sig() commit 6376360ecbe525a9c17b3d081dfd88ba3e4ed65b upstream. Currently memory_failure() is racy against process's exiting, which results in kernel crash by null pointer dereference. The root cause is that memory_failure() uses force_sig() to forcibly kill asynchronous (meaning not in the current context) processes. As discussed in thread https://lkml.org/lkml/2010/6/8/236 years ago for OOM fixes, this is not a right thing to do. OOM solves this issue by using do_send_sig_info() as done in commit d2d393099de2 ("signal: oom_kill_task: use SEND_SIG_FORCED instead of force_sig()"), so this patch is suggesting to do the same for hwpoison. do_send_sig_info() properly accesses to siglock with lock_task_sighand(), so is free from the reported race. I confirmed that the reported bug reproduces with inserting some delay in kill_procs(), and it never reproduces with this patch. Note that memory_failure() can send another type of signal using force_sig_mceerr(), and the reported race shouldn't happen on it because force_sig_mceerr() is called only for synchronous processes (i.e. BUS_MCEERR_AR happens only when some process accesses to the corrupted memory.) Link: http://lkml.kernel.org/r/20190116093046.GA29835@hori1.linux.bs1.fc.nec.co.jp Signed-off-by: Naoya Horiguchi Reported-by: Jane Chu Reviewed-by: Dan Williams Reviewed-by: William Kucharski Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 345e69d88b37..ef080fa682a6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -339,7 +339,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, if (fail || tk->addr_valid == 0) { pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", pfn, tk->tsk->comm, tk->tsk->pid); - force_sig(SIGKILL, tk->tsk); + do_send_sig_info(SIGKILL, SEND_SIG_PRIV, + tk->tsk, PIDTYPE_PID); } /* From 4ebbe06b6a56e18054e533f59d56ad00142ab5f7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 1 Feb 2019 14:21:19 -0800 Subject: [PATCH 1647/2608] mm: migrate: don't rely on __PageMovable() of newpage after unlocking it commit e0a352fabce61f730341d119fbedf71ffdb8663f upstream. We had a race in the old balloon compaction code before b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature") refactored it that became visible after backporting 195a8c43e93d ("virtio-balloon: deflate via a page list") without the refactoring. The bug existed from commit d6d86c0a7f8d ("mm/balloon_compaction: redesign ballooned pages management") till b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature"). d6d86c0a7f8d ("mm/balloon_compaction: redesign ballooned pages management") was backported to 3.12, so the broken kernels are stable kernels [3.12 - 4.7]. There was a subtle race between dropping the page lock of the newpage in __unmap_and_move() and checking for __is_movable_balloon_page(newpage). Just after dropping this page lock, virtio-balloon could go ahead and deflate the newpage, effectively dequeueing it and clearing PageBalloon, in turn making __is_movable_balloon_page(newpage) fail. This resulted in dropping the reference of the newpage via putback_lru_page(newpage) instead of put_page(newpage), leading to page->lru getting modified and a !LRU page ending up in the LRU lists. With 195a8c43e93d ("virtio-balloon: deflate via a page list") backported, one would suddenly get corrupted lists in release_pages_balloon(): - WARNING: CPU: 13 PID: 6586 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0 - list_del corruption. prev->next should be ffffe253961090a0, but was dead000000000100 Nowadays this race is no longer possible, but it is hidden behind very ugly handling of __ClearPageMovable() and __PageMovable(). __ClearPageMovable() will not make __PageMovable() fail, only PageMovable(). So the new check (__PageMovable(newpage)) will still hold even after newpage was dequeued by virtio-balloon. If anybody would ever change that special handling, the BUG would be introduced again. So instead, make it explicit and use the information of the original isolated page before migration. This patch can be backported fairly easy to stable kernels (in contrast to the refactoring). Link: http://lkml.kernel.org/r/20190129233217.10747-1-david@redhat.com Fixes: d6d86c0a7f8d ("mm/balloon_compaction: redesign ballooned pages management") Signed-off-by: David Hildenbrand Reported-by: Vratislav Bendel Acked-by: Michal Hocko Acked-by: Rafael Aquini Cc: Mel Gorman Cc: "Kirill A. Shutemov" Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Jan Kara Cc: Andrea Arcangeli Cc: Dominik Brodowski Cc: Matthew Wilcox Cc: Vratislav Bendel Cc: Rafael Aquini Cc: Konstantin Khlebnikov Cc: Minchan Kim Cc: [3.12 - 4.7] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index cbb025239071..8c57cdd77ba5 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1106,10 +1106,13 @@ out: * If migration is successful, decrease refcount of the newpage * which will not free the page because new page owner increased * refcounter. As well, if it is LRU page, add the page to LRU - * list in here. + * list in here. Use the old state of the isolated source page to + * determine if we migrated a LRU page. newpage was already unlocked + * and possibly modified by its owner - don't rely on the page + * state. */ if (rc == MIGRATEPAGE_SUCCESS) { - if (unlikely(__PageMovable(newpage))) + if (unlikely(!is_lru)) put_page(newpage); else putback_lru_page(newpage); From fafc8e09d1e92351969ab6d2ad237f3c266fe2d8 Mon Sep 17 00:00:00 2001 From: Alexei Naberezhnov Date: Tue, 27 Mar 2018 16:54:16 -0700 Subject: [PATCH 1648/2608] md/raid5: fix 'out of memory' during raid cache recovery commit 483cbbeddd5fe2c80fd4141ff0748fa06c4ff146 upstream. This fixes the case when md array assembly fails because of raid cache recovery unable to allocate a stripe, despite attempts to replay stripes and increase cache size. This happens because stripes released by r5c_recovery_replay_stripes and raid5_set_cache_size don't become available for allocation immediately. Released stripes first are placed on conf->released_stripes list and require md thread to merge them on conf->inactive_list before they can be allocated. Patch allows final allocation attempt during cache recovery to wait for new stripes to become availabe for allocation. Cc: linux-raid@vger.kernel.org Cc: Shaohua Li Cc: linux-stable # 4.10+ Fixes: b4c625c67362 ("md/r5cache: r5cache recovery: part 1") Signed-off-by: Alexei Naberezhnov Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5-cache.c | 33 ++++++++++++++++++++++----------- drivers/md/raid5.c | 8 ++++++-- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 0d535b40cb3b..dcef761ab242 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1942,12 +1942,14 @@ out: } static struct stripe_head * -r5c_recovery_alloc_stripe(struct r5conf *conf, - sector_t stripe_sect) +r5c_recovery_alloc_stripe( + struct r5conf *conf, + sector_t stripe_sect, + int noblock) { struct stripe_head *sh; - sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0); + sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0); if (!sh) return NULL; /* no more stripe available */ @@ -2157,7 +2159,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, stripe_sect); if (!sh) { - sh = r5c_recovery_alloc_stripe(conf, stripe_sect); + sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1); /* * cannot get stripe from raid5_get_active_stripe * try replay some stripes @@ -2166,20 +2168,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, r5c_recovery_replay_stripes( cached_stripe_list, ctx); sh = r5c_recovery_alloc_stripe( - conf, stripe_sect); + conf, stripe_sect, 1); } if (!sh) { + int new_size = conf->min_nr_stripes * 2; pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n", mdname(mddev), - conf->min_nr_stripes * 2); - raid5_set_cache_size(mddev, - conf->min_nr_stripes * 2); - sh = r5c_recovery_alloc_stripe(conf, - stripe_sect); + new_size); + ret = raid5_set_cache_size(mddev, new_size); + if (conf->min_nr_stripes <= new_size / 2) { + pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n", + mdname(mddev), + ret, + new_size, + conf->min_nr_stripes, + conf->max_nr_stripes); + return -ENOMEM; + } + sh = r5c_recovery_alloc_stripe( + conf, stripe_sect, 0); } if (!sh) { pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n", - mdname(mddev)); + mdname(mddev)); return -ENOMEM; } list_add_tail(&sh->lru, cached_stripe_list); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index dbf51b4c21b3..7dbb74cd506a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6336,6 +6336,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page) int raid5_set_cache_size(struct mddev *mddev, int size) { + int result = 0; struct r5conf *conf = mddev->private; if (size <= 16 || size > 32768) @@ -6352,11 +6353,14 @@ raid5_set_cache_size(struct mddev *mddev, int size) mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) - if (!grow_one_stripe(conf, GFP_KERNEL)) + if (!grow_one_stripe(conf, GFP_KERNEL)) { + conf->min_nr_stripes = conf->max_nr_stripes; + result = -ENOMEM; break; + } mutex_unlock(&conf->cache_size_mutex); - return 0; + return result; } EXPORT_SYMBOL(raid5_set_cache_size); From 834adff8b5a711e6e934e75407e88fa4dc123d34 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 20 Nov 2018 15:16:36 -0200 Subject: [PATCH 1649/2608] cifs: Always resolve hostname before reconnecting commit 28eb24ff75c5ac130eb326b3b4d0dcecfc0f427d upstream. In case a hostname resolves to a different IP address (e.g. long running mounts), make sure to resolve it every time prior to calling generic_ip_connect() in reconnect. Suggested-by: Steve French Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 000b7bfa8cf0..48aa854c564a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -51,6 +51,7 @@ #include "cifs_unicode.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "dns_resolve.h" #include "ntlmssp.h" #include "nterr.h" #include "rfc1002pdu.h" @@ -313,6 +314,53 @@ static void cifs_prune_tlinks(struct work_struct *work); static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, const char *devname); +/* + * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may + * get their ip addresses changed at some point. + * + * This should be called with server->srv_mutex held. + */ +#ifdef CONFIG_CIFS_DFS_UPCALL +static int reconn_set_ipaddr(struct TCP_Server_Info *server) +{ + int rc; + int len; + char *unc, *ipaddr = NULL; + + if (!server->hostname) + return -EINVAL; + + len = strlen(server->hostname) + 3; + + unc = kmalloc(len, GFP_KERNEL); + if (!unc) { + cifs_dbg(FYI, "%s: failed to create UNC path\n", __func__); + return -ENOMEM; + } + snprintf(unc, len, "\\\\%s", server->hostname); + + rc = dns_resolve_server_name_to_ip(unc, &ipaddr); + kfree(unc); + + if (rc < 0) { + cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n", + __func__, server->hostname, rc); + return rc; + } + + rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr, + strlen(ipaddr)); + kfree(ipaddr); + + return !rc ? -1 : 0; +} +#else +static inline int reconn_set_ipaddr(struct TCP_Server_Info *server) +{ + return 0; +} +#endif + /* * cifs tcp session reconnection * @@ -408,6 +456,11 @@ cifs_reconnect(struct TCP_Server_Info *server) rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); + rc = reconn_set_ipaddr(server); + if (rc) { + cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", + __func__, rc); + } mutex_unlock(&server->srv_mutex); msleep(3000); } else { From 2f4da60e2133d85485c9fd4132d34dd6e6b1ebeb Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Jul 2018 10:29:10 +1000 Subject: [PATCH 1650/2608] drivers: core: Remove glue dirs from sysfs earlier commit 726e41097920a73e4c7c33385dcc0debb1281e18 upstream. For devices with a class, we create a "glue" directory between the parent device and the new device with the class name. This directory is never "explicitely" removed when empty however, this is left to the implicit sysfs removal done by kobject_release() when the object loses its last reference via kobject_put(). This is problematic because as long as it's not been removed from sysfs, it is still present in the class kset and in sysfs directory structure. The presence in the class kset exposes a use after free bug fixed by the previous patch, but the presence in sysfs means that until the kobject is released, which can take a while (especially with kobject debugging), any attempt at re-creating such as binding a new device for that class/parent pair, will result in a sysfs duplicate file name error. This fixes it by instead doing an explicit kobject_del() when the glue dir is empty, by keeping track of the number of child devices of the gluedir. This is made easy by the fact that all glue dir operations are done with a global mutex, and there's already a function (cleanup_glue_dir) called in all the right places taking that mutex that can be enhanced for this. It appears that this was in fact the intent of the function, but the implementation was wrong. Signed-off-by: Benjamin Herrenschmidt Acked-by: Linus Torvalds Cc: Guenter Roeck Cc: Zubin Mithra Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 ++ include/linux/kobject.h | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3f463a61f8cf..fc5bbb2519fe 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1571,6 +1571,8 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) return; mutex_lock(&gdp_mutex); + if (!kobject_has_children(glue_dir)) + kobject_del(glue_dir); kobject_put(glue_dir); mutex_unlock(&gdp_mutex); } diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e0a6205caa71..e232df1d9db2 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -117,6 +117,23 @@ extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); +/** + * kobject_has_children - Returns whether a kobject has children. + * @kobj: the object to test + * + * This will return whether a kobject has other kobjects as children. + * + * It does NOT account for the presence of attribute files, only sub + * directories. It also assumes there is no concurrent addition or + * removal of such children, and thus relies on external locking. + */ +static inline bool kobject_has_children(struct kobject *kobj) +{ + WARN_ON_ONCE(kref_read(&kobj->kref) == 0); + + return kobj->sd && kobj->sd->dir.subdirs; +} + struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; From 515160e3c4f2aa7e62963546b1e94d1c0a841df9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 30 Oct 2018 20:29:53 +0200 Subject: [PATCH 1651/2608] fanotify: fix handling of events on child sub-directory commit b469e7e47c8a075cc08bcd1e85d4365134bdcdd5 upstream. When an event is reported on a sub-directory and the parent inode has a mark mask with FS_EVENT_ON_CHILD|FS_ISDIR, the event will be sent to fsnotify() even if the event type is not in the parent mark mask (e.g. FS_OPEN). Further more, if that event happened on a mount or a filesystem with a mount/sb mark that does have that event type in their mask, the "on child" event will be reported on the mount/sb mark. That is not desired, because user will get a duplicate event for the same action. Note that the event reported on the victim inode is never merged with the event reported on the parent inode, because of the check in should_merge(): old_fsn->inode == new_fsn->inode. Fix this by looking for a match of an actual event type (i.e. not just FS_ISDIR) in parent's inode mark mask and by not reporting an "on child" event to group if event type is only found on mount/sb marks. [backport hint: The bug seems to have always been in fanotify, but this patch will only apply cleanly to v4.19.y] Cc: # v4.19 Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara [amir: backport to v4.9] Signed-off-by: Amir Goldstein Signed-off-by: Greg Kroah-Hartman --- fs/notify/fsnotify.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2bc61e7543dd..506da82ff3f1 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -158,9 +158,9 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask parent = dget_parent(dentry); p_inode = parent->d_inode; - if (unlikely(!fsnotify_inode_watches_children(p_inode))) + if (unlikely(!fsnotify_inode_watches_children(p_inode))) { __fsnotify_update_child_dentry_flags(p_inode); - else if (p_inode->i_fsnotify_mask & mask) { + } else if (p_inode->i_fsnotify_mask & mask & ~FS_EVENT_ON_CHILD) { struct name_snapshot name; /* we are notifying a parent so come up with the new mask which @@ -264,6 +264,10 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, else mnt = NULL; + /* An event "on child" is not intended for a mount mark */ + if (mask & FS_EVENT_ON_CHILD) + mnt = NULL; + /* * Optimization: srcu_read_lock() has a memory barrier which can * be expensive. It protects walking the *_fsnotify_marks lists. From 0d7866d54a2c2d708807930c3bfd38ab3ce0550d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Feb 2019 17:31:37 +0100 Subject: [PATCH 1652/2608] Linux 4.14.98 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 485afde0f1f1..7f561ef954f2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 97 +SUBLEVEL = 98 EXTRAVERSION = NAME = Petit Gorille From 8df7378b0de4f64c155f4a2a93005d680fc6828c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 11:55:49 +0200 Subject: [PATCH 1653/2608] drm/bufs: Fix Spectre v1 vulnerability [ Upstream commit a37805098900a6e73a55b3a43b7d3bcd987bb3f4 ] idx can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/gpu/drm/drm_bufs.c:1420 drm_legacy_freebufs() warn: potential spectre issue 'dma->buflist' [r] (local cap) Fix this by sanitizing idx before using it to index dma->buflist Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181016095549.GA23586@embeddedor.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_bufs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index 1ee84dd802d4..0f05b8d8fefa 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -36,6 +36,8 @@ #include #include "drm_legacy.h" +#include + static struct drm_map_list *drm_find_matching_map(struct drm_device *dev, struct drm_local_map *map) { @@ -1417,6 +1419,7 @@ int drm_legacy_freebufs(struct drm_device *dev, void *data, idx, dma->buf_count - 1); return -EINVAL; } + idx = array_index_nospec(idx, dma->buf_count); buf = dma->buflist[idx]; if (buf->file_priv != file_priv) { DRM_ERROR("Process %d freeing buffer not owned\n", From bd39c9485d6aa580d8059716ba43b6c3493c8a83 Mon Sep 17 00:00:00 2001 From: Slawomir Stepien Date: Sat, 20 Oct 2018 23:04:11 +0200 Subject: [PATCH 1654/2608] staging: iio: adc: ad7280a: handle error from __ad7280_read32() [ Upstream commit 0559ef7fde67bc6c83c6eb6329dbd6649528263e ] Inside __ad7280_read32(), the spi_sync_transfer() can fail with negative error code. This change will ensure that this error is being passed up in the call stack, so it can be handled. Signed-off-by: Slawomir Stepien Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/staging/iio/adc/ad7280a.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/adc/ad7280a.c b/drivers/staging/iio/adc/ad7280a.c index f85dde9805e0..f17f700ea04f 100644 --- a/drivers/staging/iio/adc/ad7280a.c +++ b/drivers/staging/iio/adc/ad7280a.c @@ -256,7 +256,9 @@ static int ad7280_read(struct ad7280_state *st, unsigned int devaddr, if (ret) return ret; - __ad7280_read32(st, &tmp); + ret = __ad7280_read32(st, &tmp); + if (ret) + return ret; if (ad7280_check_crc(st, tmp)) return -EIO; @@ -294,7 +296,9 @@ static int ad7280_read_channel(struct ad7280_state *st, unsigned int devaddr, ad7280_delay(st); - __ad7280_read32(st, &tmp); + ret = __ad7280_read32(st, &tmp); + if (ret) + return ret; if (ad7280_check_crc(st, tmp)) return -EIO; @@ -327,7 +331,9 @@ static int ad7280_read_all_channels(struct ad7280_state *st, unsigned int cnt, ad7280_delay(st); for (i = 0; i < cnt; i++) { - __ad7280_read32(st, &tmp); + ret = __ad7280_read32(st, &tmp); + if (ret) + return ret; if (ad7280_check_crc(st, tmp)) return -EIO; @@ -370,7 +376,10 @@ static int ad7280_chain_setup(struct ad7280_state *st) return ret; for (n = 0; n <= AD7280A_MAX_CHAIN; n++) { - __ad7280_read32(st, &val); + ret = __ad7280_read32(st, &val); + if (ret) + return ret; + if (val == 0) return n - 1; From 5427c8d2775aac06665a660a2d00b3c6ffc740ba Mon Sep 17 00:00:00 2001 From: Deepak Sharma Date: Tue, 23 Oct 2018 17:35:48 +0100 Subject: [PATCH 1655/2608] drm/vgem: Fix vgem_init to get drm device available. [ Upstream commit d5c04dff24870ef07ce6453a3f4e1ffd9cf88d27 ] Modify vgem_init to take platform dev as parent in drm_dev_init. This will make drm device available at "/sys/devices/platform/vgem" in x86 chromebook. v2: rebase, address checkpatch typo and line over 80 characters Cc: Daniel Vetter Signed-off-by: Deepak Sharma Reviewed-by: Sean Paul Signed-off-by: Emil Velikov Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181023163550.15211-1-emil.l.velikov@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vgem/vgem_drv.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 2524ff116f00..81c7ab10c083 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -472,31 +472,31 @@ static int __init vgem_init(void) if (!vgem_device) return -ENOMEM; - ret = drm_dev_init(&vgem_device->drm, &vgem_driver, NULL); - if (ret) - goto out_free; - vgem_device->platform = platform_device_register_simple("vgem", -1, NULL, 0); if (IS_ERR(vgem_device->platform)) { ret = PTR_ERR(vgem_device->platform); - goto out_fini; + goto out_free; } dma_coerce_mask_and_coherent(&vgem_device->platform->dev, DMA_BIT_MASK(64)); + ret = drm_dev_init(&vgem_device->drm, &vgem_driver, + &vgem_device->platform->dev); + if (ret) + goto out_unregister; /* Final step: expose the device/driver to userspace */ ret = drm_dev_register(&vgem_device->drm, 0); if (ret) - goto out_unregister; + goto out_fini; return 0; -out_unregister: - platform_device_unregister(vgem_device->platform); out_fini: drm_dev_fini(&vgem_device->drm); +out_unregister: + platform_device_unregister(vgem_device->platform); out_free: kfree(vgem_device); return ret; From a9ee6b5a6e75e3b1ee12f9a642e19778d5433dbd Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 27 Oct 2018 10:15:33 +0200 Subject: [PATCH 1656/2608] pinctrl: bcm2835: Use raw spinlock for RT compatibility [ Upstream commit 3c7b30f704b6f5e53eed6bf89cf2c8d1b38b02c0 ] The BCM2835 pinctrl driver acquires a spinlock in its ->irq_enable, ->irq_disable and ->irq_set_type callbacks. Spinlocks become sleeping locks with CONFIG_PREEMPT_RT_FULL=y, therefore invocation of one of the callbacks in atomic context may cause a hard lockup if at least two GPIO pins in the same bank are used as interrupts. The issue doesn't occur with just a single interrupt pin per bank because the lock is never contended. I'm experiencing such lockups with GPIO 8 and 28 used as level-triggered interrupts, i.e. with ->irq_disable being invoked on reception of every IRQ. The critical section protected by the spinlock is very small (one bitop and one RMW of an MMIO register), hence converting to a raw spinlock seems a better trade-off than converting the driver to threaded IRQ handling (which would increase latency to handle an interrupt). Cc: Mathias Duckeck Signed-off-by: Lukas Wunner Acked-by: Julia Cartwright Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index ff782445dfb7..e72bf2502eca 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -92,7 +92,7 @@ struct bcm2835_pinctrl { struct gpio_chip gpio_chip; struct pinctrl_gpio_range gpio_range; - spinlock_t irq_lock[BCM2835_NUM_BANKS]; + raw_spinlock_t irq_lock[BCM2835_NUM_BANKS]; }; /* pins are just named GPIO0..GPIO53 */ @@ -471,10 +471,10 @@ static void bcm2835_gpio_irq_enable(struct irq_data *data) unsigned bank = GPIO_REG_OFFSET(gpio); unsigned long flags; - spin_lock_irqsave(&pc->irq_lock[bank], flags); + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); set_bit(offset, &pc->enabled_irq_map[bank]); bcm2835_gpio_irq_config(pc, gpio, true); - spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); } static void bcm2835_gpio_irq_disable(struct irq_data *data) @@ -486,12 +486,12 @@ static void bcm2835_gpio_irq_disable(struct irq_data *data) unsigned bank = GPIO_REG_OFFSET(gpio); unsigned long flags; - spin_lock_irqsave(&pc->irq_lock[bank], flags); + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); bcm2835_gpio_irq_config(pc, gpio, false); /* Clear events that were latched prior to clearing event sources */ bcm2835_gpio_set_bit(pc, GPEDS0, gpio); clear_bit(offset, &pc->enabled_irq_map[bank]); - spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); } static int __bcm2835_gpio_irq_set_type_disabled(struct bcm2835_pinctrl *pc, @@ -594,7 +594,7 @@ static int bcm2835_gpio_irq_set_type(struct irq_data *data, unsigned int type) unsigned long flags; int ret; - spin_lock_irqsave(&pc->irq_lock[bank], flags); + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); if (test_bit(offset, &pc->enabled_irq_map[bank])) ret = __bcm2835_gpio_irq_set_type_enabled(pc, gpio, type); @@ -606,7 +606,7 @@ static int bcm2835_gpio_irq_set_type(struct irq_data *data, unsigned int type) else irq_set_handler_locked(data, handle_level_irq); - spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); return ret; } @@ -1021,7 +1021,7 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) for_each_set_bit(offset, &events, 32) bcm2835_gpio_wr(pc, GPEDS0 + i * 4, BIT(offset)); - spin_lock_init(&pc->irq_lock[i]); + raw_spin_lock_init(&pc->irq_lock[i]); } err = gpiochip_add_data(&pc->gpio_chip, pc); From 2e68129649ff2a5786159628661c9003ae1336c0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 3 Nov 2018 22:21:22 +0100 Subject: [PATCH 1657/2608] ASoC: Intel: mrfld: fix uninitialized variable access [ Upstream commit 1539c7f23f256120f89f8b9ec53160790bce9ed2 ] Randconfig testing revealed a very old bug, with gcc-8: sound/soc/intel/atom/sst/sst_loader.c: In function 'sst_load_fw': sound/soc/intel/atom/sst/sst_loader.c:357:5: error: 'fw' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (fw == NULL) { ^ sound/soc/intel/atom/sst/sst_loader.c:354:25: note: 'fw' was declared here const struct firmware *fw; We must check the return code of request_firmware() before we look at the pointer result that may be uninitialized when the function fails. Fixes: 9012c9544eea ("ASoC: Intel: mrfld - Add DSP load and management") Signed-off-by: Arnd Bergmann Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/atom/sst/sst_loader.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/atom/sst/sst_loader.c b/sound/soc/intel/atom/sst/sst_loader.c index 33917146d9c4..054b1d514e8a 100644 --- a/sound/soc/intel/atom/sst/sst_loader.c +++ b/sound/soc/intel/atom/sst/sst_loader.c @@ -354,14 +354,14 @@ static int sst_request_fw(struct intel_sst_drv *sst) const struct firmware *fw; retval = request_firmware(&fw, sst->firmware_name, sst->dev); - if (fw == NULL) { - dev_err(sst->dev, "fw is returning as null\n"); - return -EINVAL; - } if (retval) { dev_err(sst->dev, "request fw failed %d\n", retval); return retval; } + if (fw == NULL) { + dev_err(sst->dev, "fw is returning as null\n"); + return -EINVAL; + } mutex_lock(&sst->sst_lock); retval = sst_cache_and_parse_fw(sst, fw); mutex_unlock(&sst->sst_lock); From 12101aa36f3d05bea09ec534cbc6b910880f78e6 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 19 Sep 2018 16:07:18 -0700 Subject: [PATCH 1658/2608] gpu: ipu-v3: image-convert: Prevent race between run and unprepare [ Upstream commit 819bec35c8c9706185498c9222bd244e0781ad35 ] Prevent possible race by parallel threads between ipu_image_convert_run() and ipu_image_convert_unprepare(). This involves setting ctx->aborting to true unconditionally so that no new job runs can be queued during unprepare, and holding the ctx->aborting flag until the context is freed. Note that the "normal" ipu_image_convert_abort() case (e.g. not during context unprepare) should clear the ctx->aborting flag after aborting any active run and clearing the context's pending queue. This is because it should be possible to continue to use the conversion context and queue more runs after an abort. Signed-off-by: Steve Longerbeam Tested-by: Philipp Zabel Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/ipu-v3/ipu-image-convert.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 524a717ab28e..a5e33d58e02f 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1518,7 +1518,7 @@ unlock: EXPORT_SYMBOL_GPL(ipu_image_convert_queue); /* Abort any active or pending conversions for this context */ -void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) +static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) { struct ipu_image_convert_chan *chan = ctx->chan; struct ipu_image_convert_priv *priv = chan->priv; @@ -1545,7 +1545,7 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) need_abort = (run_count || active_run); - ctx->aborting = need_abort; + ctx->aborting = true; spin_unlock_irqrestore(&chan->irqlock, flags); @@ -1566,7 +1566,11 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) dev_warn(priv->ipu->dev, "%s: timeout\n", __func__); force_abort(ctx); } +} +void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) +{ + __ipu_image_convert_abort(ctx); ctx->aborting = false; } EXPORT_SYMBOL_GPL(ipu_image_convert_abort); @@ -1580,7 +1584,7 @@ void ipu_image_convert_unprepare(struct ipu_image_convert_ctx *ctx) bool put_res; /* make sure no runs are hanging around */ - ipu_image_convert_abort(ctx); + __ipu_image_convert_abort(ctx); dev_dbg(priv->ipu->dev, "%s: task %u: removing ctx %p\n", __func__, chan->ic_task, ctx); From e90aa4877220f294916063ee737973a8fdcdd8d0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 2 Nov 2018 21:49:55 +0100 Subject: [PATCH 1659/2608] ath9k: dynack: use authentication messages for 'late' ack [ Upstream commit 3831a2a0010c72e3956020cbf1057a1701a2e469 ] In order to properly support dynack in ad-hoc mode running wpa_supplicant, take into account authentication frames for 'late ack' detection. This patch has been tested on devices mounted on offshore high-voltage stations connected through ~24Km link Reported-by: Koen Vandeputte Tested-by: Koen Vandeputte Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/dynack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/dynack.c b/drivers/net/wireless/ath/ath9k/dynack.c index 7334c9b09e82..cc0dc966c512 100644 --- a/drivers/net/wireless/ath/ath9k/dynack.c +++ b/drivers/net/wireless/ath/ath9k/dynack.c @@ -187,7 +187,8 @@ void ath_dynack_sample_tx_ts(struct ath_hw *ah, struct sk_buff *skb, /* late ACK */ if (ts->ts_status & ATH9K_TXERR_XRETRY) { if (ieee80211_is_assoc_req(hdr->frame_control) || - ieee80211_is_assoc_resp(hdr->frame_control)) { + ieee80211_is_assoc_resp(hdr->frame_control) || + ieee80211_is_auth(hdr->frame_control)) { ath_dbg(common, DYNACK, "late ack\n"); ath9k_hw_setslottime(ah, (LATEACK_TO - 3) / 2); ath9k_hw_set_ack_timeout(ah, LATEACK_TO); From 6adaeed09b2a9e2064bc4c6cacac2ab45fd4f820 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 23 Oct 2018 13:41:07 -0700 Subject: [PATCH 1660/2608] scsi: lpfc: Correct LCB RJT handling [ Upstream commit b114d9009d386276bfc3352289fc235781ae3353 ] When LCB's are rejected, if beaconing was already in progress, the Reason Code Explanation was not being set. Should have been set to command in progress. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_els.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 91783dbdf10c..fffe8a643e25 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5696,6 +5696,9 @@ error: stat = (struct ls_rjt *)(pcmd + sizeof(uint32_t)); stat->un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + if (shdr_add_status == ADD_STATUS_OPERATION_ALREADY_ACTIVE) + stat->un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS; + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; phba->fc_stat.elsXmitLSRJT++; rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0); From b3d88da4e057bf0648448fbacc26f1456402cace Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Wed, 31 Oct 2018 18:53:35 +0530 Subject: [PATCH 1661/2608] scsi: mpt3sas: Call sas_remove_host before removing the target devices [ Upstream commit dc730212e8a378763cb182b889f90c8101331332 ] Call sas_remove_host() before removing the target devices in the driver's .remove() callback function(i.e. during driver unload time). So that driver can provide a way to allow SYNC CACHE, START STOP unit commands etc. (which are issued from SML) to the target drives during driver unload time. Once sas_remove_host() is called before removing the target drives then driver can just clean up the resources allocated for target devices and no need to call sas_port_delete_phy(), sas_port_delete() API's as these API's internally called from sas_remove_host(). Signed-off-by: Suganath Prabu Reviewed-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_transport.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index ae5e579ac473..b28efddab7b1 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -8260,6 +8260,7 @@ static void scsih_remove(struct pci_dev *pdev) /* release all the volumes */ _scsih_ir_shutdown(ioc); + sas_remove_host(shost); list_for_each_entry_safe(raid_device, next, &ioc->raid_device_list, list) { if (raid_device->starget) { @@ -8296,7 +8297,6 @@ static void scsih_remove(struct pci_dev *pdev) ioc->sas_hba.num_phys = 0; } - sas_remove_host(shost); mpt3sas_base_detach(ioc); spin_lock(&gioc_lock); list_del(&ioc->list); diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index 63dd9bc21ff2..66d9f04c4c0b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -846,10 +846,13 @@ mpt3sas_transport_port_remove(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, mpt3sas_port->remote_identify.sas_address, mpt3sas_phy->phy_id); mpt3sas_phy->phy_belongs_to_port = 0; - sas_port_delete_phy(mpt3sas_port->port, mpt3sas_phy->phy); + if (!ioc->remove_host) + sas_port_delete_phy(mpt3sas_port->port, + mpt3sas_phy->phy); list_del(&mpt3sas_phy->port_siblings); } - sas_port_delete(mpt3sas_port->port); + if (!ioc->remove_host) + sas_port_delete(mpt3sas_port->port); kfree(mpt3sas_port); } From 1f52e001dde3b367f177cf3acf8406c51303b1fd Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 23 Oct 2018 13:41:03 -0700 Subject: [PATCH 1662/2608] scsi: lpfc: Fix LOGO/PLOGI handling when triggerd by ABTS Timeout event [ Upstream commit 30e196cacefdd9a38c857caed23cefc9621bc5c1 ] After a LOGO in response to an ABTS timeout, a PLOGI wasn't issued to re-establish the login. An nlp_type check in the LOGO completion handler failed to restart discovery for NVME targets. Revised the nlp_type check for NVME as well as SCSI. While reviewing the LOGO handling a few other issues were seen and were addressed: - Better lock synchronization around ndlp data types - When the ABTS times out, unregister the RPI before sending the LOGO so that all local exchange contexts are cleared and nothing received while awaiting LOGO/PLOGI handling will be accepted. - LOGO handling optimized to: Wait only R_A_TOV for a response. It doesn't need to be retried on timeout. If there wasn't a response, a PLOGI will be sent, thus an implicit logout applies as well when the other port sees it. If there is a response, any kind of response is considered "good" and the XRI quarantined for a exchange qualifier window. - PLOGI is issued as soon a LOGO state is resolved. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_els.c | 49 +++++++++++++----------------- drivers/scsi/lpfc/lpfc_nportdisc.c | 5 +++ 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index fffe8a643e25..57cddbc4a977 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -242,6 +242,8 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp, icmd->ulpCommand = CMD_ELS_REQUEST64_CR; if (elscmd == ELS_CMD_FLOGI) icmd->ulpTimeout = FF_DEF_RATOV * 2; + else if (elscmd == ELS_CMD_LOGO) + icmd->ulpTimeout = phba->fc_ratov; else icmd->ulpTimeout = phba->fc_ratov * 2; } else { @@ -2674,16 +2676,15 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, goto out; } + /* The LOGO will not be retried on failure. A LOGO was + * issued to the remote rport and a ACC or RJT or no Answer are + * all acceptable. Note the failure and move forward with + * discovery. The PLOGI will retry. + */ if (irsp->ulpStatus) { - /* Check for retry */ - if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { - /* ELS command is being retried */ - skip_recovery = 1; - goto out; - } /* LOGO failed */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, - "2756 LOGO failure DID:%06X Status:x%x/x%x\n", + "2756 LOGO failure, No Retry DID:%06X Status:x%x/x%x\n", ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4]); /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ @@ -2729,7 +2730,8 @@ out: * For any other port type, the rpi is unregistered as an implicit * LOGO. */ - if ((ndlp->nlp_type & NLP_FCP_TARGET) && (skip_recovery == 0)) { + if (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET) && + skip_recovery == 0) { lpfc_cancel_retry_delay_tmo(vport, ndlp); spin_lock_irqsave(shost->host_lock, flags); ndlp->nlp_flag |= NLP_NPR_2B_DISC; @@ -2762,6 +2764,8 @@ out: * will be stored into the context1 field of the IOCB for the completion * callback function to the LOGO ELS command. * + * Callers of this routine are expected to unregister the RPI first + * * Return code * 0 - successfully issued logo * 1 - failed to issue logo @@ -2803,22 +2807,6 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "Issue LOGO: did:x%x", ndlp->nlp_DID, 0, 0); - /* - * If we are issuing a LOGO, we may try to recover the remote NPort - * by issuing a PLOGI later. Even though we issue ELS cmds by the - * VPI, if we have a valid RPI, and that RPI gets unreg'ed while - * that ELS command is in-flight, the HBA returns a IOERR_INVALID_RPI - * for that ELS cmd. To avoid this situation, lets get rid of the - * RPI right now, before any ELS cmds are sent. - */ - spin_lock_irq(shost->host_lock); - ndlp->nlp_flag |= NLP_ISSUE_LOGO; - spin_unlock_irq(shost->host_lock); - if (lpfc_unreg_rpi(vport, ndlp)) { - lpfc_els_free_iocb(phba, elsiocb); - return 0; - } - phba->fc_stat.elsXmitLOGO++; elsiocb->iocb_cmpl = lpfc_cmpl_els_logo; spin_lock_irq(shost->host_lock); @@ -2826,7 +2814,6 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_flag &= ~NLP_ISSUE_LOGO; spin_unlock_irq(shost->host_lock); rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0); - if (rc == IOCB_ERROR) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_LOGO_SND; @@ -2834,6 +2821,11 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_els_free_iocb(phba, elsiocb); return 1; } + + spin_lock_irq(shost->host_lock); + ndlp->nlp_prev_state = ndlp->nlp_state; + spin_unlock_irq(shost->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_LOGO_ISSUE); return 0; } @@ -9483,7 +9475,8 @@ lpfc_sli_abts_recover_port(struct lpfc_vport *vport, "rport in state 0x%x\n", ndlp->nlp_state); return; } - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + lpfc_printf_log(phba, KERN_ERR, + LOG_ELS | LOG_FCP_ERROR | LOG_NVME_IOERR, "3094 Start rport recovery on shost id 0x%x " "fc_id 0x%06x vpi 0x%x rpi 0x%x state 0x%x " "flags 0x%x\n", @@ -9496,8 +9489,8 @@ lpfc_sli_abts_recover_port(struct lpfc_vport *vport, */ spin_lock_irqsave(shost->host_lock, flags); ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; + ndlp->nlp_flag |= NLP_ISSUE_LOGO; spin_unlock_irqrestore(shost->host_lock, flags); - lpfc_issue_els_logo(vport, ndlp, 0); - lpfc_nlp_set_state(vport, ndlp, NLP_STE_LOGO_ISSUE); + lpfc_unreg_rpi(vport, ndlp); } diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index d489f6827cc1..36fb549eb4e8 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -801,7 +801,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct Scsi_Host *shost = lpfc_shost_from_vport(vport); if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED)) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; + spin_unlock_irq(shost->host_lock); return 0; } @@ -816,7 +818,10 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) return 1; } } + + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; + spin_unlock_irq(shost->host_lock); lpfc_unreg_rpi(vport, ndlp); return 0; } From c871547ec28fe1268423f2f13c3441434ab65eac Mon Sep 17 00:00:00 2001 From: Yufen Wang Date: Fri, 2 Nov 2018 11:51:31 +0100 Subject: [PATCH 1663/2608] ARM: 8808/1: kexec:offline panic_smp_self_stop CPU [ Upstream commit 82c08c3e7f171aa7f579b231d0abbc1d62e91974 ] In case panic() and panic() called at the same time on different CPUS. For example: CPU 0: panic() __crash_kexec machine_crash_shutdown crash_smp_send_stop machine_kexec BUG_ON(num_online_cpus() > 1); CPU 1: panic() local_irq_disable panic_smp_self_stop If CPU 1 calls panic_smp_self_stop() before crash_smp_send_stop(), kdump fails. CPU1 can't receive the ipi irq, CPU1 will be always online. To fix this problem, this patch split out the panic_smp_self_stop() and add set_cpu_online(smp_processor_id(), false). Signed-off-by: Yufen Wang Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/smp.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e61af0600133..5e31c62127a0 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -691,6 +691,21 @@ void smp_send_stop(void) pr_warn("SMP: failed to stop secondary CPUs\n"); } +/* In case panic() and panic() called at the same time on CPU1 and CPU2, + * and CPU 1 calls panic_smp_self_stop() before crash_smp_send_stop() + * CPU1 can't receive the ipi irqs from CPU2, CPU1 will be always online, + * kdump fails. So split out the panic_smp_self_stop() and add + * set_cpu_online(smp_processor_id(), false). + */ +void panic_smp_self_stop(void) +{ + pr_debug("CPU %u will stop doing anything useful since another CPU has paniced\n", + smp_processor_id()); + set_cpu_online(smp_processor_id(), false); + while (1) + cpu_relax(); +} + /* * not supported here */ From 0ca40937b6e4411e0913258489a13cb654581e86 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 31 Oct 2018 15:41:41 +0800 Subject: [PATCH 1664/2608] clk: boston: fix possible memory leak in clk_boston_setup() [ Upstream commit 46fda5b5067a391912cf73bf3d32c26b6a22ad09 ] Smatch report warnings: drivers/clk/imgtec/clk-boston.c:76 clk_boston_setup() warn: possible memory leak of 'onecell' drivers/clk/imgtec/clk-boston.c:83 clk_boston_setup() warn: possible memory leak of 'onecell' drivers/clk/imgtec/clk-boston.c:90 clk_boston_setup() warn: possible memory leak of 'onecell' 'onecell' is malloced in clk_boston_setup(), but not be freed before leaving from the error handling cases. Signed-off-by: Yi Wang Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imgtec/clk-boston.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/clk/imgtec/clk-boston.c b/drivers/clk/imgtec/clk-boston.c index 15af423cc0c9..f5d54a64d33c 100644 --- a/drivers/clk/imgtec/clk-boston.c +++ b/drivers/clk/imgtec/clk-boston.c @@ -73,27 +73,32 @@ static void __init clk_boston_setup(struct device_node *np) hw = clk_hw_register_fixed_rate(NULL, "input", NULL, 0, in_freq); if (IS_ERR(hw)) { pr_err("failed to register input clock: %ld\n", PTR_ERR(hw)); - return; + goto error; } onecell->hws[BOSTON_CLK_INPUT] = hw; hw = clk_hw_register_fixed_rate(NULL, "sys", "input", 0, sys_freq); if (IS_ERR(hw)) { pr_err("failed to register sys clock: %ld\n", PTR_ERR(hw)); - return; + goto error; } onecell->hws[BOSTON_CLK_SYS] = hw; hw = clk_hw_register_fixed_rate(NULL, "cpu", "input", 0, cpu_freq); if (IS_ERR(hw)) { pr_err("failed to register cpu clock: %ld\n", PTR_ERR(hw)); - return; + goto error; } onecell->hws[BOSTON_CLK_CPU] = hw; err = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, onecell); if (err) pr_err("failed to add DT provider: %d\n", err); + + return; + +error: + kfree(onecell); } /* From ecaa10908412ccd001175f4ccce8ef78ae53c363 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 8 Nov 2018 14:04:50 -0500 Subject: [PATCH 1665/2608] dlm: Don't swamp the CPU with callbacks queued during recovery [ Upstream commit 216f0efd19b9cc32207934fd1b87a45f2c4c593e ] Before this patch, recovery would cause all callbacks to be delayed, put on a queue, and afterward they were all queued to the callback work queue. This patch does the same thing, but occasionally takes a break after 25 of them so it won't swamp the CPU at the expense of other RT processes like corosync. Signed-off-by: Bob Peterson Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/ast.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 07fed838d8fd..15fa4239ae9f 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -290,6 +290,8 @@ void dlm_callback_suspend(struct dlm_ls *ls) flush_workqueue(ls->ls_callback_wq); } +#define MAX_CB_QUEUE 25 + void dlm_callback_resume(struct dlm_ls *ls) { struct dlm_lkb *lkb, *safe; @@ -300,15 +302,23 @@ void dlm_callback_resume(struct dlm_ls *ls) if (!ls->ls_callback_wq) return; +more: mutex_lock(&ls->ls_cb_mutex); list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) { list_del_init(&lkb->lkb_cb_list); queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); count++; + if (count == MAX_CB_QUEUE) + break; } mutex_unlock(&ls->ls_cb_mutex); if (count) log_rinfo(ls, "dlm_callback_resume %d", count); + if (count == MAX_CB_QUEUE) { + count = 0; + cond_resched(); + goto more; + } } From 5fd505ca31586b2b812dbe10e0b70aa01081fb43 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 25 Oct 2018 14:52:31 +0100 Subject: [PATCH 1666/2608] x86/PCI: Fix Broadcom CNB20LE unintended sign extension (redux) [ Upstream commit 53bb565fc5439f2c8c57a786feea5946804aa3e9 ] In the expression "word1 << 16", word1 starts as u16, but is promoted to a signed int, then sign-extended to resource_size_t, which is probably not what was intended. Cast to resource_size_t to avoid the sign extension. This fixes an identical issue as fixed by commit 0b2d70764bb3 ("x86/PCI: Fix Broadcom CNB20LE unintended sign extension") back in 2014. Detected by CoverityScan, CID#138749, 138750 ("Unintended sign extension") Fixes: 3f6ea84a3035 ("PCI: read memory ranges out of Broadcom CNB20LE host bridge") Signed-off-by: Colin Ian King Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- arch/x86/pci/broadcom_bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index 526536c81ddc..ca1e8e6dccc8 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -50,8 +50,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func) word1 = read_pci_config_16(bus, slot, func, 0xc0); word2 = read_pci_config_16(bus, slot, func, 0xc2); if (word1 != word2) { - res.start = (word1 << 16) | 0x0000; - res.end = (word2 << 16) | 0xffff; + res.start = ((resource_size_t) word1 << 16) | 0x0000; + res.end = ((resource_size_t) word2 << 16) | 0xffff; res.flags = IORESOURCE_MEM; update_res(info, res.start, res.end, res.flags, 0); } From c365c1b7dd522103a9849b81ff7014e662d65f2b Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Thu, 4 Oct 2018 20:27:16 -0700 Subject: [PATCH 1667/2608] powerpc/pseries: add of_node_put() in dlpar_detach_node() [ Upstream commit 5b3f5c408d8cc59b87e47f1ab9803dbd006e4a91 ] The previous commit, "of: overlay: add missing of_node_get() in __of_attach_node_sysfs" added a missing of_node_get() to __of_attach_node_sysfs(). This results in a refcount imbalance for nodes attached with dlpar_attach_node(). The calling sequence from dlpar_attach_node() to __of_attach_node_sysfs() is: dlpar_attach_node() of_attach_node() __of_attach_node_sysfs() For more detailed description of the node refcount, see commit 68baf692c435 ("powerpc/pseries: Fix of_node_put() underflow during DLPAR remove"). Tested-by: Alan Tull Acked-by: Michael Ellerman Signed-off-by: Frank Rowand Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/dlpar.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index e9149d05d30b..f4e6565dd7a9 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -284,6 +284,8 @@ int dlpar_detach_node(struct device_node *dn) if (rc) return rc; + of_node_put(dn); + return 0; } From e867d75658251211cba421522c3272c6cd1dabe6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 17 Oct 2018 21:37:58 -0700 Subject: [PATCH 1668/2608] crypto: aes_ti - disable interrupts while accessing S-box [ Upstream commit 0a6a40c2a8c184a2fb467efacfb1cd338d719e0b ] In the "aes-fixed-time" AES implementation, disable interrupts while accessing the S-box, in order to make cache-timing attacks more difficult. Previously it was possible for the CPU to be interrupted while the S-box was loaded into L1 cache, potentially evicting the cachelines and causing later table lookups to be time-variant. In tests I did on x86 and ARM, this doesn't affect performance significantly. Responsiveness is potentially a concern, but interrupts are only disabled for a single AES block. Note that even after this change, the implementation still isn't necessarily guaranteed to be constant-time; see https://cr.yp.to/antiforgery/cachetiming-20050414.pdf for a discussion of the many difficulties involved in writing truly constant-time AES software. But it's valuable to make such attacks more difficult. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/Kconfig | 3 ++- crypto/aes_ti.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 5579eb88d460..84f99f8eca4b 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -930,7 +930,8 @@ config CRYPTO_AES_TI 8 for decryption), this implementation only uses just two S-boxes of 256 bytes each, and attempts to eliminate data dependent latencies by prefetching the entire table into the cache at the start of each - block. + block. Interrupts are also disabled to avoid races where cachelines + are evicted when the CPU is interrupted to do something else. config CRYPTO_AES_586 tristate "AES cipher algorithms (i586)" diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c index 03023b2290e8..1ff9785b30f5 100644 --- a/crypto/aes_ti.c +++ b/crypto/aes_ti.c @@ -269,6 +269,7 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) const u32 *rkp = ctx->key_enc + 4; int rounds = 6 + ctx->key_length / 4; u32 st0[4], st1[4]; + unsigned long flags; int round; st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in); @@ -276,6 +277,12 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8); st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12); + /* + * Temporarily disable interrupts to avoid races where cachelines are + * evicted when the CPU is interrupted to do something else. + */ + local_irq_save(flags); + st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128]; st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160]; st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192]; @@ -300,6 +307,8 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4); put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8); put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12); + + local_irq_restore(flags); } static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) @@ -308,6 +317,7 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) const u32 *rkp = ctx->key_dec + 4; int rounds = 6 + ctx->key_length / 4; u32 st0[4], st1[4]; + unsigned long flags; int round; st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in); @@ -315,6 +325,12 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8); st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12); + /* + * Temporarily disable interrupts to avoid races where cachelines are + * evicted when the CPU is interrupted to do something else. + */ + local_irq_save(flags); + st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128]; st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160]; st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192]; @@ -339,6 +355,8 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4); put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8); put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12); + + local_irq_restore(flags); } static struct crypto_alg aes_alg = { From 326f64c25bd4472a6ae62e77b59d9bd4a0fd840f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 9 Nov 2018 11:26:32 +0100 Subject: [PATCH 1669/2608] drm/vc4: ->x_scaling[1] should never be set to VC4_SCALING_NONE [ Upstream commit 0560054da5673b25d56bea6c57c8d069673af73b ] For the YUV conversion to work properly, ->x_scaling[1] should never be set to VC4_SCALING_NONE, but vc4_get_scaling_mode() might return VC4_SCALING_NONE if the horizontal scaling ratio exactly matches the horizontal subsampling factor. Add a test to turn VC4_SCALING_NONE into VC4_SCALING_PPF when that happens. The old ->x_scaling[0] adjustment is dropped as I couldn't find any mention to this constraint in the spec and it's proven to be unnecessary (I tested various multi-planar YUV formats with scaling disabled, and all of them worked fine without this adjustment). Fixes: fc04023fafec ("drm/vc4: Add support for YUV planes.") Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20181109102633.32603-1-boris.brezillon@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_plane.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 5bd3c2ef0067..6277a3f2d5d1 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -347,12 +347,14 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_get_scaling_mode(vc4_state->src_h[1], vc4_state->crtc_h); - /* YUV conversion requires that horizontal scaling be enabled, - * even on a plane that's otherwise 1:1. Looks like only PPF - * works in that case, so let's pick that one. + /* YUV conversion requires that horizontal scaling be enabled + * on the UV plane even if vc4_get_scaling_mode() returned + * VC4_SCALING_NONE (which can happen when the down-scaling + * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this + * case. */ - if (vc4_state->is_unity) - vc4_state->x_scaling[0] = VC4_SCALING_PPF; + if (vc4_state->x_scaling[1] == VC4_SCALING_NONE) + vc4_state->x_scaling[1] = VC4_SCALING_PPF; } else { vc4_state->is_yuv = false; vc4_state->x_scaling[1] = VC4_SCALING_NONE; From ba36191776b9ecf0a2b6d9caf1c65f315d5870c8 Mon Sep 17 00:00:00 2001 From: Andy Duan Date: Tue, 16 Oct 2018 07:32:22 +0000 Subject: [PATCH 1670/2608] serial: fsl_lpuart: clear parity enable bit when disable parity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 397bd9211fe014b347ca8f95a8f4e1017bac1aeb ] Current driver only enable parity enable bit and never clear it when user set the termios. The fix clear the parity enable bit when PARENB flag is not set in termios->c_cflag. Cc: Lukas Wunner Signed-off-by: Andy Duan Reviewed-by: Fabio Estevam Acked-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/fsl_lpuart.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index fd64ac2c1a74..716c33b2a11c 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1482,6 +1482,8 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, else cr1 &= ~UARTCR1_PT; } + } else { + cr1 &= ~UARTCR1_PE; } /* ask the core to calculate the divisor */ @@ -1694,6 +1696,8 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, else ctrl &= ~UARTCTRL_PT; } + } else { + ctrl &= ~UARTCTRL_PE; } /* ask the core to calculate the divisor */ From 4d703925ad2592abbeafccfa9e9047f4076247a0 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Nov 2018 11:14:43 +0100 Subject: [PATCH 1671/2608] ptp: check gettime64 return code in PTP_SYS_OFFSET ioctl [ Upstream commit 83d0bdc7390b890905634186baaa294475cd6a06 ] If a gettime64 call fails, return the error and avoid copying data back to user. Cc: Richard Cochran Cc: Jacob Keller Signed-off-by: Miroslav Lichvar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/ptp/ptp_chardev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index a421d6c551b6..ecb41eacd74b 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -228,7 +228,9 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; pct++; - ptp->info->gettime64(ptp->info, &ts); + err = ptp->info->gettime64(ptp->info, &ts); + if (err) + goto out; pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; pct++; @@ -281,6 +283,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) break; } +out: kfree(sysoff); return err; } From 88eb7bc5764846e9e363ae66ed74ccc559d96338 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 10 Nov 2018 00:12:06 +0000 Subject: [PATCH 1672/2608] MIPS: Boston: Disable EG20T prefetch [ Upstream commit 5ec17af7ead09701e23d2065e16db6ce4e137289 ] The Intel EG20T Platform Controller Hub used on the MIPS Boston development board supports prefetching memory to optimize DMA transfers. Unfortunately for unknown reasons this doesn't work well with some MIPS CPUs such as the P6600, particularly when using an I/O Coherence Unit (IOCU) to provide cache-coherent DMA. In these systems it is common for DMA data to be lost, resulting in broken access to EG20T devices such as the MMC or SATA controllers. Support for a DT property to configure the prefetching was added a while back by commit 549ce8f134bd ("misc: pch_phub: Read prefetch value from device tree if passed") but we never added the DT snippet to make use of it. Add that now in order to disable the prefetching & fix DMA on the affected systems. Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21068/ Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin --- arch/mips/boot/dts/img/boston.dts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/boot/dts/img/boston.dts b/arch/mips/boot/dts/img/boston.dts index f7aad80c69ab..bebb0fa21369 100644 --- a/arch/mips/boot/dts/img/boston.dts +++ b/arch/mips/boot/dts/img/boston.dts @@ -141,6 +141,12 @@ #size-cells = <2>; #interrupt-cells = <1>; + eg20t_phub@2,0,0 { + compatible = "pci8086,8801"; + reg = <0x00020000 0 0 0 0>; + intel,eg20t-prefetch = <0>; + }; + eg20t_mac@2,0,1 { compatible = "pci8086,8802"; reg = <0x00020100 0 0 0 0>; From cea8d9e424bd42ee7305c1b41d7f6b2846e472fb Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Sat, 3 Nov 2018 19:49:44 -0300 Subject: [PATCH 1673/2608] staging:iio:ad2s90: Make probe handle spi_setup failure [ Upstream commit b3a3eafeef769c6982e15f83631dcbf8d1794efb ] Previously, ad2s90_probe ignored the return code from spi_setup, not handling its possible failure. This patch makes ad2s90_probe check if the code is an error code and, if so, do the following: - Call dev_err with an appropriate error message. - Return the spi_setup's error code. Note: The 'return ret' statement could be out of the 'if' block, but this whole block will be moved up in the function in the patch: 'staging:iio:ad2s90: Move device registration to the end of probe'. Signed-off-by: Matheus Tavares Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/staging/iio/resolver/ad2s90.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/staging/iio/resolver/ad2s90.c b/drivers/staging/iio/resolver/ad2s90.c index b2270908f26f..cbee9ad00f0d 100644 --- a/drivers/staging/iio/resolver/ad2s90.c +++ b/drivers/staging/iio/resolver/ad2s90.c @@ -86,7 +86,12 @@ static int ad2s90_probe(struct spi_device *spi) /* need 600ns between CS and the first falling edge of SCLK */ spi->max_speed_hz = 830000; spi->mode = SPI_MODE_3; - spi_setup(spi); + ret = spi_setup(spi); + + if (ret < 0) { + dev_err(&spi->dev, "spi_setup failed!\n"); + return ret; + } return 0; } From 21c53f73ea03b20cebc9a4d23a39abf05e58dfd9 Mon Sep 17 00:00:00 2001 From: Andreas Puhm Date: Wed, 7 Nov 2018 11:51:47 -0600 Subject: [PATCH 1674/2608] fpga: altera-cvp: Fix registration for CvP incapable devices [ Upstream commit 68f60538daa4bc3da5d0764d46f391916fba20fd ] The probe function needs to verify the CvP enable bit in order to properly determine if FPGA Manager functionality can be safely enabled. Fixes: 34d1dc17ce97 ("fpga manager: Add Altera CvP driver") Signed-off-by: Andreas Puhm Signed-off-by: Anatolij Gustschin Reviewed-by: Moritz Fischer Acked-by: Alan Tull Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/fpga/altera-cvp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c index 00e73d28077c..b7558acd1a66 100644 --- a/drivers/fpga/altera-cvp.c +++ b/drivers/fpga/altera-cvp.c @@ -404,6 +404,7 @@ static int altera_cvp_probe(struct pci_dev *pdev, { struct altera_cvp_conf *conf; u16 cmd, val; + u32 regval; int ret; /* @@ -417,6 +418,14 @@ static int altera_cvp_probe(struct pci_dev *pdev, return -ENODEV; } + pci_read_config_dword(pdev, VSE_CVP_STATUS, ®val); + if (!(regval & VSE_CVP_STATUS_CVP_EN)) { + dev_err(&pdev->dev, + "CVP is disabled for this device: CVP_STATUS Reg 0x%x\n", + regval); + return -ENODEV; + } + conf = devm_kzalloc(&pdev->dev, sizeof(*conf), GFP_KERNEL); if (!conf) return -ENOMEM; From 1081bfdb18d7a02541e010ce0fb6a1642a13dc6e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 18 Oct 2018 05:09:32 +0000 Subject: [PATCH 1675/2608] Tools: hv: kvp: Fix a warning of buffer overflow with gcc 8.0.1 [ Upstream commit 4fcba7802c3e15a6e56e255871d6c72f829b9dd8 ] The patch fixes: hv_kvp_daemon.c: In function 'kvp_set_ip_info': hv_kvp_daemon.c:1305:2: note: 'snprintf' output between 41 and 4136 bytes into a destination of size 4096 The "(unsigned int)str_len" is to avoid: hv_kvp_daemon.c:1309:30: warning: comparison of integer expressions of different signedness: 'int' and 'long unsigned int' [-Wsign-compare] Signed-off-by: Dexuan Cui Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- tools/hv/hv_kvp_daemon.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 3965186b375a..62c9a503ae05 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1172,6 +1172,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) FILE *file; char cmd[PATH_MAX]; char *mac_addr; + int str_len; /* * Set the configuration for the specified interface with @@ -1295,8 +1296,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * invoke the external script to do its magic. */ - snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s", - "hv_set_ifconfig", if_file); + str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s", + "hv_set_ifconfig", if_file); + /* + * This is a little overcautious, but it's necessary to suppress some + * false warnings from gcc 8.0.1. + */ + if (str_len <= 0 || (unsigned int)str_len >= sizeof(cmd)) { + syslog(LOG_ERR, "Cmd '%s' (len=%d) may be too long", + cmd, str_len); + return HV_E_FAIL; + } + if (system(cmd)) { syslog(LOG_ERR, "Failed to execute cmd '%s'; error: %d %s", cmd, errno, strerror(errno)); From d5261a9b946352da750a5cadb344d08a2d2463d1 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 7 Nov 2018 18:49:39 -0800 Subject: [PATCH 1676/2608] platform/chrome: don't report EC_MKBP_EVENT_SENSOR_FIFO as wakeup [ Upstream commit 6ad16b78a039b45294b1ad5d69c14ac57b2fe706 ] EC_MKBP_EVENT_SENSOR_FIFO events can be triggered for a variety of reasons, and there are very few cases in which they should be treated as wakeup interrupts (particularly, when a certain MOTIONSENSE_MODULE_FLAG_* is set, but this is not even supported in the mainline cros_ec_sensor driver yet). Most of the time, they are benign sensor readings. In any case, the top-level cros_ec device doesn't know enough to determine that they should wake the system, and so it should not report the event. This would be the job of the cros_ec_sensors driver to parse. This patch adds checks to cros_ec_get_next_event() such that it doesn't signal 'wakeup' for events of type EC_MKBP_EVENT_SENSOR_FIFO. This patch is particularly relevant on devices like Scarlet (Rockchip RK3399 tablet, known as Acer Chromebook Tab 10), where the EC firmware reports sensor events much more frequently. This was causing /sys/power/wakeup_count to increase very frequently, often needlessly interrupting our ability to suspend the system. Signed-off-by: Brian Norris Signed-off-by: Benson Leung Signed-off-by: Sasha Levin --- drivers/platform/chrome/cros_ec_proto.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index e7bbdf947bbc..2ac4a7178470 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -551,6 +551,7 @@ static int get_keyboard_state_event(struct cros_ec_device *ec_dev) int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event) { + u8 event_type; u32 host_event; int ret; @@ -570,11 +571,22 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event) return ret; if (wake_event) { + event_type = ec_dev->event_data.event_type; host_event = cros_ec_get_host_event(ec_dev); - /* Consider non-host_event as wake event */ - *wake_event = !host_event || - !!(host_event & ec_dev->host_event_wake_mask); + /* + * Sensor events need to be parsed by the sensor sub-device. + * Defer them, and don't report the wakeup here. + */ + if (event_type == EC_MKBP_EVENT_SENSOR_FIFO) + *wake_event = false; + /* Masked host-events should not count as wake events. */ + else if (host_event && + !(host_event & ec_dev->host_event_wake_mask)) + *wake_event = false; + /* Consider all other events as wake events. */ + else + *wake_event = true; } return ret; From 0576a43acfb446c852915657845c1a289afeeeb5 Mon Sep 17 00:00:00 2001 From: Renato Lui Geh Date: Mon, 5 Nov 2018 17:14:58 -0200 Subject: [PATCH 1677/2608] staging: iio: ad7780: update voltage on read [ Upstream commit 336650c785b62c3bea7c8cf6061c933a90241f67 ] The ad7780 driver previously did not read the correct device output, as it read an outdated value set at initialization. It now updates its voltage on read. Signed-off-by: Renato Lui Geh Acked-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/staging/iio/adc/ad7780.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/staging/iio/adc/ad7780.c b/drivers/staging/iio/adc/ad7780.c index dec3ba6eba8a..52613f6a9dd8 100644 --- a/drivers/staging/iio/adc/ad7780.c +++ b/drivers/staging/iio/adc/ad7780.c @@ -87,12 +87,16 @@ static int ad7780_read_raw(struct iio_dev *indio_dev, long m) { struct ad7780_state *st = iio_priv(indio_dev); + int voltage_uv; switch (m) { case IIO_CHAN_INFO_RAW: return ad_sigma_delta_single_conversion(indio_dev, chan, val); case IIO_CHAN_INFO_SCALE: - *val = st->int_vref_mv * st->gain; + voltage_uv = regulator_get_voltage(st->reg); + if (voltage_uv < 0) + return voltage_uv; + *val = (voltage_uv / 1000) * st->gain; *val2 = chan->scan_type.realbits - 1; return IIO_VAL_FRACTIONAL_LOG2; case IIO_CHAN_INFO_OFFSET: From e6a73f46ddfcecb404e5e2401c3da2c5ce9b3d76 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 14 Nov 2018 11:50:19 +0000 Subject: [PATCH 1678/2608] usbnet: smsc95xx: fix rx packet alignment [ Upstream commit 810eeb1f41a9a272eedc94ca18c072e75678ede4 ] The smsc95xx driver already takes into account the NET_IP_ALIGN parameter when setting up the receive packet data, which means we do not need to worry about aligning the packets in the usbnet driver. Adding the EVENT_NO_IP_ALIGN means that the IPv4 header is now passed to the ip_rcv() routine with the start on an aligned address. Tested on Raspberry Pi B3. Signed-off-by: Ben Dooks Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc95xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 2f65975a121f..fc48da1c702d 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1295,6 +1295,7 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->features |= NETIF_F_RXCSUM; dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; + set_bit(EVENT_NO_IP_ALIGN, &dev->flags); smsc95xx_init_mac_address(dev); From 96a4a6a7fe18da4ee64e6097cc38acd7fc3aa342 Mon Sep 17 00:00:00 2001 From: Damian Kos Date: Tue, 6 Nov 2018 15:37:05 +0000 Subject: [PATCH 1679/2608] drm/rockchip: fix for mailbox read size [ Upstream commit fa68d4f8476bea4cdf441062b614b41bb85ef1da ] Some of the functions (like cdn_dp_dpcd_read, cdn_dp_get_edid_block) allow to read 64KiB, but the cdn_dp_mailbox_read_receive, that is used by them, can read only up to 255 bytes at once. Normally, it's not a big issue as DPCD or EDID reads won't (hopefully) exceed that value. The real issue here is the revocation list read during the HDCP authentication process. (problematic use case: https://chromium.googlesource.com/chromiumos/third_party/kernel/+/chromeos-4.4/drivers/gpu/drm/rockchip/cdn-dp-reg.c#1152) The list can reach 127*5+4 bytes (num devs * 5 bytes per ID/Bksv + 4 bytes of an additional info). In other words - CTSes with HDCP Repeater won't pass without this fix. Oh, and the driver will most likely stop working (best case scenario). Signed-off-by: Damian Kos Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/1541518625-25984-1-git-send-email-dkos@cadence.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/cdn-dp-reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.c b/drivers/gpu/drm/rockchip/cdn-dp-reg.c index b14d211f6c21..0ed7e91471f6 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-reg.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.c @@ -147,7 +147,7 @@ static int cdn_dp_mailbox_validate_receive(struct cdn_dp_device *dp, } static int cdn_dp_mailbox_read_receive(struct cdn_dp_device *dp, - u8 *buff, u8 buff_size) + u8 *buff, u16 buff_size) { u32 i; int ret; From 4f558b831736eb441c45056be10963b1e6aa53e6 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 17:52:07 -0700 Subject: [PATCH 1680/2608] ARM: OMAP2+: hwmod: Fix some section annotations [ Upstream commit c10b26abeb53cabc1e6271a167d3f3d396ce0218 ] When building the kernel with Clang, the following section mismatch warnings appears: WARNING: vmlinux.o(.text+0x2d398): Section mismatch in reference from the function _setup() to the function .init.text:_setup_iclk_autoidle() The function _setup() references the function __init _setup_iclk_autoidle(). This is often because _setup lacks a __init annotation or the annotation of _setup_iclk_autoidle is wrong. WARNING: vmlinux.o(.text+0x2d3a0): Section mismatch in reference from the function _setup() to the function .init.text:_setup_reset() The function _setup() references the function __init _setup_reset(). This is often because _setup lacks a __init annotation or the annotation of _setup_reset is wrong. WARNING: vmlinux.o(.text+0x2d408): Section mismatch in reference from the function _setup() to the function .init.text:_setup_postsetup() The function _setup() references the function __init _setup_postsetup(). This is often because _setup lacks a __init annotation or the annotation of _setup_postsetup is wrong. _setup is used in omap_hwmod_allocate_module, which isn't marked __init and looks like it shouldn't be, meaning to fix these warnings, those functions must be moved out of the init section, which this patch does. Signed-off-by: Nathan Chancellor Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/omap_hwmod.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 2dbd63239c54..45c8f2ef4e23 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2497,7 +2497,7 @@ static int __init _init(struct omap_hwmod *oh, void *data) * a stub; implementing this properly requires iclk autoidle usecounting in * the clock code. No return value. */ -static void __init _setup_iclk_autoidle(struct omap_hwmod *oh) +static void _setup_iclk_autoidle(struct omap_hwmod *oh) { struct omap_hwmod_ocp_if *os; @@ -2528,7 +2528,7 @@ static void __init _setup_iclk_autoidle(struct omap_hwmod *oh) * reset. Returns 0 upon success or a negative error code upon * failure. */ -static int __init _setup_reset(struct omap_hwmod *oh) +static int _setup_reset(struct omap_hwmod *oh) { int r; @@ -2589,7 +2589,7 @@ static int __init _setup_reset(struct omap_hwmod *oh) * * No return value. */ -static void __init _setup_postsetup(struct omap_hwmod *oh) +static void _setup_postsetup(struct omap_hwmod *oh) { u8 postsetup_state; From 17c8a0d7e13bae92cc819c9060a0d39b3ce860c5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:31 -0800 Subject: [PATCH 1681/2608] net/mlx5: EQ, Use the right place to store/read IRQ affinity hint [ Upstream commit 1e86ace4c140fd5a693e266c9b23409358f25381 ] Currently the cpu affinity hint mask for completion EQs is stored and read from the wrong place, since reading and storing is done from the same index, there is no actual issue with that, but internal irq_info for completion EQs stars at MLX5_EQ_VEC_COMP_BASE offset in irq_info array, this patch changes the code to use the correct offset to store and read the IRQ affinity hint. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 ++++++++------ include/linux/mlx5/driver.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bf34264c734b..14bab8a5550d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1605,7 +1605,7 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) { - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); + return cpumask_first(priv->mdev->priv.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e99f1382a4f0..558fc6a05e2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -619,18 +619,19 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) { struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int irq = pci_irq_vector(mdev->pdev, vecidx); - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); return -ENOMEM; } cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); + priv->irq_info[vecidx].mask); if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask)) mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); return 0; @@ -638,11 +639,12 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) { + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + int irq = pci_irq_vector(mdev->pdev, vecidx); irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); + free_cpumask_var(priv->irq_info[vecidx].mask); } static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fb677e4f902d..88f0c530fe9c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1195,7 +1195,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - return dev->priv.irq_info[vector].mask; + return dev->priv.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; } #endif /* MLX5_DRIVER_H */ From 20a7ded03f97005cd282f419bf35f22ab2ec74fc Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 23 Oct 2018 15:15:35 -0700 Subject: [PATCH 1682/2608] modpost: validate symbol names also in find_elf_symbol [ Upstream commit 5818c683a619c534c113e1f66d24f636defc29bc ] If an ARM mapping symbol shares an address with a valid symbol, find_elf_symbol can currently return the mapping symbol instead, as the symbol is not validated. This can result in confusing warnings: WARNING: vmlinux.o(.text+0x18f4028): Section mismatch in reference from the function set_reset_devices() to the variable .init.text:$x.0 This change adds a call to is_valid_name to find_elf_symbol, similarly to how it's already used in find_elf_symbol2. Signed-off-by: Sami Tolvanen Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/mod/modpost.c | 50 ++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 18bc8738e989..e36a673833ae 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1215,6 +1215,30 @@ static int secref_whitelist(const struct sectioncheck *mismatch, return 1; } +static inline int is_arm_mapping_symbol(const char *str) +{ + return str[0] == '$' && strchr("axtd", str[1]) + && (str[2] == '\0' || str[2] == '.'); +} + +/* + * If there's no name there, ignore it; likewise, ignore it if it's + * one of the magic symbols emitted used by current ARM tools. + * + * Otherwise if find_symbols_between() returns those symbols, they'll + * fail the whitelist tests and cause lots of false alarms ... fixable + * only by merging __exit and __init sections into __text, bloating + * the kernel (which is especially evil on embedded platforms). + */ +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) +{ + const char *name = elf->strtab + sym->st_name; + + if (!name || !strlen(name)) + return 0; + return !is_arm_mapping_symbol(name); +} + /** * Find symbol based on relocation record info. * In some cases the symbol supplied is a valid symbol so @@ -1240,6 +1264,8 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, continue; if (ELF_ST_TYPE(sym->st_info) == STT_SECTION) continue; + if (!is_valid_name(elf, sym)) + continue; if (sym->st_value == addr) return sym; /* Find a symbol nearby - addr are maybe negative */ @@ -1258,30 +1284,6 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, return NULL; } -static inline int is_arm_mapping_symbol(const char *str) -{ - return str[0] == '$' && strchr("axtd", str[1]) - && (str[2] == '\0' || str[2] == '.'); -} - -/* - * If there's no name there, ignore it; likewise, ignore it if it's - * one of the magic symbols emitted used by current ARM tools. - * - * Otherwise if find_symbols_between() returns those symbols, they'll - * fail the whitelist tests and cause lots of false alarms ... fixable - * only by merging __exit and __init sections into __text, bloating - * the kernel (which is especially evil on embedded platforms). - */ -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) -{ - const char *name = elf->strtab + sym->st_name; - - if (!name || !strlen(name)) - return 0; - return !is_arm_mapping_symbol(name); -} - /* * Find symbols before or equal addr and after addr - in the section sec. * If we find two symbols with equal offset prefer one with a valid name. From 3ad74a75e8f1e165f74caf0c351d6537b58e34ac Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Mon, 12 Nov 2018 15:40:51 +0800 Subject: [PATCH 1683/2608] perf tools: Add Hygon Dhyana support [ Upstream commit 4787eff3fa88f62fede6ed7afa06477ae6bf984d ] The tool perf is useful for the performance analysis on the Hygon Dhyana platform. But right now there is no Hygon support for it to analyze the KVM guest os data. So add Hygon Dhyana support to it by checking vendor string to share the code path of AMD. Signed-off-by: Pu Wen Acked-by: Borislav Petkov Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1542008451-31735-1-git-send-email-puwen@hygon.cn Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/arch/x86/util/kvm-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index b32409a0e546..081353d7b095 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -156,7 +156,7 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid) if (strstr(cpuid, "Intel")) { kvm->exit_reasons = vmx_exit_reasons; kvm->exit_reasons_isa = "VMX"; - } else if (strstr(cpuid, "AMD")) { + } else if (strstr(cpuid, "AMD") || strstr(cpuid, "Hygon")) { kvm->exit_reasons = svm_exit_reasons; kvm->exit_reasons_isa = "SVM"; } else From ddfc290f6bd89ce384a1ea529f0a2546e2972659 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 21 Nov 2018 07:49:12 -0500 Subject: [PATCH 1684/2608] soc/tegra: Don't leak device tree node reference [ Upstream commit 9eb40fa2cd2d1f6829e7b49bb22692f754b9cfe0 ] of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. soc_is_tegra() doesn't do that, so fix it. Signed-off-by: Yangtao Li Acked-by: Jon Hunter [treding: slightly rewrite to avoid inline comparison] Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/soc/tegra/common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c index cd8f41351add..7bfb154d6fa5 100644 --- a/drivers/soc/tegra/common.c +++ b/drivers/soc/tegra/common.c @@ -22,11 +22,15 @@ static const struct of_device_id tegra_machine_match[] = { bool soc_is_tegra(void) { + const struct of_device_id *match; struct device_node *root; root = of_find_node_by_path("/"); if (!root) return false; - return of_match_node(tegra_machine_match, root) != NULL; + match = of_match_node(tegra_machine_match, root); + of_node_put(root); + + return match != NULL; } From 0049727a4b991b227c03d43c6530fcbe3d2dc6fe Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 20 Oct 2018 13:50:19 -0400 Subject: [PATCH 1685/2608] media: mtk-vcodec: Release device nodes in mtk_vcodec_init_enc_pm() [ Upstream commit 8ea0f2ba0fa3f91ea1b8d823a54b042026ada6b3 ] of_parse_phandle() returns the device node with refcount incremented. There are two nodes that are used temporary in mtk_vcodec_init_enc_pm(), but their refcounts are not decremented. The patch adds one of_node_put() and fixes returning error codes. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c index 3e73e9db781f..7c025045ea90 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c @@ -41,25 +41,27 @@ int mtk_vcodec_init_enc_pm(struct mtk_vcodec_dev *mtkdev) node = of_parse_phandle(dev->of_node, "mediatek,larb", 0); if (!node) { mtk_v4l2_err("no mediatek,larb found"); - return -1; + return -ENODEV; } pdev = of_find_device_by_node(node); + of_node_put(node); if (!pdev) { mtk_v4l2_err("no mediatek,larb device found"); - return -1; + return -ENODEV; } pm->larbvenc = &pdev->dev; node = of_parse_phandle(dev->of_node, "mediatek,larb", 1); if (!node) { mtk_v4l2_err("no mediatek,larb found"); - return -1; + return -ENODEV; } pdev = of_find_device_by_node(node); + of_node_put(node); if (!pdev) { mtk_v4l2_err("no mediatek,larb device found"); - return -1; + return -ENODEV; } pm->larbvenclt = &pdev->dev; From c7d8d629fd6998675278471cc8d6ffd7de6efc41 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 23 Nov 2018 09:54:55 +0800 Subject: [PATCH 1686/2608] ptp: Fix pass zero to ERR_PTR() in ptp_clock_register [ Upstream commit aea0a897af9e44c258e8ab9296fad417f1bc063a ] Fix smatch warning: drivers/ptp/ptp_clock.c:298 ptp_clock_register() warn: passing zero to 'ERR_PTR' 'err' should be set while device_create_with_groups and pps_register_source fails Fixes: 85a66e550195 ("ptp: create "pins" together with the rest of attributes") Signed-off-by: YueHaibing Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/ptp/ptp_clock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 7eacc1c4b3b1..c64903a5978f 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -253,8 +253,10 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, ptp, ptp->pin_attr_groups, "ptp%d", ptp->index); - if (IS_ERR(ptp->dev)) + if (IS_ERR(ptp->dev)) { + err = PTR_ERR(ptp->dev); goto no_device; + } /* Register a new PPS source. */ if (info->pps) { @@ -265,6 +267,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, pps.owner = info->owner; ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS); if (!ptp->pps_source) { + err = -EINVAL; pr_err("failed to register pps source\n"); goto no_pps; } From 1a0924634b8d4b7ac726251354ecf264f0014d07 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 25 Oct 2018 11:05:25 -0700 Subject: [PATCH 1687/2608] dmaengine: xilinx_dma: Remove __aligned attribute on zynqmp_dma_desc_ll [ Upstream commit aeaebcc17cdf37065d2693865eeb1ff1c7dc5bf3 ] Clang warns: drivers/dma/xilinx/zynqmp_dma.c:166:4: warning: attribute 'aligned' is ignored, place it after "struct" to apply attribute to type declaration [-Wignored-attributes] }; __aligned(64) ^ ./include/linux/compiler_types.h:200:38: note: expanded from macro '__aligned' ^ 1 warning generated. As Nick pointed out in the previous version of this patch, the author likely intended for this struct to be 8-byte (64-bit) aligned, not 64-byte, which is the default. Remove the hanging __aligned attribute. Fixes: b0cc417c1637 ("dmaengine: Add Xilinx zynqmp dma engine driver support") Reported-by: Nick Desaulniers Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/zynqmp_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index 5cc8ed31f26b..6d86d05e53aa 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -159,7 +159,7 @@ struct zynqmp_dma_desc_ll { u32 ctrl; u64 nxtdscraddr; u64 rsvd; -}; __aligned(64) +}; /** * struct zynqmp_dma_desc_sw - Per Transaction structure From 723174b41234f3138137d2a3018d77de2733baf4 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 22 Nov 2018 08:46:43 +0100 Subject: [PATCH 1688/2608] iio: adc: meson-saradc: check for devm_kasprintf failure [ Upstream commit aad172b017617994343e36d8659c69e14cd694fd ] devm_kasprintf() may return NULL on failure of internal allocation thus the assignments to init.name are not safe if not checked. On error meson_sar_adc_clk_init() returns negative values so -ENOMEM in the (unlikely) failure case of devm_kasprintf() should be fine here. Signed-off-by: Nicholas Mc Guire Fixes: 3adbf3427330 ("iio: adc: add a driver for the SAR ADC found in Amlogic Meson SoCs") Acked-by: Martin Blumenstingl Tested-by: Martin Blumenstingl Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/meson_saradc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 11484cb38b84..9a52a91166d2 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -585,6 +585,9 @@ static int meson_sar_adc_clk_init(struct iio_dev *indio_dev, init.name = devm_kasprintf(&indio_dev->dev, GFP_KERNEL, "%pOF#adc_div", indio_dev->dev.of_node); + if (!init.name) + return -ENOMEM; + init.flags = 0; init.ops = &clk_divider_ops; clk_parents[0] = __clk_get_name(priv->clkin); @@ -604,6 +607,9 @@ static int meson_sar_adc_clk_init(struct iio_dev *indio_dev, init.name = devm_kasprintf(&indio_dev->dev, GFP_KERNEL, "%pOF#adc_en", indio_dev->dev.of_node); + if (!init.name) + return -ENOMEM; + init.flags = CLK_SET_RATE_PARENT; init.ops = &clk_gate_ops; clk_parents[0] = __clk_get_name(priv->adc_div_clk); From 47726c3cbe9ec8877752dd2754c21458d8078069 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 22 Nov 2018 23:01:11 +0100 Subject: [PATCH 1689/2608] iio: adc: meson-saradc: fix internal clock names [ Upstream commit 50314f98b0ac468218e7c9af8c99f215a35436df ] Before this patch we are registering the internal clocks (for example on Meson8b, where the SAR ADC IP block implements the divider and gate clocks) with the following names: - /soc/cbus@c1100000/adc@8680#adc_div - /soc/cbus@c1100000/adc@8680#adc_en This is bad because the common clock framework uses the clock to create a directory in /clk. With such name, the directory creation (silently) fails and the debugfs entry ends up being created at the debugfs root. With this change, the new clock names are: - c1108680.adc#adc_div - c1108680.adc#adc_en This matches the clock naming scheme used in the PWM, Ethernet and MMC drivers. It also fixes the problem with debugfs. The idea is shamelessly taken from commit b96e9eb62841c5 ("pwm: meson: Fix mux clock names"). Fixes: 3921db46a8c5bc ("iio: Convert to using %pOF instead of full_name") Signed-off-by: Martin Blumenstingl Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/meson_saradc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 9a52a91166d2..2515badf8b28 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -583,8 +583,8 @@ static int meson_sar_adc_clk_init(struct iio_dev *indio_dev, struct clk_init_data init; const char *clk_parents[1]; - init.name = devm_kasprintf(&indio_dev->dev, GFP_KERNEL, "%pOF#adc_div", - indio_dev->dev.of_node); + init.name = devm_kasprintf(&indio_dev->dev, GFP_KERNEL, "%s#adc_div", + dev_name(indio_dev->dev.parent)); if (!init.name) return -ENOMEM; @@ -605,8 +605,8 @@ static int meson_sar_adc_clk_init(struct iio_dev *indio_dev, if (WARN_ON(IS_ERR(priv->adc_div_clk))) return PTR_ERR(priv->adc_div_clk); - init.name = devm_kasprintf(&indio_dev->dev, GFP_KERNEL, "%pOF#adc_en", - indio_dev->dev.of_node); + init.name = devm_kasprintf(&indio_dev->dev, GFP_KERNEL, "%s#adc_en", + dev_name(indio_dev->dev.parent)); if (!init.name) return -ENOMEM; From f861389477c224be796ae3f28a5a720e140adc08 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 Nov 2018 12:10:14 +0100 Subject: [PATCH 1690/2608] iio: accel: kxcjk1013: Add KIOX010A ACPI Hardware-ID [ Upstream commit 7f6232e69539971cf9eaed07a6c14ab4a2361133 ] Various 2-in-1's use KIOX010A and KIOX020A as HIDs for 2 KXCJ91008 accelerometers. The KIOX010A HID is for the one in the base and the KIOX020A for the accelerometer in the keyboard. Since userspace does not have a way yet to deal with (or ignore) the accelerometer in the keyboard, this commit just adds the KIOX010A HID for now so that display rotation will work. Related: https://github.com/hadess/iio-sensor-proxy/issues/166 Signed-off-by: Hans de Goede Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/accel/kxcjk-1013.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 3f968c46e667..784636800361 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1393,6 +1393,7 @@ static const struct acpi_device_id kx_acpi_match[] = { {"KXCJ1008", KXCJ91008}, {"KXCJ9000", KXCJ91008}, {"KIOX000A", KXCJ91008}, + {"KIOX010A", KXCJ91008}, /* KXCJ91008 inside the display of a 2-in-1 */ {"KXTJ1009", KXTJ21009}, {"SMO8500", KXCJ91008}, { }, From cdde22f8d12e128b3db4606341bc089268794a31 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 8 Nov 2018 04:51:51 -0500 Subject: [PATCH 1691/2608] media: adv*/tc358743/ths8200: fill in min width/height/pixelclock [ Upstream commit 2912289a518077ddb8214e05336700148e97e235 ] The v4l2_dv_timings_cap struct is used to do sanity checks when setting and enumerating DV timings, ensuring that only valid timings as per the HW capabilities are allowed. However, many drivers just filled in 0 for the minimum width, height or pixelclock frequency. This can cause timings with e.g. 0 as width and height to be accepted, which will in turn lead to a potential division by zero. Fill in proper values are minimum boundaries. 640x350 was chosen since it is the smallest resolution in v4l2-dv-timings.h. Same for 13 MHz as the lowest pixelclock frequency (it's slightly below the minimum of 13.5 MHz in the v4l2-dv-timings.h header). Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ad9389b.c | 2 +- drivers/media/i2c/adv7511.c | 2 +- drivers/media/i2c/adv7604.c | 4 ++-- drivers/media/i2c/adv7842.c | 4 ++-- drivers/media/i2c/tc358743.c | 2 +- drivers/media/i2c/ths8200.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/media/i2c/ad9389b.c b/drivers/media/i2c/ad9389b.c index a056d6cdaaaa..f0b200ae2127 100644 --- a/drivers/media/i2c/ad9389b.c +++ b/drivers/media/i2c/ad9389b.c @@ -590,7 +590,7 @@ static const struct v4l2_dv_timings_cap ad9389b_timings_cap = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, 1920, 0, 1200, 25000000, 170000000, + V4L2_INIT_BT_TIMINGS(640, 1920, 350, 1200, 25000000, 170000000, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_GTF | V4L2_DV_BT_STD_CVT, V4L2_DV_BT_CAP_PROGRESSIVE | V4L2_DV_BT_CAP_REDUCED_BLANKING | diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c index 2817bafc67bf..80c20404334a 100644 --- a/drivers/media/i2c/adv7511.c +++ b/drivers/media/i2c/adv7511.c @@ -142,7 +142,7 @@ static const struct v4l2_dv_timings_cap adv7511_timings_cap = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, ADV7511_MAX_WIDTH, 0, ADV7511_MAX_HEIGHT, + V4L2_INIT_BT_TIMINGS(640, ADV7511_MAX_WIDTH, 350, ADV7511_MAX_HEIGHT, ADV7511_MIN_PIXELCLOCK, ADV7511_MAX_PIXELCLOCK, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_GTF | V4L2_DV_BT_STD_CVT, diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index f289b8aca1da..d2108aad3c65 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -778,7 +778,7 @@ static const struct v4l2_dv_timings_cap adv7604_timings_cap_analog = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, 1920, 0, 1200, 25000000, 170000000, + V4L2_INIT_BT_TIMINGS(640, 1920, 350, 1200, 25000000, 170000000, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_GTF | V4L2_DV_BT_STD_CVT, V4L2_DV_BT_CAP_PROGRESSIVE | V4L2_DV_BT_CAP_REDUCED_BLANKING | @@ -789,7 +789,7 @@ static const struct v4l2_dv_timings_cap adv76xx_timings_cap_digital = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, 1920, 0, 1200, 25000000, 225000000, + V4L2_INIT_BT_TIMINGS(640, 1920, 350, 1200, 25000000, 225000000, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_GTF | V4L2_DV_BT_STD_CVT, V4L2_DV_BT_CAP_PROGRESSIVE | V4L2_DV_BT_CAP_REDUCED_BLANKING | diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index 65f34e7e146f..f9c23173c9fa 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -676,7 +676,7 @@ static const struct v4l2_dv_timings_cap adv7842_timings_cap_analog = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, 1920, 0, 1200, 25000000, 170000000, + V4L2_INIT_BT_TIMINGS(640, 1920, 350, 1200, 25000000, 170000000, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_GTF | V4L2_DV_BT_STD_CVT, V4L2_DV_BT_CAP_PROGRESSIVE | V4L2_DV_BT_CAP_REDUCED_BLANKING | @@ -687,7 +687,7 @@ static const struct v4l2_dv_timings_cap adv7842_timings_cap_digital = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, 1920, 0, 1200, 25000000, 225000000, + V4L2_INIT_BT_TIMINGS(640, 1920, 350, 1200, 25000000, 225000000, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_GTF | V4L2_DV_BT_STD_CVT, V4L2_DV_BT_CAP_PROGRESSIVE | V4L2_DV_BT_CAP_REDUCED_BLANKING | diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c index e6f5c363ccab..c9647e24a4a3 100644 --- a/drivers/media/i2c/tc358743.c +++ b/drivers/media/i2c/tc358743.c @@ -70,7 +70,7 @@ static const struct v4l2_dv_timings_cap tc358743_timings_cap = { /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, /* Pixel clock from REF_01 p. 20. Min/max height/width are unknown */ - V4L2_INIT_BT_TIMINGS(1, 10000, 1, 10000, 0, 165000000, + V4L2_INIT_BT_TIMINGS(640, 1920, 350, 1200, 13000000, 165000000, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_GTF | V4L2_DV_BT_STD_CVT, V4L2_DV_BT_CAP_PROGRESSIVE | diff --git a/drivers/media/i2c/ths8200.c b/drivers/media/i2c/ths8200.c index 498ad2368cbc..f5ee28058ea2 100644 --- a/drivers/media/i2c/ths8200.c +++ b/drivers/media/i2c/ths8200.c @@ -49,7 +49,7 @@ static const struct v4l2_dv_timings_cap ths8200_timings_cap = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, 1920, 0, 1080, 25000000, 148500000, + V4L2_INIT_BT_TIMINGS(640, 1920, 350, 1080, 25000000, 148500000, V4L2_DV_BT_STD_CEA861, V4L2_DV_BT_CAP_PROGRESSIVE) }; From 23d18c183c58e0c0ed49196f30dfaa4f6155d3b4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Nov 2018 15:43:37 +0200 Subject: [PATCH 1692/2608] ACPI: SPCR: Consider baud rate 0 as preconfigured state [ Upstream commit b413b1abeb21b4a152c0bf8d1379efa30759b6e3 ] Since SPCR 1.04 [1] the baud rate of 0 means a preconfigured state of UART. Assume firmware or bootloader configures console correctly. [1]: https://docs.microsoft.com/en-us/windows-hardware/drivers/serports/serial-port-console-redirection-table Signed-off-by: Andy Shevchenko Reviewed-by: Prarit Bhargava Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/spcr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index 324b35bfe781..f567fa5f0148 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -148,6 +148,13 @@ int __init parse_spcr(bool earlycon) } switch (table->baud_rate) { + case 0: + /* + * SPCR 1.04 defines 0 as a preconfigured state of UART. + * Assume firmware or bootloader configures console correctly. + */ + baud_rate = 0; + break; case 3: baud_rate = 9600; break; @@ -196,6 +203,10 @@ int __init parse_spcr(bool earlycon) * UART so don't attempt to change to the baud rate state * in the table because driver cannot calculate the dividers */ + baud_rate = 0; + } + + if (!baud_rate) { snprintf(opts, sizeof(opts), "%s,%s,0x%llx", uart, iotype, table->serial_port.address); } else { From 84e4e56c9a385b7a1fa1e683f6525391c2cd00f3 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 25 Nov 2018 10:22:25 +0100 Subject: [PATCH 1693/2608] staging: pi433: fix potential null dereference [ Upstream commit 64c4c4ca6c129a4191e8e1e91b2d5d9b8d08c518 ] Add a test for successful call to cdev_alloc() to avoid potential null dereference. Issue reported by smatch. Signed-off-by: Michael Straube Fixes: 874bcba65f9a ("staging: pi433: New driver") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/pi433/pi433_if.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/pi433/pi433_if.c b/drivers/staging/pi433/pi433_if.c index 93c01680f016..5be40bdc191b 100644 --- a/drivers/staging/pi433/pi433_if.c +++ b/drivers/staging/pi433/pi433_if.c @@ -1210,6 +1210,10 @@ static int pi433_probe(struct spi_device *spi) /* create cdev */ device->cdev = cdev_alloc(); + if (!device->cdev) { + dev_dbg(device->dev, "allocation of cdev failed"); + goto cdev_failed; + } device->cdev->owner = THIS_MODULE; cdev_init(device->cdev, &pi433_fops); retval = cdev_add(device->cdev, device->devt, 1); From 1449abc3a84ea1b291d46d8dc0255dadda87a60b Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 6 Nov 2018 10:25:29 +0800 Subject: [PATCH 1694/2608] f2fs: move dir data flush to write checkpoint process [ Upstream commit b61ac5b720146c619c7cdf17eff2551b934399e5 ] This patch move dir data flush to write checkpoint process, by doing this, it may reduce some time for dir fsync. pre: -f2fs_do_sync_file enter -file_write_and_wait_range <- flush & wait -write_checkpoint -do_checkpoint <- wait all -f2fs_do_sync_file exit now: -f2fs_do_sync_file enter -write_checkpoint -block_operations <- flush dir & no wait -do_checkpoint <- wait all -f2fs_do_sync_file exit Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7d3189f1941c..5f549bc4e097 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -205,6 +205,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, trace_f2fs_sync_file_enter(inode); + if (S_ISDIR(inode->i_mode)) + goto go_write; + /* if fdatasync is triggered, let's do in-place-update */ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) set_inode_flag(inode, FI_NEED_IPU); From 58282978fe278202bb9aebe41fc679f10e5dc447 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 14 Nov 2018 19:34:28 +0800 Subject: [PATCH 1695/2608] f2fs: fix race between write_checkpoint and write_begin [ Upstream commit 2866fb16d67992195b0526d19e65acb6640fb87f ] The following race could lead to inconsistent SIT bitmap: Task A Task B ====== ====== f2fs_write_checkpoint block_operations f2fs_lock_all down_write(node_change) down_write(node_write) ... sync ... up_write(node_change) f2fs_file_write_iter set_inode_flag(FI_NO_PREALLOC) ...... f2fs_write_begin(index=0, has inline data) prepare_write_begin __do_map_lock(AIO) => down_read(node_change) f2fs_convert_inline_page => update SIT __do_map_lock(AIO) => up_read(node_change) f2fs_flush_sit_entries <= inconsistent SIT finish write checkpoint sudden-power-off If SPO occurs after checkpoint is finished, SIT bitmap will be set incorrectly. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c68b319b07aa..3d37124eb63e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1880,6 +1880,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, bool locked = false; struct extent_info ei = {0,0,0}; int err = 0; + int flag; /* * we already allocated all the blocks, so we don't need to get @@ -1889,9 +1890,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, !is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; + /* f2fs_lock_op avoids race between write CP and convert_inline_page */ + if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode)) + flag = F2FS_GET_BLOCK_DEFAULT; + else + flag = F2FS_GET_BLOCK_PRE_AIO; + if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + __do_map_lock(sbi, flag, true); locked = true; } restart: @@ -1929,6 +1936,7 @@ restart: f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); locked = true; goto restart; } @@ -1942,7 +1950,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + __do_map_lock(sbi, flag, false); return err; } From c357fe55e4e2c3e2f45357cdbf065be183816230 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 21 Nov 2018 07:21:38 +0800 Subject: [PATCH 1696/2608] f2fs: fix wrong return value of f2fs_acl_create [ Upstream commit f6176473a0c7472380eef72ebeb330cf9485bf0a ] When call f2fs_acl_create_masq() failed, the caller f2fs_acl_create() should return -EIO instead of -ENOMEM, this patch makes it consistent with posix_acl_create() which has been fixed in commit beaf226b863a ("posix_acl: don't ignore return value of posix_acl_create_masq()"). Fixes: 83dfe53c185e ("f2fs: fix reference leaks in f2fs_acl_create") Signed-off-by: Tiezhu Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/acl.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 436b3a1464d9..5e4860b8bbfc 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -349,12 +349,14 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, return PTR_ERR(p); clone = f2fs_acl_clone(p, GFP_NOFS); - if (!clone) - goto no_mem; + if (!clone) { + ret = -ENOMEM; + goto release_acl; + } ret = f2fs_acl_create_masq(clone, mode); if (ret < 0) - goto no_mem_clone; + goto release_clone; if (ret == 0) posix_acl_release(clone); @@ -368,11 +370,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, return 0; -no_mem_clone: +release_clone: posix_acl_release(clone); -no_mem: +release_acl: posix_acl_release(p); - return -ENOMEM; + return ret; } int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, From 8121f61a10341405debb9d2f6bf4835ef3eacb9a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Nov 2018 13:09:28 +0100 Subject: [PATCH 1697/2608] i2c: sh_mobile: add support for r8a77990 (R-Car E3) [ Upstream commit 5eb316e636eb298c204f5b368526d4480b63c0ba ] Add support for the IIC code for the r8a77990 (R-Car E3). It is not considered compatible with existing fallback bindings due to the documented absence of automatic transmission registers. These registers are currently not used by the driver and thus the provides the same behaviour for "renesas,iic-r8a77990" and "renesas,rcar-gen3-iic". The point of declaring incompatibility is to allow for automatic transmission register support to be added to "renesas,iic-r8a77990" and "renesas,rcar-gen3-iic" in future. Signed-off-by: Simon Horman Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-sh_mobile.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 6f2aaeb7c4fa..338344e76e02 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -836,6 +836,7 @@ static const struct of_device_id sh_mobile_i2c_dt_ids[] = { { .compatible = "renesas,rcar-gen2-iic", .data = &fast_clock_dt_config }, { .compatible = "renesas,iic-r8a7795", .data = &fast_clock_dt_config }, { .compatible = "renesas,rcar-gen3-iic", .data = &fast_clock_dt_config }, + { .compatible = "renesas,iic-r8a77990", .data = &fast_clock_dt_config }, { .compatible = "renesas,iic-sh73a0", .data = &fast_clock_dt_config }, { .compatible = "renesas,rmobile-iic", .data = &default_dt_config }, {}, From 8f6e8dbc8f3652131765668660cf4b57780df010 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 7 Nov 2018 23:06:15 +0000 Subject: [PATCH 1698/2608] arm64: io: Ensure calls to delay routines are ordered against prior readX() [ Upstream commit 6460d32014717686d3b7963595950ba2c6d1bb5e ] A relatively standard idiom for ensuring that a pair of MMIO writes to a device arrive at that device with a specified minimum delay between them is as follows: writel_relaxed(42, dev_base + CTL1); readl(dev_base + CTL1); udelay(10); writel_relaxed(42, dev_base + CTL2); the intention being that the read-back from the device will push the prior write to CTL1, and the udelay will hold up the write to CTL1 until at least 10us have elapsed. Unfortunately, on arm64 where the underlying delay loop is implemented as a read of the architected counter, the CPU does not guarantee ordering from the readl() to the delay loop and therefore the delay loop could in theory be speculated and not provide the desired interval between the two writes. Fix this in a similar manner to PowerPC by introducing a dummy control dependency on the output of readX() which, combined with the ISB in the read of the architected counter, guarantees that a subsequent delay loop can not be executed until the readX() has returned its result. Cc: Benjamin Herrenschmidt Cc: Arnd Bergmann Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/io.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 35b2e50f17fb..b2bc7dbc1fa6 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -106,7 +106,22 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) } /* IO barriers */ -#define __iormb() rmb() +#define __iormb(v) \ +({ \ + unsigned long tmp; \ + \ + rmb(); \ + \ + /* \ + * Create a dummy control dependency from the IO read to any \ + * later instructions. This ensures that a subsequent call to \ + * udelay() will be ordered due to the ISB in get_cycles(). \ + */ \ + asm volatile("eor %0, %1, %1\n" \ + "cbnz %0, ." \ + : "=r" (tmp) : "r" (v) : "memory"); \ +}) + #define __iowmb() wmb() #define mmiowb() do { } while (0) @@ -131,10 +146,10 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * following Normal memory access. Writes are ordered relative to any prior * Normal memory access. */ -#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) -#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) -#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) -#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(); __v; }) +#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(__v); __v; }) +#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(__v); __v; }) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) +#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; }) #define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); }) #define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); }) @@ -185,9 +200,9 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); /* * io{read,write}{16,32,64}be() macros */ -#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; }) -#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; }) -#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; }) +#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; }) +#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; }) +#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; }) #define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); }) #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); }) From 7ab4b6526fdcdfa7772984bbcd2ac4fd14fa223b Mon Sep 17 00:00:00 2001 From: Young Xiao Date: Wed, 28 Nov 2018 12:36:39 +0000 Subject: [PATCH 1699/2608] sunvdc: Do not spin in an infinite loop when vio_ldc_send() returns EAGAIN [ Upstream commit a11f6ca9aef989b56cd31ff4ee2af4fb31a172ec ] __vdc_tx_trigger should only loop on EAGAIN a finite number of times. See commit adddc32d6fde ("sunvnet: Do not spin in an infinite loop when vio_ldc_send() returns EAGAIN") for detail. Signed-off-by: Young Xiao Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/sunvdc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index ad9749463d4f..ed4d6276e94f 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -41,6 +41,8 @@ MODULE_VERSION(DRV_MODULE_VERSION); #define WAITING_FOR_GEN_CMD 0x04 #define WAITING_FOR_ANY -1 +#define VDC_MAX_RETRIES 10 + static struct workqueue_struct *sunvdc_wq; struct vdc_req_entry { @@ -427,6 +429,7 @@ static int __vdc_tx_trigger(struct vdc_port *port) .end_idx = dr->prod, }; int err, delay; + int retries = 0; hdr.seq = dr->snd_nxt; delay = 1; @@ -439,6 +442,8 @@ static int __vdc_tx_trigger(struct vdc_port *port) udelay(delay); if ((delay <<= 1) > 128) delay = 128; + if (retries++ > VDC_MAX_RETRIES) + break; } while (err == -EAGAIN); if (err == -ENOTCONN) From 834dfbb83f85591ecca6992f0ef28710e209d017 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 24 Nov 2018 09:52:23 -0500 Subject: [PATCH 1700/2608] soc: bcm: brcmstb: Don't leak device tree node reference [ Upstream commit 1861a7f07e02292830a1ca256328d370deefea30 ] of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. soc_is_brcmstb() doesn't do that, so fix it. [treding: slightly rewrite to avoid inline comparison] Fixes: d52fad262041 ("soc: add stubs for brcmstb SoC's") Signed-off-by: Yangtao Li Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- drivers/soc/bcm/brcmstb/common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/soc/bcm/brcmstb/common.c b/drivers/soc/bcm/brcmstb/common.c index 22e98a90468c..2f5ec424a390 100644 --- a/drivers/soc/bcm/brcmstb/common.c +++ b/drivers/soc/bcm/brcmstb/common.c @@ -31,13 +31,17 @@ static const struct of_device_id brcmstb_machine_match[] = { bool soc_is_brcmstb(void) { + const struct of_device_id *match; struct device_node *root; root = of_find_node_by_path("/"); if (!root) return false; - return of_match_node(brcmstb_machine_match, root) != NULL; + match = of_match_node(brcmstb_machine_match, root); + of_node_put(root); + + return match != NULL; } static const struct of_device_id sun_top_ctrl_match[] = { From 51d0d8e65de67e6a2695aeae325f0af63041de4e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 Nov 2018 15:54:17 -0500 Subject: [PATCH 1701/2608] nfsd4: fix crash on writing v4_end_grace before nfsd startup [ Upstream commit 62a063b8e7d1db684db3f207261a466fa3194e72 ] Anatoly Trosinenko reports that this: 1) Checkout fresh master Linux branch (tested with commit e195ca6cb) 2) Copy x84_64-config-4.14 to .config, then enable NFS server v4 and build 3) From `kvm-xfstests shell`: results in NULL dereference in locks_end_grace. Check that nfsd has been started before trying to end the grace period. Reported-by: Anatoly Trosinenko Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- fs/nfsd/nfsctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6493df6b1bd5..4b8ebcc6b183 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1126,6 +1126,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) case 'Y': case 'y': case '1': + if (nn->nfsd_serv) + return -EBUSY; nfsd4_end_grace(nn); break; default: From e6ee8bfc97c23a368b2d22d5a2abdc2b29109593 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Thu, 29 Nov 2018 10:04:14 -0500 Subject: [PATCH 1702/2608] drm: Clear state->acquire_ctx before leaving drm_atomic_helper_commit_duplicated_state() [ Upstream commit aa394b0dd68cb00c483e151dcd84713d4d517ed1 ] drm_atomic_helper_commit_duplicated_state() sets state->acquire_ctx to the context given in the argument and leaves it in state after it quits. The lifetime of state and context are not guaranteed to be the same, so we shouldn't leave that pointer hanging around. This patch resets the context to NULL to avoid any oopses. Changes in v2: - Added to the set Suggested-by: Daniel Vetter Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20181129150423.239081-1-sean@poorly.run Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_atomic_helper.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 1f08d597b87a..d05ed0521e20 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -2899,7 +2899,7 @@ EXPORT_SYMBOL(drm_atomic_helper_suspend); int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, struct drm_modeset_acquire_ctx *ctx) { - int i; + int i, ret; struct drm_plane *plane; struct drm_plane_state *new_plane_state; struct drm_connector *connector; @@ -2918,7 +2918,11 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, for_each_new_connector_in_state(state, connector, new_conn_state, i) state->connectors[i].old_state = connector->state; - return drm_atomic_commit(state); + ret = drm_atomic_commit(state); + + state->acquire_ctx = NULL; + + return ret; } EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state); From 6636dc5e01c613f032c9140b4bd113e783c24546 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 29 Nov 2018 16:31:04 +0000 Subject: [PATCH 1703/2608] arm64: io: Ensure value passed to __iormb() is held in a 64-bit register [ Upstream commit 1b57ec8c75279b873639eb44a215479236f93481 ] As of commit 6460d3201471 ("arm64: io: Ensure calls to delay routines are ordered against prior readX()"), MMIO reads smaller than 64 bits fail to compile under clang because we end up mixing 32-bit and 64-bit register operands for the same data processing instruction: ./include/asm-generic/io.h:695:9: warning: value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths] return readb(addr); ^ ./arch/arm64/include/asm/io.h:147:58: note: expanded from macro 'readb' ^ ./include/asm-generic/io.h:695:9: note: use constraint modifier "w" ./arch/arm64/include/asm/io.h:147:50: note: expanded from macro 'readb' ^ ./arch/arm64/include/asm/io.h:118:24: note: expanded from macro '__iormb' asm volatile("eor %0, %1, %1\n" \ ^ Fix the build by casting the macro argument to 'unsigned long' when used as an input to the inline asm. Reported-by: Nick Desaulniers Reported-by: Nathan Chancellor Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/io.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index b2bc7dbc1fa6..49bb9a020a09 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -119,7 +119,8 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) */ \ asm volatile("eor %0, %1, %1\n" \ "cbnz %0, ." \ - : "=r" (tmp) : "r" (v) : "memory"); \ + : "=r" (tmp) : "r" ((unsigned long)(v)) \ + : "memory"); \ }) #define __iowmb() wmb() From 3f5d37ba66c820d7255f61a86d51fe969cc28d05 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 7 Nov 2018 14:36:11 -0800 Subject: [PATCH 1704/2608] Thermal: do not clear passive state during system sleep [ Upstream commit 964f4843a455d2ffb199512b08be8d5f077c4cac ] commit ff140fea847e ("Thermal: handle thermal zone device properly during system sleep") added PM hook to call thermal zone reset during sleep. However resetting thermal zone will also clear the passive state and thus cancel the polling queue which leads the passive cooling device state not being cleared properly after sleep. thermal_pm_notify => thermal_zone_device_reset set passive to 0 thermal_zone_trip_update will skip update passive as `old_target == instance->target'. monitor_thermal_zone => thermal_zone_device_set_polling will cancel tz->poll_queue, so the cooling device state will not be changed afterwards. Reported-by: Kame Wang Signed-off-by: Wei Wang Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/thermal_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 2b1b0ba393a4..17d6079c7642 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -454,16 +454,20 @@ static void update_temperature(struct thermal_zone_device *tz) tz->last_temperature, tz->temperature); } -static void thermal_zone_device_reset(struct thermal_zone_device *tz) +static void thermal_zone_device_init(struct thermal_zone_device *tz) { struct thermal_instance *pos; - tz->temperature = THERMAL_TEMP_INVALID; - tz->passive = 0; list_for_each_entry(pos, &tz->thermal_instances, tz_node) pos->initialized = false; } +static void thermal_zone_device_reset(struct thermal_zone_device *tz) +{ + tz->passive = 0; + thermal_zone_device_init(tz); +} + void thermal_zone_device_update(struct thermal_zone_device *tz, enum thermal_notify_event event) { @@ -1503,7 +1507,7 @@ static int thermal_pm_notify(struct notifier_block *nb, case PM_POST_SUSPEND: atomic_set(&in_suspend, 0); list_for_each_entry(tz, &thermal_tz_list, node) { - thermal_zone_device_reset(tz); + thermal_zone_device_init(tz); thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); } From 0ff7bc9be0d593bcd15f7cbcfb47e42a81227c31 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 29 Nov 2018 18:12:27 +0100 Subject: [PATCH 1705/2608] firmware/efi: Add NULL pointer checks in efivars API functions [ Upstream commit ab2180a15ce54739fed381efb4cb12e78dfb1561 ] Since commit: ce2e6db554fa ("brcmfmac: Add support for getting nvram contents from EFI variables") we have a device driver accessing the efivars API. Several functions in the efivars API assume __efivars is set, i.e., that they will be accessed only after efivars_register() has been called. However, the following NULL pointer access was reported calling efivar_entry_size() from the brcmfmac device driver: Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = 60bfa5f1 [00000008] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM ... Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) Workqueue: events request_firmware_work_func PC is at efivar_entry_size+0x28/0x90 LR is at brcmf_fw_complete_request+0x3f8/0x8d4 [brcmfmac] pc : [] lr : [] psr: a00d0113 sp : ede7fe28 ip : ee983410 fp : c1787f30 r10: 00000000 r9 : 00000000 r8 : bf2b2258 r7 : ee983000 r6 : c1604c48 r5 : ede7fe88 r4 : edf337c0 r3 : 00000000 r2 : 00000000 r1 : ede7fe88 r0 : c17712c8 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: ad16804a DAC: 00000051 Disassembly showed that the local static variable __efivars is NULL, which is not entirely unexpected given that it is a non-EFI platform. So add a NULL pointer check to efivar_entry_size(), and to related functions while at it. In efivars_register() a couple of sanity checks are added as well. Reported-by: Jon Hunter Signed-off-by: Arend van Spriel Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-9-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/firmware/efi/vars.c | 99 +++++++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 21 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 9336ffdf6e2c..fceaafd67ec6 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -318,7 +318,12 @@ EXPORT_SYMBOL_GPL(efivar_variable_is_removable); static efi_status_t check_var_size(u32 attributes, unsigned long size) { - const struct efivar_operations *fops = __efivars->ops; + const struct efivar_operations *fops; + + if (!__efivars) + return EFI_UNSUPPORTED; + + fops = __efivars->ops; if (!fops->query_variable_store) return EFI_UNSUPPORTED; @@ -329,7 +334,12 @@ check_var_size(u32 attributes, unsigned long size) static efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size) { - const struct efivar_operations *fops = __efivars->ops; + const struct efivar_operations *fops; + + if (!__efivars) + return EFI_UNSUPPORTED; + + fops = __efivars->ops; if (!fops->query_variable_store) return EFI_UNSUPPORTED; @@ -429,13 +439,18 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; unsigned long variable_name_size = 1024; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; int err = 0; + if (!__efivars) + return -EFAULT; + + ops = __efivars->ops; + variable_name = kzalloc(variable_name_size, GFP_KERNEL); if (!variable_name) { printk(KERN_ERR "efivars: Memory allocation failed.\n"); @@ -583,12 +598,14 @@ static void efivar_entry_list_del_unlock(struct efivar_entry *entry) */ int __efivar_entry_delete(struct efivar_entry *entry) { - const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - status = ops->set_variable(entry->var.VariableName, - &entry->var.VendorGuid, - 0, 0, NULL); + if (!__efivars) + return -EINVAL; + + status = __efivars->ops->set_variable(entry->var.VariableName, + &entry->var.VendorGuid, + 0, 0, NULL); return efi_status_to_err(status); } @@ -607,12 +624,17 @@ EXPORT_SYMBOL_GPL(__efivar_entry_delete); */ int efivar_entry_delete(struct efivar_entry *entry) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; if (down_interruptible(&efivars_lock)) return -EINTR; + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + ops = __efivars->ops; status = ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, 0, 0, NULL); @@ -650,13 +672,19 @@ EXPORT_SYMBOL_GPL(efivar_entry_delete); int efivar_entry_set(struct efivar_entry *entry, u32 attributes, unsigned long size, void *data, struct list_head *head) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; efi_char16_t *name = entry->var.VariableName; efi_guid_t vendor = entry->var.VendorGuid; if (down_interruptible(&efivars_lock)) return -EINTR; + + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + ops = __efivars->ops; if (head && efivar_entry_find(name, vendor, head, false)) { up(&efivars_lock); return -EEXIST; @@ -687,12 +715,17 @@ static int efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, u32 attributes, unsigned long size, void *data) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; if (down_trylock(&efivars_lock)) return -EBUSY; + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + status = check_var_size_nonblocking(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { @@ -700,6 +733,7 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, return -ENOSPC; } + ops = __efivars->ops; status = ops->set_variable_nonblocking(name, &vendor, attributes, size, data); @@ -727,9 +761,13 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, bool block, unsigned long size, void *data) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; + if (!__efivars) + return -EINVAL; + + ops = __efivars->ops; if (!ops->query_variable_store) return -ENOSYS; @@ -829,13 +867,18 @@ EXPORT_SYMBOL_GPL(efivar_entry_find); */ int efivar_entry_size(struct efivar_entry *entry, unsigned long *size) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; *size = 0; if (down_interruptible(&efivars_lock)) return -EINTR; + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + ops = __efivars->ops; status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, NULL, size, NULL); up(&efivars_lock); @@ -861,12 +904,14 @@ EXPORT_SYMBOL_GPL(efivar_entry_size); int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes, unsigned long *size, void *data) { - const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - status = ops->get_variable(entry->var.VariableName, - &entry->var.VendorGuid, - attributes, size, data); + if (!__efivars) + return -EINVAL; + + status = __efivars->ops->get_variable(entry->var.VariableName, + &entry->var.VendorGuid, + attributes, size, data); return efi_status_to_err(status); } @@ -882,14 +927,19 @@ EXPORT_SYMBOL_GPL(__efivar_entry_get); int efivar_entry_get(struct efivar_entry *entry, u32 *attributes, unsigned long *size, void *data) { - const struct efivar_operations *ops = __efivars->ops; efi_status_t status; if (down_interruptible(&efivars_lock)) return -EINTR; - status = ops->get_variable(entry->var.VariableName, - &entry->var.VendorGuid, - attributes, size, data); + + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + + status = __efivars->ops->get_variable(entry->var.VariableName, + &entry->var.VendorGuid, + attributes, size, data); up(&efivars_lock); return efi_status_to_err(status); @@ -921,7 +971,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_get); int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, unsigned long *size, void *data, bool *set) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_char16_t *name = entry->var.VariableName; efi_guid_t *vendor = &entry->var.VendorGuid; efi_status_t status; @@ -940,6 +990,11 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, if (down_interruptible(&efivars_lock)) return -EINTR; + if (!__efivars) { + err = -EINVAL; + goto out; + } + /* * Ensure that the available space hasn't shrunk below the safe level */ @@ -956,6 +1011,8 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, } } + ops = __efivars->ops; + status = ops->set_variable(name, vendor, attributes, *size, data); if (status != EFI_SUCCESS) { err = efi_status_to_err(status); From abc8d4290d04a0bfe239df8d44592cf6f040d3eb Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 19 Nov 2018 11:36:13 +0100 Subject: [PATCH 1706/2608] s390/zcrypt: improve special ap message cmd handling [ Upstream commit be534791011100d204602e2e0496e9e6ce8edf63 ] There exist very few ap messages which need to have the 'special' flag enabled. This flag tells the firmware layer to do some pre- and maybe postprocessing. However, it may happen that this special flag is enabled but the firmware is unable to deal with this kind of message and thus returns with reply code 0x41. For example older firmware may not know the newest messages triggered by the zcrypt device driver and thus react with reject and the named reply code. Unfortunately this reply code is not known to the zcrypt error routines and thus default behavior is to switch the ap queue offline. This patch now makes the ap error routine aware of the reply code and so userspace is informed about the bad processing result but the queue is not switched to offline state any more. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/include/uapi/asm/zcrypt.h | 4 ++-- drivers/s390/crypto/zcrypt_error.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 137ef473584e..b9fb42089760 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -161,8 +161,8 @@ struct ica_xcRB { * @cprb_len: CPRB header length [0x0020] * @cprb_ver_id: CPRB version id. [0x04] * @pad_000: Alignment pad bytes - * @flags: Admin cmd [0x80] or functional cmd [0x00] - * @func_id: Function id / subtype [0x5434] + * @flags: Admin bit [0x80], Special bit [0x20] + * @func_id: Function id / subtype [0x5434] "T4" * @source_id: Source id [originator id] * @target_id: Target id [usage/ctrl domain id] * @ret_code: Return code diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 13df60209ed3..9499cd3a05f8 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -65,6 +65,7 @@ struct error_hdr { #define REP82_ERROR_FORMAT_FIELD 0x29 #define REP82_ERROR_INVALID_COMMAND 0x30 #define REP82_ERROR_MALFORMED_MSG 0x40 +#define REP82_ERROR_INVALID_SPECIAL_CMD 0x41 #define REP82_ERROR_INVALID_DOMAIN_PRECHECK 0x42 #define REP82_ERROR_RESERVED_FIELDO 0x50 /* old value */ #define REP82_ERROR_WORD_ALIGNMENT 0x60 @@ -103,6 +104,7 @@ static inline int convert_error(struct zcrypt_queue *zq, case REP88_ERROR_MESSAGE_MALFORMD: case REP82_ERROR_INVALID_DOMAIN_PRECHECK: case REP82_ERROR_INVALID_DOMAIN_PENDING: + case REP82_ERROR_INVALID_SPECIAL_CMD: // REP88_ERROR_INVALID_KEY // '82' CEX2A // REP88_ERROR_OPERAND // '84' CEX2A // REP88_ERROR_OPERAND_EVEN_MOD // '85' CEX2A From bfadca610fcfa212b940b15dbb11127e80569d05 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 15 Nov 2018 22:42:01 +0000 Subject: [PATCH 1707/2608] arm64: ftrace: don't adjust the LR value [ Upstream commit 6e803e2e6e367db9a0d6ecae1bd24bb5752011bd ] The core ftrace code requires that when it is handed the PC of an instrumented function, this PC is the address of the instrumented instruction. This is necessary so that the core ftrace code can identify the specific instrumentation site. Since the instrumented function will be a BL, the address of the instrumented function is LR - 4 at entry to the ftrace code. This fixup is applied in the mcount_get_pc and mcount_get_pc0 helpers, which acquire the PC of the instrumented function. The mcount_get_lr helper is used to acquire the LR of the instrumented function, whose value does not require this adjustment, and cannot be adjusted to anything meaningful. No adjustment of this value is made on other architectures, including arm. However, arm64 adjusts this value by 4. This patch brings arm64 in line with other architectures and removes the adjustment of the LR value. Signed-off-by: Mark Rutland Cc: AKASHI Takahiro Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Torsten Duwe Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/entry-ftrace.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index e1be42e11ff5..5a10e3a3e843 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -79,7 +79,6 @@ .macro mcount_get_lr reg ldr \reg, [x29] ldr \reg, [\reg, #8] - mcount_adjust_addr \reg, \reg .endm .macro mcount_get_lr_addr reg From 46d5a1942253df6bb06eaaa969ab2c8b493ac0c1 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 28 Nov 2018 18:53:10 +0100 Subject: [PATCH 1708/2608] ARM: dts: mmp2: fix TWSI2 [ Upstream commit 1147e05ac9fc2ef86a3691e7ca5c2db7602d81dd ] Marvell keeps their MMP2 datasheet secret, but there are good clues that TWSI2 is not on 0xd4025000 on that platform, not does it use IRQ 58. In fact, the IRQ 58 on MMP2 seems to be a signal processor: arch/arm/mach-mmp/irqs.h:#define IRQ_MMP2_MSP 58 I'm taking a somewhat educated guess that is probably a copy & paste error from PXA168 or PXA910 and that the real controller in fact hides at address 0xd4031000 and uses an interrupt line multiplexed via IRQ 17. I'm also copying some properties from TWSI1 that were missing or incorrect. Tested on a OLPC XO 1.75 machine, where the RTC is on TWSI2. Signed-off-by: Lubomir Rintel Tested-by: Pavel Machek Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/mmp2.dtsi | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/mmp2.dtsi b/arch/arm/boot/dts/mmp2.dtsi index 766bbb8495b6..47e5b63339d1 100644 --- a/arch/arm/boot/dts/mmp2.dtsi +++ b/arch/arm/boot/dts/mmp2.dtsi @@ -220,12 +220,15 @@ status = "disabled"; }; - twsi2: i2c@d4025000 { + twsi2: i2c@d4031000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4025000 0x1000>; - interrupts = <58>; + reg = <0xd4031000 0x1000>; + interrupt-parent = <&intcmux17>; + interrupts = <0>; clocks = <&soc_clocks MMP2_CLK_TWSI1>; resets = <&soc_clocks MMP2_CLK_TWSI1>; + #address-cells = <1>; + #size-cells = <0>; status = "disabled"; }; From 92476b7f5711368c202707d6d894319640e1d060 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 28 Nov 2018 23:20:11 +0100 Subject: [PATCH 1709/2608] x86/fpu: Add might_fault() to user_insn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6637401c35b2f327a35d27f44bda05e327f2f017 ] Every user of user_insn() passes an user memory pointer to this macro. Add might_fault() to user_insn() so we can spot users which are using this macro in sections where page faulting is not allowed. [ bp: Space it out to make it more visible. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Rik van Riel Cc: "H. Peter Anvin" Cc: "Jason A. Donenfeld" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: kvm ML Cc: x86-ml Link: https://lkml.kernel.org/r/20181128222035.2996-6-bigeasy@linutronix.de Signed-off-by: Sasha Levin --- arch/x86/include/asm/fpu/internal.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 69dcdf195b61..fa2c93cb42a2 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -106,6 +106,9 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); #define user_insn(insn, output, input...) \ ({ \ int err; \ + \ + might_fault(); \ + \ asm volatile(ASM_STAC "\n" \ "1:" #insn "\n\t" \ "2: " ASM_CLAC "\n" \ From 26e5e5fa0f2b22c35d50dfcd4e9df4497de6a7bb Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 23 Nov 2018 16:56:26 -0500 Subject: [PATCH 1710/2608] media: DaVinci-VPBE: fix error handling in vpbe_initialize() [ Upstream commit aa35dc3c71950e3fec3e230c06c27c0fbd0067f8 ] If vpbe_set_default_output() or vpbe_set_default_mode() fails, vpbe_initialize() returns error code without releasing resources. The patch adds error handling for that case. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/davinci/vpbe.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/davinci/vpbe.c b/drivers/media/platform/davinci/vpbe.c index 7f6462562579..1d3c13e36904 100644 --- a/drivers/media/platform/davinci/vpbe.c +++ b/drivers/media/platform/davinci/vpbe.c @@ -739,7 +739,7 @@ static int vpbe_initialize(struct device *dev, struct vpbe_device *vpbe_dev) if (ret) { v4l2_err(&vpbe_dev->v4l2_dev, "Failed to set default output %s", def_output); - return ret; + goto fail_kfree_amp; } printk(KERN_NOTICE "Setting default mode to %s\n", def_mode); @@ -747,12 +747,15 @@ static int vpbe_initialize(struct device *dev, struct vpbe_device *vpbe_dev) if (ret) { v4l2_err(&vpbe_dev->v4l2_dev, "Failed to set default mode %s", def_mode); - return ret; + goto fail_kfree_amp; } vpbe_dev->initialized = 1; /* TBD handling of bootargs for default output and mode */ return 0; +fail_kfree_amp: + mutex_lock(&vpbe_dev->lock); + kfree(vpbe_dev->amp); fail_kfree_encoders: kfree(vpbe_dev->encoders); fail_dev_unregister: From 9c58ef240c51bc1cc9b4740383a770ca6a61294d Mon Sep 17 00:00:00 2001 From: Zoran Markovic Date: Wed, 17 Oct 2018 16:25:44 -0700 Subject: [PATCH 1711/2608] smack: fix access permissions for keyring [ Upstream commit 5b841bfab695e3b8ae793172a9ff7990f99cc3e2 ] Function smack_key_permission() only issues smack requests for the following operations: - KEY_NEED_READ (issues MAY_READ) - KEY_NEED_WRITE (issues MAY_WRITE) - KEY_NEED_LINK (issues MAY_WRITE) - KEY_NEED_SETATTR (issues MAY_WRITE) A blank smack request is issued in all other cases, resulting in smack access being granted if there is any rule defined between subject and object, or denied with -EACCES otherwise. Request MAY_READ access for KEY_NEED_SEARCH and KEY_NEED_VIEW. Fix the logic in the unlikely case when both MAY_READ and MAY_WRITE are needed. Validate access permission field for valid contents. Signed-off-by: Zoran Markovic Signed-off-by: Casey Schaufler Cc: Casey Schaufler Cc: James Morris Cc: "Serge E. Hallyn" Signed-off-by: Sasha Levin --- security/smack/smack_lsm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c8fd5c10b7c6..0d5ce7190b17 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4356,6 +4356,12 @@ static int smack_key_permission(key_ref_t key_ref, int request = 0; int rc; + /* + * Validate requested permissions + */ + if (perm & ~KEY_NEED_ALL) + return -EINVAL; + keyp = key_ref_to_ptr(key_ref); if (keyp == NULL) return -EINVAL; @@ -4375,10 +4381,10 @@ static int smack_key_permission(key_ref_t key_ref, ad.a.u.key_struct.key = keyp->serial; ad.a.u.key_struct.key_desc = keyp->description; #endif - if (perm & KEY_NEED_READ) - request = MAY_READ; + if (perm & (KEY_NEED_READ | KEY_NEED_SEARCH | KEY_NEED_VIEW)) + request |= MAY_READ; if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETATTR)) - request = MAY_WRITE; + request |= MAY_WRITE; rc = smk_access(tkp, keyp->security, request, &ad); rc = smk_bu_note("key access", tkp, keyp->security, request, rc); return rc; From 6f61f3456637d8b183db256005ed2211ca55777e Mon Sep 17 00:00:00 2001 From: Anurag Kumar Vulisha Date: Sat, 1 Dec 2018 16:43:29 +0530 Subject: [PATCH 1712/2608] usb: dwc3: Correct the logic for checking TRB full in __dwc3_prepare_one_trb() [ Upstream commit b7a4fbe2300a8965ea760c7e871507b84aea17f6 ] Availability of TRB's is calculated using dwc3_calc_trbs_left(), which determines total available TRB's based on the HWO bit set in a TRB. In the present code, __dwc3_prepare_one_trb() is called with a TRB which needs to be prepared for transfer. This __dwc3_prepare_one_trb() calls dwc3_calc_trbs_left() to determine total available TRBs and set IOC bit if the total available TRBs are zero. Since the present working TRB (which is passed as an argument to __dwc3_prepare_one_trb() ) doesn't yet have the HWO bit set before calling dwc3_calc_trbs_left(), there are chances that dwc3_calc_trbs_left() wrongly calculates this present working TRB as free(since the HWO bit is not yet set) and returns the total available TRBs as greater than zero (including the present working TRB). This could be a problem. This patch corrects the above mentioned problem in __dwc3_prepare_one_trb() by increementing the dep->trb_enqueue at the last (after preparing the TRB) instead of increementing at the start and setting the IOC bit only if the total available TRBs returned by dwc3_calc_trbs_left() is 1 . Since we are increementing the dep->trb_enqueue at the last, the present working TRB is also considered as available by dwc3_calc_trbs_left() and non zero value is returned . So, according to the modified logic, when the total available TRBs is equal to 1 that means the total available TRBs in the pool are 0. Signed-off-by: Anurag Kumar Vulisha Reviewed-by: Thinh Nguyen Tested-by: Tejas Joglekar Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 727bf3c9f53b..b80eefa355b6 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -890,8 +890,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, struct usb_gadget *gadget = &dwc->gadget; enum usb_device_speed speed = gadget->speed; - dwc3_ep_inc_enq(dep); - trb->size = DWC3_TRB_SIZE_LENGTH(length); trb->bpl = lower_32_bits(dma); trb->bph = upper_32_bits(dma); @@ -970,7 +968,7 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, } if ((!no_interrupt && !chain) || - (dwc3_calc_trbs_left(dep) == 0)) + (dwc3_calc_trbs_left(dep) == 1)) trb->ctrl |= DWC3_TRB_CTRL_IOC; if (chain) @@ -981,6 +979,8 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, trb->ctrl |= DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_enq(dep); + trace_dwc3_prepare_trb(dep, trb); } From c5c88a3d924bf726f4ac8097081cf704d0cad39c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 28 Nov 2018 15:55:21 +0200 Subject: [PATCH 1713/2608] usb: hub: delay hub autosuspend if USB3 port is still link training [ Upstream commit e86108940e541febf35813402ff29fa6f4a9ac0b ] When initializing a hub we want to give a USB3 port in link training the same debounce delay time before autosuspening the hub as already trained, connected enabled ports. USB3 ports won't reach the enabled state with "current connect status" and "connect status change" bits set until the USB3 link training finishes. Catching the port in link training (polling) and adding the debounce delay prevents unnecessary failed attempts to autosuspend the hub. Signed-off-by: Mathias Nyman Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/core/hub.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a073cb5be013..4a4e666a8e09 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1110,6 +1110,16 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) USB_PORT_FEAT_ENABLE); } + /* + * Add debounce if USB3 link is in polling/link training state. + * Link will automatically transition to Enabled state after + * link training completes. + */ + if (hub_is_superspeed(hdev) && + ((portstatus & USB_PORT_STAT_LINK_STATE) == + USB_SS_PORT_LS_POLLING)) + need_debounce_delay = true; + /* Clear status-change flags; we'll debounce later */ if (portchange & USB_PORT_STAT_C_CONNECTION) { need_debounce_delay = true; From 923841f0d9860abcb5fb17c65b63f379f24318d5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Nov 2018 15:43:09 -0800 Subject: [PATCH 1714/2608] timekeeping: Use proper seqcount initializer [ Upstream commit ce10a5b3954f2514af726beb78ed8d7350c5e41c ] tk_core.seq is initialized open coded, but that misses to initialize the lockdep map when lockdep is enabled. Lockdep splats involving tk_core seq consequently lack a name and are hard to read. Use the proper initializer which takes care of the lockdep map initialization. [ tglx: Massaged changelog ] Signed-off-by: Bart Van Assche Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: tj@kernel.org Cc: johannes.berg@intel.com Link: https://lkml.kernel.org/r/20181128234325.110011-12-bvanassche@acm.org Signed-off-by: Sasha Levin --- kernel/time/timekeeping.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2cafb49aa65e..1ce7c404d0b0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -41,7 +41,9 @@ static struct { seqcount_t seq; struct timekeeper timekeeper; -} tk_core ____cacheline_aligned; +} tk_core ____cacheline_aligned = { + .seq = SEQCNT_ZERO(tk_core.seq), +}; static DEFINE_RAW_SPINLOCK(timekeeper_lock); static struct timekeeper shadow_timekeeper; From 352f5747bfaa20797217e7e2cc094200bcd111e1 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 29 Nov 2018 10:34:34 +0800 Subject: [PATCH 1715/2608] usb: mtu3: fix the issue about SetFeature(U1/U2_Enable) [ Upstream commit a0678e2eed41e81004308693ac84ea95614b0920 ] Fix the issue: device doesn't accept LGO_U1/U2: 1. set SW_U1/U2_ACCEPT_ENABLE to eanble controller to accept LGO_U1/U2 by default; 2. enable/disable controller to initiate requests for transition into U1/U2 by SW_U1/U2_REQUEST_ENABLE instead of SW_U1/U2_ACCEPT_ENABLE; Signed-off-by: Chunfeng Yun Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/mtu3/mtu3_core.c | 4 +++- drivers/usb/mtu3/mtu3_gadget_ep0.c | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/mtu3/mtu3_core.c b/drivers/usb/mtu3/mtu3_core.c index 947579842ad7..95978e3b363e 100644 --- a/drivers/usb/mtu3/mtu3_core.c +++ b/drivers/usb/mtu3/mtu3_core.c @@ -564,8 +564,10 @@ static void mtu3_regs_init(struct mtu3 *mtu) if (mtu->is_u3_ip) { /* disable LGO_U1/U2 by default */ mtu3_clrbits(mbase, U3D_LINK_POWER_CONTROL, - SW_U1_ACCEPT_ENABLE | SW_U2_ACCEPT_ENABLE | SW_U1_REQUEST_ENABLE | SW_U2_REQUEST_ENABLE); + /* enable accept LGO_U1/U2 link command from host */ + mtu3_setbits(mbase, U3D_LINK_POWER_CONTROL, + SW_U1_ACCEPT_ENABLE | SW_U2_ACCEPT_ENABLE); /* device responses to u3_exit from host automatically */ mtu3_clrbits(mbase, U3D_LTSSM_CTRL, SOFT_U3_EXIT_EN); /* automatically build U2 link when U3 detect fail */ diff --git a/drivers/usb/mtu3/mtu3_gadget_ep0.c b/drivers/usb/mtu3/mtu3_gadget_ep0.c index 958d74dd2b78..7997cf5f06fc 100644 --- a/drivers/usb/mtu3/mtu3_gadget_ep0.c +++ b/drivers/usb/mtu3/mtu3_gadget_ep0.c @@ -335,9 +335,9 @@ static int ep0_handle_feature_dev(struct mtu3 *mtu, lpc = mtu3_readl(mbase, U3D_LINK_POWER_CONTROL); if (set) - lpc |= SW_U1_ACCEPT_ENABLE; + lpc |= SW_U1_REQUEST_ENABLE; else - lpc &= ~SW_U1_ACCEPT_ENABLE; + lpc &= ~SW_U1_REQUEST_ENABLE; mtu3_writel(mbase, U3D_LINK_POWER_CONTROL, lpc); mtu->u1_enable = !!set; @@ -350,9 +350,9 @@ static int ep0_handle_feature_dev(struct mtu3 *mtu, lpc = mtu3_readl(mbase, U3D_LINK_POWER_CONTROL); if (set) - lpc |= SW_U2_ACCEPT_ENABLE; + lpc |= SW_U2_REQUEST_ENABLE; else - lpc &= ~SW_U2_ACCEPT_ENABLE; + lpc &= ~SW_U2_REQUEST_ENABLE; mtu3_writel(mbase, U3D_LINK_POWER_CONTROL, lpc); mtu->u2_enable = !!set; From c1cb9b7911324f44cc21447299c4c401a3f6a0cb Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 5 Dec 2018 18:11:51 +0800 Subject: [PATCH 1716/2608] clk: sunxi-ng: a33: Set CLK_SET_RATE_PARENT for all audio module clocks [ Upstream commit 6e6da2039c82271dd873b9ad2b902a692a7dd554 ] All the audio interfaces on Allwinner SoCs need to change their module clocks during operation, to switch between support for 44.1 kHz and 48 kHz family sample rates. The clock rate for the module clocks is governed by their upstream audio PLL. The module clocks themselves only have a gate, and sometimes a divider or mux. Thus any rate changes need to be propagated upstream. Set the CLK_SET_RATE_PARENT flag for all audio module clocks to achieve this. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun8i-a33.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c index 13eb5b23c5e7..c40d572a7602 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c @@ -366,10 +366,10 @@ static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4, static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x", "pll-audio-2x", "pll-audio" }; static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents, - 0x0b0, 16, 2, BIT(31), 0); + 0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_MUX_WITH_GATE(i2s1_clk, "i2s1", i2s_parents, - 0x0b4, 16, 2, BIT(31), 0); + 0x0b4, 16, 2, BIT(31), CLK_SET_RATE_PARENT); /* TODO: the parent for most of the USB clocks is not known */ static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", "osc24M", @@ -446,7 +446,7 @@ static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve", static SUNXI_CCU_GATE(ac_dig_clk, "ac-dig", "pll-audio", 0x140, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(ac_dig_4x_clk, "ac-dig-4x", "pll-audio-4x", - 0x140, BIT(30), 0); + 0x140, BIT(30), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(avs_clk, "avs", "osc24M", 0x144, BIT(31), 0); From bed61b98c78a357e10d790924cff744be0e8fec7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 28 Nov 2018 16:32:11 -0800 Subject: [PATCH 1717/2608] driver core: Move async_synchronize_full call [ Upstream commit c37d721c68ad88925ba0e72f6e14acb829a8c6bb ] Move the async_synchronize_full call out of __device_release_driver and into driver_detach. The idea behind this is that the async_synchronize_full call will only guarantee that any existing async operations are flushed. This doesn't do anything to guarantee that a hotplug event that may occur while we are doing the release of the driver will not be asynchronously scheduled. By moving this into the driver_detach path we can avoid potential deadlocks as we aren't holding the device lock at this point and we should not have the driver we want to flush loaded so the flush will take care of any asynchronous events the driver we are detaching might have scheduled. Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers") Reviewed-by: Bart Van Assche Reviewed-by: Dan Williams Signed-off-by: Alexander Duyck Reviewed-by: Luis Chamberlain Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/dd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 55fc31f6fe7f..d928cc6d0638 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -813,9 +813,6 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv = dev->driver; if (drv) { - if (driver_allows_async_probing(drv)) - async_synchronize_full(); - while (device_links_busy(dev)) { device_unlock(dev); if (parent) @@ -920,6 +917,9 @@ void driver_detach(struct device_driver *drv) struct device_private *dev_prv; struct device *dev; + if (driver_allows_async_probing(drv)) + async_synchronize_full(); + for (;;) { spin_lock(&drv->p->klist_devices.k_lock); if (list_empty(&drv->p->klist_devices.k_list)) { From 53dedc8d521acf94f6b6c9ed446545f9ca8b86f1 Mon Sep 17 00:00:00 2001 From: Peter Rajnoha Date: Wed, 5 Dec 2018 12:27:44 +0100 Subject: [PATCH 1718/2608] kobject: return error code if writing /sys/.../uevent fails [ Upstream commit df44b479654f62b478c18ee4d8bc4e9f897a9844 ] Propagate error code back to userspace if writing the /sys/.../uevent file fails. Before, the write operation always returned with success, even if we failed to recognize the input string or if we failed to generate the uevent itself. With the error codes properly propagated back to userspace, we are able to react in userspace accordingly by not assuming and awaiting a uevent that is not delivered. Signed-off-by: Peter Rajnoha Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/bus.c | 12 ++++++++---- drivers/base/core.c | 8 +++++++- kernel/module.c | 6 ++++-- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 1cf1460f8c90..3464c49dad0d 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -616,8 +616,10 @@ static void remove_probe_files(struct bus_type *bus) static ssize_t uevent_store(struct device_driver *drv, const char *buf, size_t count) { - kobject_synth_uevent(&drv->p->kobj, buf, count); - return count; + int rc; + + rc = kobject_synth_uevent(&drv->p->kobj, buf, count); + return rc ? rc : count; } static DRIVER_ATTR_WO(uevent); @@ -833,8 +835,10 @@ static void klist_devices_put(struct klist_node *n) static ssize_t bus_uevent_store(struct bus_type *bus, const char *buf, size_t count) { - kobject_synth_uevent(&bus->p->subsys.kobj, buf, count); - return count; + int rc; + + rc = kobject_synth_uevent(&bus->p->subsys.kobj, buf, count); + return rc ? rc : count; } static BUS_ATTR(uevent, S_IWUSR, NULL, bus_uevent_store); diff --git a/drivers/base/core.c b/drivers/base/core.c index fc5bbb2519fe..1c67bf24bc23 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -991,8 +991,14 @@ out: static ssize_t uevent_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - if (kobject_synth_uevent(&dev->kobj, buf, count)) + int rc; + + rc = kobject_synth_uevent(&dev->kobj, buf, count); + + if (rc) { dev_err(dev, "uevent: failed to send synthetic uevent\n"); + return rc; + } return count; } diff --git a/kernel/module.c b/kernel/module.c index 2a44c515f0d7..94528b891027 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1201,8 +1201,10 @@ static ssize_t store_uevent(struct module_attribute *mattr, struct module_kobject *mk, const char *buffer, size_t count) { - kobject_synth_uevent(&mk->kobj, buffer, count); - return count; + int rc; + + rc = kobject_synth_uevent(&mk->kobj, buffer, count); + return rc ? rc : count; } struct module_attribute module_uevent = From c98b5ee3001b86918296870cda83c114d9a19c9b Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 28 Nov 2018 10:22:09 -0800 Subject: [PATCH 1719/2608] IB/hfi1: Unreserve a reserved request when it is completed [ Upstream commit ca95f802ef5139722acc8d30aeaab6fe5bbe939e ] Currently, When a reserved operation is completed, its entry in the send queue will not be unreserved, which leads to the miscalculation of qp->s_avail and thus the triggering of a WARN_ON call trace. This patch fixes the problem by unreserving the reserved operation when it is completed. Fixes: 856cc4c237ad ("IB/hfi1: Add the capability for reserved operations") Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/rc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 818bac1a4056..d3b8cb92fd6d 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1162,6 +1162,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -1214,6 +1215,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; rvt_put_swqe(wqe); + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) From f138b2306de828231cb2a6b4a5584397a4d4e2ca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Dec 2018 11:28:47 +0200 Subject: [PATCH 1720/2608] usb: dwc3: trace: add missing break statement to make compiler happy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 54d48183d21e03f780053d7129312049cb5dd591 ] The missed break statement in the outer switch makes the code fall through always and thus always same value will be printed. Besides that, compiler warns about missed fall through marker: drivers/usb/dwc3/./trace.h: In function ‘trace_raw_output_dwc3_log_trb’: drivers/usb/dwc3/./trace.h:246:4: warning: this statement may fall through [-Wimplicit-fallthrough=] switch (pcm) { ^~~~~~ Add the missing break statement to work correctly without compilation warnings. Fixes: fa8d965d736b ("usb: dwc3: trace: pretty print high-bandwidth transfers too") Cc: Felipe Balbi Signed-off-by: Andy Shevchenko Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/trace.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/trace.h b/drivers/usb/dwc3/trace.h index 6504b116da04..62ec20a26013 100644 --- a/drivers/usb/dwc3/trace.h +++ b/drivers/usb/dwc3/trace.h @@ -262,9 +262,11 @@ DECLARE_EVENT_CLASS(dwc3_log_trb, s = "2x "; break; case 3: + default: s = "3x "; break; } + break; default: s = ""; } s; }), From 114edb495729be76ab7222daa5bc2fe9da094ba7 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sun, 2 Dec 2018 11:04:17 +0100 Subject: [PATCH 1721/2608] pinctrl: sx150x: handle failure case of devm_kstrdup [ Upstream commit a9d9f6b83f1bb05da849b3540e6d1f70ef1c2343 ] devm_kstrdup() may return NULL if internal allocation failed. Thus using label, name is unsafe without checking. Therefor in the unlikely case of allocation failure, sx150x_probe() simply returns -ENOMEM. Signed-off-by: Nicholas Mc Guire Fixes: 9e80f9064e73 ("pinctrl: Add SX150X GPIO Extender Pinctrl Driver") Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-sx150x.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-sx150x.c b/drivers/pinctrl/pinctrl-sx150x.c index 70a0228f4e7f..2d0f4f760326 100644 --- a/drivers/pinctrl/pinctrl-sx150x.c +++ b/drivers/pinctrl/pinctrl-sx150x.c @@ -1166,7 +1166,6 @@ static int sx150x_probe(struct i2c_client *client, } /* Register GPIO controller */ - pctl->gpio.label = devm_kstrdup(dev, client->name, GFP_KERNEL); pctl->gpio.base = -1; pctl->gpio.ngpio = pctl->data->npins; pctl->gpio.get_direction = sx150x_gpio_get_direction; @@ -1180,6 +1179,10 @@ static int sx150x_probe(struct i2c_client *client, pctl->gpio.of_node = dev->of_node; #endif pctl->gpio.can_sleep = true; + pctl->gpio.label = devm_kstrdup(dev, client->name, GFP_KERNEL); + if (!pctl->gpio.label) + return -ENOMEM; + /* * Setting multiple pins is not safe when all pins are not * handled by the same regmap register. The oscio pin (present @@ -1200,13 +1203,15 @@ static int sx150x_probe(struct i2c_client *client, /* Add Interrupt support if an irq is specified */ if (client->irq > 0) { - pctl->irq_chip.name = devm_kstrdup(dev, client->name, - GFP_KERNEL); pctl->irq_chip.irq_mask = sx150x_irq_mask; pctl->irq_chip.irq_unmask = sx150x_irq_unmask; pctl->irq_chip.irq_set_type = sx150x_irq_set_type; pctl->irq_chip.irq_bus_lock = sx150x_irq_bus_lock; pctl->irq_chip.irq_bus_sync_unlock = sx150x_irq_bus_sync_unlock; + pctl->irq_chip.name = devm_kstrdup(dev, client->name, + GFP_KERNEL); + if (!pctl->irq_chip.name) + return -ENOMEM; pctl->irq.masked = ~0; pctl->irq.sense = 0; From fa5df0d190e2b637d8e42f90fa8ae2df37212b17 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 6 Dec 2018 14:39:15 -0700 Subject: [PATCH 1722/2608] iommu/amd: Fix amd_iommu=force_isolation [ Upstream commit c12b08ebbe16f0d3a96a116d86709b04c1ee8e74 ] The parameter is still there but it's ignored. We need to check its value before deciding to go into passthrough mode for AMD IOMMU v2 capable device. We occasionally use this parameter to force v2 capable device into translation mode to debug memory corruption that we suspect is caused by DMA writes. To address the following comment from Joerg Roedel on the first version, v2 capability of device is completely ignored. > This breaks the iommu_v2 use-case, as it needs a direct mapping for the > devices that support it. And from Documentation/admin-guide/kernel-parameters.txt: This option does not override iommu=pt Fixes: aafd8ba0ca74 ("iommu/amd: Implement add_device and remove_device") Signed-off-by: Yu Zhao Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index efa6cd2500b9..766103ea237e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -442,7 +442,14 @@ static int iommu_init_device(struct device *dev) dev_data->alias = get_alias(dev); - if (dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { + /* + * By default we use passthrough mode for IOMMUv2 capable device. + * But if amd_iommu=force_isolation is set (e.g. to debug DMA to + * invalid address), we ignore the capability for the device so + * it'll be forced to go into translation mode. + */ + if ((iommu_pass_through || !amd_iommu_force_isolation) && + dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { struct amd_iommu *iommu; iommu = amd_iommu_rlookup_table[dev_data->devid]; From 43f93ca2607bc08f1165a77c4044ba76dba13504 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Fri, 7 Dec 2018 09:17:07 -0800 Subject: [PATCH 1723/2608] ARM: dts: Fix OMAP4430 SDP Ethernet startup [ Upstream commit 84fb6c7feb1494ebb7d1ec8b95cfb7ada0264465 ] It was noticed that unbinding and rebinding the KSZ8851 ethernet resulted in the driver reporting "failed to read device ID" at probe. Probing the reset line with a 'scope while repeatedly attempting to bind the driver in a shell loop revealed that the KSZ8851 RSTN pin is constantly held at zero, meaning the device is held in reset, and does not respond on the SPI bus. Experimentation with the startup delay on the regulator set to 50ms shows that the reset is positively released after 20ms. Schematics for this board are not available, and the traces are buried in the inner layers of the board which makes tracing where the RSTN pin extremely difficult. We can only guess that the RSTN pin is wired to a reset generator chip driven off the ethernet supply, which fits the observed behaviour. Include this delay in the regulator startup delay - effectively treating the reset as a "supply stable" indicator. This can not be modelled as a delay in the KSZ8851 driver since the reset generation is board specific - if the RSTN pin had been wired to a GPIO, reset could be released earlier via the already provided support in the KSZ8851 driver. This also got confirmed by Peter Ujfalusi based on Blaze schematics that should be very close to SDP4430: TPS22902YFPR is used as the regulator switch (gpio48 controlled): Convert arm boot_lock to raw The VOUT is routed to TPS3808G01DBV. (SCH Note: Threshold set at 90%. Vsense: 0.405V). According to the TPS3808 data sheet the RESET delay time when Ct is open (this is the case in the schema): MIN/TYP/MAX: 12/20/28 ms. Signed-off-by: Russell King Reviewed-by: Peter Ujfalusi [tony@atomide.com: updated with notes from schematics from Peter] Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap4-sdp.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 280d92d42bf1..bfad6aadfe88 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -33,6 +33,7 @@ gpio = <&gpio2 16 GPIO_ACTIVE_HIGH>; /* gpio line 48 */ enable-active-high; regulator-boot-on; + startup-delay-us = <25000>; }; vbat: fixedregulator-vbat { From fe4ae21bb039aa4237028f10441c69da4635a284 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 3 Dec 2018 17:27:54 -0500 Subject: [PATCH 1724/2608] mips: bpf: fix encoding bug for mm_srlv32_op [ Upstream commit 17f6c83fb5ebf7db4fcc94a5be4c22d5a7bfe428 ] For micro-mips, srlv inside POOL32A encoding space should use 0x50 sub-opcode, NOT 0x90. Some early version ISA doc describes the encoding as 0x90 for both srlv and srav, this looks to me was a typo. I checked Binutils libopcode implementation which is using 0x50 for srlv and 0x90 for srav. v1->v2: - Keep mm_srlv32_op sorted by value. Fixes: f31318fdf324 ("MIPS: uasm: Add srlv uasm instruction") Cc: Markos Chandras Cc: Paul Burton Cc: linux-mips@vger.kernel.org Acked-by: Jakub Kicinski Acked-by: Song Liu Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- arch/mips/include/uapi/asm/inst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index c05dcf5ab414..273ef58f4d43 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -369,8 +369,8 @@ enum mm_32a_minor_op { mm_ext_op = 0x02c, mm_pool32axf_op = 0x03c, mm_srl32_op = 0x040, + mm_srlv32_op = 0x050, mm_sra_op = 0x080, - mm_srlv32_op = 0x090, mm_rotr_op = 0x0c0, mm_lwxs_op = 0x118, mm_addu32_op = 0x150, From 1bba9e190b83e5f787f400adf12c96054708552c Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 28 Nov 2018 08:01:22 -0500 Subject: [PATCH 1725/2608] media: coda: fix H.264 deblocking filter controls [ Upstream commit 75fa6e4f83a0923fe753827d354998d448b4fd6a ] Add support for the third loop filter mode V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED_AT_SLICE_BOUNDARY, and fix V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA and V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA controls. The filter offset controls are signed values in the -6 to 6 range and are stored into the slice header fields slice_alpha_c0_offset_div2 and slice_beta_offset_div2. The actual filter offsets FilterOffsetA/B are double their value, in range of -12 to 12. Rename variables to more closely match the nomenclature in the H.264 specification. Signed-off-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/coda/coda-bit.c | 19 +++++++++---------- drivers/media/platform/coda/coda-common.c | 15 +++++++-------- drivers/media/platform/coda/coda.h | 6 +++--- drivers/media/platform/coda/coda_regs.h | 2 +- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c index 291c40933935..3457a5f1c8a8 100644 --- a/drivers/media/platform/coda/coda-bit.c +++ b/drivers/media/platform/coda/coda-bit.c @@ -953,16 +953,15 @@ static int coda_start_encoding(struct coda_ctx *ctx) else coda_write(dev, CODA_STD_H264, CODA_CMD_ENC_SEQ_COD_STD); - if (ctx->params.h264_deblk_enabled) { - value = ((ctx->params.h264_deblk_alpha & - CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) << - CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) | - ((ctx->params.h264_deblk_beta & - CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) << - CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET); - } else { - value = 1 << CODA_264PARAM_DISABLEDEBLK_OFFSET; - } + value = ((ctx->params.h264_disable_deblocking_filter_idc & + CODA_264PARAM_DISABLEDEBLK_MASK) << + CODA_264PARAM_DISABLEDEBLK_OFFSET) | + ((ctx->params.h264_slice_alpha_c0_offset_div2 & + CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) << + CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) | + ((ctx->params.h264_slice_beta_offset_div2 & + CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) << + CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET); coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA); break; case V4L2_PIX_FMT_JPEG: diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 99d138d3f87f..2e1472fadc2c 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -1675,14 +1675,13 @@ static int coda_s_ctrl(struct v4l2_ctrl *ctrl) ctx->params.h264_max_qp = ctrl->val; break; case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA: - ctx->params.h264_deblk_alpha = ctrl->val; + ctx->params.h264_slice_alpha_c0_offset_div2 = ctrl->val; break; case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA: - ctx->params.h264_deblk_beta = ctrl->val; + ctx->params.h264_slice_beta_offset_div2 = ctrl->val; break; case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE: - ctx->params.h264_deblk_enabled = (ctrl->val == - V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_ENABLED); + ctx->params.h264_disable_deblocking_filter_idc = ctrl->val; break; case V4L2_CID_MPEG_VIDEO_H264_PROFILE: /* TODO: switch between baseline and constrained baseline */ @@ -1764,13 +1763,13 @@ static void coda_encode_ctrls(struct coda_ctx *ctx) v4l2_ctrl_new_std(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_MAX_QP, 0, 51, 1, 51); v4l2_ctrl_new_std(&ctx->ctrls, &coda_ctrl_ops, - V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA, 0, 15, 1, 0); + V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA, -6, 6, 1, 0); v4l2_ctrl_new_std(&ctx->ctrls, &coda_ctrl_ops, - V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA, 0, 15, 1, 0); + V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA, -6, 6, 1, 0); v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE, - V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED, 0x0, - V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_ENABLED); + V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED_AT_SLICE_BOUNDARY, + 0x0, V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_ENABLED); v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE, 0x0, diff --git a/drivers/media/platform/coda/coda.h b/drivers/media/platform/coda/coda.h index c5f504d8cf67..389a882cc3da 100644 --- a/drivers/media/platform/coda/coda.h +++ b/drivers/media/platform/coda/coda.h @@ -114,9 +114,9 @@ struct coda_params { u8 h264_inter_qp; u8 h264_min_qp; u8 h264_max_qp; - u8 h264_deblk_enabled; - u8 h264_deblk_alpha; - u8 h264_deblk_beta; + u8 h264_disable_deblocking_filter_idc; + s8 h264_slice_alpha_c0_offset_div2; + s8 h264_slice_beta_offset_div2; u8 h264_profile_idc; u8 h264_level_idc; u8 mpeg4_intra_qp; diff --git a/drivers/media/platform/coda/coda_regs.h b/drivers/media/platform/coda/coda_regs.h index 38df5fd9a2fa..546f5762357c 100644 --- a/drivers/media/platform/coda/coda_regs.h +++ b/drivers/media/platform/coda/coda_regs.h @@ -292,7 +292,7 @@ #define CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET 8 #define CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK 0x0f #define CODA_264PARAM_DISABLEDEBLK_OFFSET 6 -#define CODA_264PARAM_DISABLEDEBLK_MASK 0x01 +#define CODA_264PARAM_DISABLEDEBLK_MASK 0x03 #define CODA_264PARAM_CONSTRAINEDINTRAPREDFLAG_OFFSET 5 #define CODA_264PARAM_CONSTRAINEDINTRAPREDFLAG_MASK 0x01 #define CODA_264PARAM_CHROMAQPOFFSET_OFFSET 0 From 03a127ef9b58df879594dbac1dd6cc5b38acaab9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 17 May 2018 17:00:10 +0200 Subject: [PATCH 1726/2608] ARM: dts: Fix up the D-Link DIR-685 MTD partition info [ Upstream commit 738a05e673435afb986b53da43befd83ad87ec3b ] The vendor firmware was analyzed to get the right idea about this flash layout. /proc/mtd contains: dev: size erasesize name mtd0: 01e7ff40 00020000 "rootfs" mtd1: 01f40000 00020000 "upgrade" mtd2: 00040000 00020000 "rgdb" mtd3: 00020000 00020000 "nvram" mtd4: 00040000 00020000 "RedBoot" mtd5: 00020000 00020000 "LangPack" mtd6: 02000000 00020000 "flash" Here "flash" is obviously the whole device and we know "rootfs" is a bogus hack to point to a squashfs rootfs inside of the main "upgrade partition". We know "RedBoot" is the first 0x40000 of the flash and the "upgrade" partition follows from 0x40000 to 0x1f8000. So we have mtd0, 1, 4 and 6 covered. Remains: mtd2: 00040000 00020000 "rgdb" mtd3: 00020000 00020000 "nvram" mtd5: 00020000 00020000 "LangPack" Inspecting the flash at 0x1f8000 and 0x1fa000 reveals each of these starting with "RGCFG1" so we assume 0x1f8000-1fbfff is "rgdb" of 0x40000. Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/boot/dts/gemini-dlink-dir-685.dts | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts index e75e2d44371c..d6f752ab07bb 100644 --- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts +++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts @@ -128,20 +128,16 @@ read-only; }; /* - * Between the boot loader and the rootfs is the kernel - * in a custom Storlink format flashed from the boot - * menu. The rootfs is in squashfs format. + * This firmware image contains the kernel catenated + * with the squashfs root filesystem. For some reason + * this is called "upgrade" on the vendor system. */ - partition@1800c0 { - label = "rootfs"; - reg = <0x001800c0 0x01dbff40>; - read-only; - }; - partition@1f40000 { + partition@40000 { label = "upgrade"; - reg = <0x01f40000 0x00040000>; + reg = <0x00040000 0x01f40000>; read-only; }; + /* RGDB, Residental Gateway Database? */ partition@1f80000 { label = "rgdb"; reg = <0x01f80000 0x00040000>; From 18ddd41b3ea1a064425fdf2bbdfc5b3cd6e4a185 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 7 Nov 2018 20:46:02 +0100 Subject: [PATCH 1727/2608] watchdog: renesas_wdt: don't set divider while watchdog is running [ Upstream commit e990e12741877e9bfac402ca468f4007a75f6e2a ] The datasheet says we must stop the timer before changing the clock divider. This can happen when the restart handler is called while the watchdog is running. Signed-off-by: Wolfram Sang Reviewed-by: Fabrizio Castro Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/renesas_wdt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/renesas_wdt.c b/drivers/watchdog/renesas_wdt.c index 831ef83f6de1..c4a17d72d025 100644 --- a/drivers/watchdog/renesas_wdt.c +++ b/drivers/watchdog/renesas_wdt.c @@ -74,12 +74,17 @@ static int rwdt_init_timeout(struct watchdog_device *wdev) static int rwdt_start(struct watchdog_device *wdev) { struct rwdt_priv *priv = watchdog_get_drvdata(wdev); + u8 val; pm_runtime_get_sync(wdev->parent); - rwdt_write(priv, 0, RWTCSRB); - rwdt_write(priv, priv->cks, RWTCSRA); + /* Stop the timer before we modify any register */ + val = readb_relaxed(priv->base + RWTCSRA) & ~RWTCSRA_TME; + rwdt_write(priv, val, RWTCSRA); + rwdt_init_timeout(wdev); + rwdt_write(priv, priv->cks, RWTCSRA); + rwdt_write(priv, 0, RWTCSRB); while (readb_relaxed(priv->base + RWTCSRA) & RWTCSRA_WRFLG) cpu_relax(); From b38ab62237c9c1cd51886348f10ef71826c8040f Mon Sep 17 00:00:00 2001 From: Tejas Joglekar Date: Mon, 10 Dec 2018 16:08:13 +0530 Subject: [PATCH 1728/2608] usb: dwc3: gadget: Disable CSP for stream OUT ep [ Upstream commit 244add8ebfb231c39db9e33b204bd0ce8f24f782 ] In stream mode, when fast-forwarding TRBs, the stream number is not cleared causing the new stream to not get assigned. So we don't want controller to carry on transfers when short packet is received. So disable the CSP for stream capable endpoint. This is based on the 3.30a Programming guide, where table 3-1 device descriptor structure field definitions says for CSP bit If this bit is 0, the controller generates an XferComplete event and remove the stream. So if we keep CSP as 1 then switching between streams would not happen as in stream mode, when fast-forwarding TRBs, the stream number is not cleared causing the new stream to not get assigned. Signed-off-by: Tejas Joglekar Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b80eefa355b6..5b34994b7b08 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -959,9 +959,13 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, usb_endpoint_type(dep->endpoint.desc)); } - /* always enable Continue on Short Packet */ + /* + * Enable Continue on Short Packet + * when endpoint is not a stream capable + */ if (usb_endpoint_dir_out(dep->endpoint.desc)) { - trb->ctrl |= DWC3_TRB_CTRL_CSP; + if (!dep->stream_capable) + trb->ctrl |= DWC3_TRB_CTRL_CSP; if (short_not_ok) trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI; From 4e8ca0d44cdecb61c02bc00e92d337ebdbd2e366 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Tue, 4 Dec 2018 11:52:13 +0530 Subject: [PATCH 1729/2608] iommu/arm-smmu: Add support for qcom,smmu-v2 variant [ Upstream commit 89cddc563743cb1e0068867ac97013b2a5bf86aa ] qcom,smmu-v2 is an arm,smmu-v2 implementation with specific clock and power requirements. On msm8996, multiple cores, viz. mdss, video, etc. use this smmu. On sdm845, this smmu is used with gpu. Add bindings for the same. Signed-off-by: Vivek Gautam Reviewed-by: Rob Herring Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 15b5856475fc..01a6a0ea2a4f 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -117,6 +117,7 @@ enum arm_smmu_implementation { GENERIC_SMMU, ARM_MMU500, CAVIUM_SMMUV2, + QCOM_SMMUV2, }; /* Until ACPICA headers cover IORT rev. C */ @@ -1910,6 +1911,7 @@ ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2); static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 }, @@ -1918,6 +1920,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = &arm_mmu401 }, { .compatible = "arm,mmu-500", .data = &arm_mmu500 }, { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 }, + { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 }, { }, }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); From d765bc20bd4a552fedcd1c62a3a2117c48ee04e9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 7 Nov 2018 22:58:24 +0000 Subject: [PATCH 1730/2608] iommu/arm-smmu-v3: Use explicit mb() when moving cons pointer [ Upstream commit a868e8530441286342f90c1fd9c5f24de3aa2880 ] After removing an entry from a queue (e.g. reading an event in arm_smmu_evtq_thread()) it is necessary to advance the MMIO consumer pointer to free the queue slot back to the SMMU. A memory barrier is required here so that all reads targetting the queue entry have completed before the consumer pointer is updated. The implementation of queue_inc_cons() relies on a writel() to complete the previous reads, but this is incorrect because writel() is only guaranteed to complete prior writes. This patch replaces the call to writel() with an mb(); writel_relaxed() sequence, which gives us the read->write ordering which we require. Cc: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm-smmu-v3.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 26e99c03390f..09eb258a9a7d 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -730,7 +730,13 @@ static void queue_inc_cons(struct arm_smmu_queue *q) u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); - writel(q->cons, q->cons_reg); + + /* + * Ensure that all CPU accesses (reads and writes) to the queue + * are complete before we update the cons pointer. + */ + mb(); + writel_relaxed(q->cons, q->cons_reg); } static int queue_sync_prod(struct arm_smmu_queue *q) From cca840d0bba7891a920c7771fb620d3d765d3133 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 24 Nov 2018 21:14:16 +0300 Subject: [PATCH 1731/2608] sata_rcar: fix deferred probing [ Upstream commit 9f83cfdb1ace3ef268ecc6fda50058d2ec37d603 ] The driver overrides the error codes returned by platform_get_irq() to -EINVAL, so if it returns -EPROBE_DEFER, the driver would fail the probe permanently instead of the deferred probing. Switch to propagating the error code upstream, still checking/overriding IRQ0 as libata regards it as "no IRQ" (thus polling) anyway... Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") Reviewed-by: Simon Horman Reviewed-by: Geert Uytterhoeven Signed-off-by: Sergei Shtylyov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/sata_rcar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 537d11869069..3e82a4ac239e 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -880,7 +880,9 @@ static int sata_rcar_probe(struct platform_device *pdev) int ret = 0; irq = platform_get_irq(pdev, 0); - if (irq <= 0) + if (irq < 0) + return irq; + if (!irq) return -EINVAL; priv = devm_kzalloc(&pdev->dev, sizeof(struct sata_rcar_priv), From bda9f846ae0708a42588235b68533255058bd94d Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Fri, 30 Nov 2018 07:23:47 +0000 Subject: [PATCH 1732/2608] clk: imx6sl: ensure MMDC CH0 handshake is bypassed [ Upstream commit 0efcc2c0fd2001a83240a8c3d71f67770484917e ] Same as other i.MX6 SoCs, ensure unused MMDC channel's handshake is bypassed, this is to make sure no request signal will be generated when periphe_clk_sel is changed or SRC warm reset is triggered. Signed-off-by: Anson Huang Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx6sl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/imx/clk-imx6sl.c b/drivers/clk/imx/clk-imx6sl.c index 9642cdf0fb88..c264a744fae8 100644 --- a/drivers/clk/imx/clk-imx6sl.c +++ b/drivers/clk/imx/clk-imx6sl.c @@ -17,6 +17,8 @@ #include "clk.h" +#define CCDR 0x4 +#define BM_CCM_CCDR_MMDC_CH0_MASK (1 << 17) #define CCSR 0xc #define BM_CCSR_PLL1_SW_CLK_SEL (1 << 2) #define CACRR 0x10 @@ -414,6 +416,10 @@ static void __init imx6sl_clocks_init(struct device_node *ccm_node) clks[IMX6SL_CLK_USDHC3] = imx_clk_gate2("usdhc3", "usdhc3_podf", base + 0x80, 6); clks[IMX6SL_CLK_USDHC4] = imx_clk_gate2("usdhc4", "usdhc4_podf", base + 0x80, 8); + /* Ensure the MMDC CH0 handshake is bypassed */ + writel_relaxed(readl_relaxed(base + CCDR) | + BM_CCM_CCDR_MMDC_CH0_MASK, base + CCDR); + imx_check_clocks(clks, ARRAY_SIZE(clks)); clk_data.clks = clks; From ebe9a3ca07d84556b007fa1418cf9501660af4d0 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 10 Dec 2018 11:26:41 -0500 Subject: [PATCH 1733/2608] cpuidle: big.LITTLE: fix refcount leak [ Upstream commit 9456823c842f346c74265fcd98d008d87a7eb6f5 ] of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. bl_idle_init() doesn't do that, so fix it. Signed-off-by: Yangtao Li Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpuidle/cpuidle-big_little.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c index db2ede565f1a..b44476a1b7ad 100644 --- a/drivers/cpuidle/cpuidle-big_little.c +++ b/drivers/cpuidle/cpuidle-big_little.c @@ -167,6 +167,7 @@ static int __init bl_idle_init(void) { int ret; struct device_node *root = of_find_node_by_path("/"); + const struct of_device_id *match_id; if (!root) return -ENODEV; @@ -174,7 +175,11 @@ static int __init bl_idle_init(void) /* * Initialize the driver just for a compliant set of machines */ - if (!of_match_node(compatible_machine_match, root)) + match_id = of_match_node(compatible_machine_match, root); + + of_node_put(root); + + if (!match_id) return -ENODEV; if (!mcpm_is_available()) From 0d3194a346c586655c9b1d91c60e720eb81b6af7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 11 Dec 2018 16:32:47 +0530 Subject: [PATCH 1734/2608] OPP: Use opp_table->regulators to verify no regulator case [ Upstream commit 90e3577b5feb42bac1269e16bb3d2bdd8f6df40f ] The value of opp_table->regulator_count is not very consistent right now and it may end up being 0 while we do have a "opp-microvolt" property in the OPP table. It was kept that way as we used to check if any regulators are set with the OPP core for a device or not using value of regulator_count. Lets use opp_table->regulators for that purpose as the meaning of regulator_count is going to change in the later patches. Reported-by: Quentin Perret Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/base/power/opp/core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index d4862775b9f6..d5e7e8cc4f22 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -192,12 +192,12 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) if (IS_ERR(opp_table)) return 0; - count = opp_table->regulator_count; - /* Regulator may not be required for the device */ - if (!count) + if (!opp_table->regulators) goto put_opp_table; + count = opp_table->regulator_count; + uV = kmalloc_array(count, sizeof(*uV), GFP_KERNEL); if (!uV) goto put_opp_table; @@ -921,6 +921,9 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, struct regulator *reg; int i; + if (!opp_table->regulators) + return true; + for (i = 0; i < opp_table->regulator_count; i++) { reg = opp_table->regulators[i]; @@ -1226,7 +1229,7 @@ static int _allocate_set_opp_data(struct opp_table *opp_table) struct dev_pm_set_opp_data *data; int len, count = opp_table->regulator_count; - if (WARN_ON(!count)) + if (WARN_ON(!opp_table->regulators)) return -EINVAL; /* space for set_opp_data */ From 8e6df638f192fb96a9130a4c6fe34efd2b69fd88 Mon Sep 17 00:00:00 2001 From: "Adamski, Krzysztof (Nokia - PL/Wroclaw)" Date: Mon, 10 Dec 2018 15:01:27 +0000 Subject: [PATCH 1735/2608] i2c-axxia: check for error conditions first [ Upstream commit 4f5c85fe3a60ace555d09898166af372547f97fc ] It was observed that when using seqentional mode contrary to the documentation, the SS bit (which is supposed to only be set if automatic/sequence command completed normally), is sometimes set together with NA (NAK in address phase) causing transfer to falsely be considered successful. My assumption is that this does not happen during manual mode since the controller is stopping its work the moment it sets NA/ND bit in status register. This is not the case in Automatic/Sequentional mode where it is still working to send STOP condition and the actual status we get depends on the time when the ISR is run. This patch changes the order of checking status bits in ISR - error conditions are checked first and only if none of them occurred, the transfer may be considered successful. This is required to introduce using of sequentional mode in next patch. Signed-off-by: Krzysztof Adamski Reviewed-by: Alexander Sverdlin Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-axxia.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c index deea13838648..30d80ce0bde3 100644 --- a/drivers/i2c/busses/i2c-axxia.c +++ b/drivers/i2c/busses/i2c-axxia.c @@ -296,22 +296,7 @@ static irqreturn_t axxia_i2c_isr(int irq, void *_dev) i2c_int_disable(idev, MST_STATUS_TFL); } - if (status & MST_STATUS_SCC) { - /* Stop completed */ - i2c_int_disable(idev, ~MST_STATUS_TSS); - complete(&idev->msg_complete); - } else if (status & MST_STATUS_SNS) { - /* Transfer done */ - i2c_int_disable(idev, ~MST_STATUS_TSS); - if (i2c_m_rd(idev->msg) && idev->msg_xfrd < idev->msg->len) - axxia_i2c_empty_rx_fifo(idev); - complete(&idev->msg_complete); - } else if (status & MST_STATUS_TSS) { - /* Transfer timeout */ - idev->msg_err = -ETIMEDOUT; - i2c_int_disable(idev, ~MST_STATUS_TSS); - complete(&idev->msg_complete); - } else if (unlikely(status & MST_STATUS_ERR)) { + if (unlikely(status & MST_STATUS_ERR)) { /* Transfer error */ i2c_int_disable(idev, ~0); if (status & MST_STATUS_AL) @@ -328,6 +313,21 @@ static irqreturn_t axxia_i2c_isr(int irq, void *_dev) readl(idev->base + MST_TX_BYTES_XFRD), readl(idev->base + MST_TX_XFER)); complete(&idev->msg_complete); + } else if (status & MST_STATUS_SCC) { + /* Stop completed */ + i2c_int_disable(idev, ~MST_STATUS_TSS); + complete(&idev->msg_complete); + } else if (status & MST_STATUS_SNS) { + /* Transfer done */ + i2c_int_disable(idev, ~MST_STATUS_TSS); + if (i2c_m_rd(idev->msg) && idev->msg_xfrd < idev->msg->len) + axxia_i2c_empty_rx_fifo(idev); + complete(&idev->msg_complete); + } else if (status & MST_STATUS_TSS) { + /* Transfer timeout */ + idev->msg_err = -ETIMEDOUT; + i2c_int_disable(idev, ~MST_STATUS_TSS); + complete(&idev->msg_complete); } out: From 6d1709b73d6b4da752e86c207bd7016daede0fa0 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 4 Oct 2018 20:28:47 +0800 Subject: [PATCH 1736/2608] phy: sun4i-usb: add support for missing USB PHY index [ Upstream commit 2659392e5c08dff626e6db1d739adff58a94604d ] The new Allwinner H6 SoC's USB2 PHY has two holes -- USB1 (which is a 3.0 port with dedicated PHY) and USB2 (which doesn't exist at all). Add support for this kind of missing USB PHY index. Signed-off-by: Icenowy Zheng Reviewed-by: Chen-Yu Tsai Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/allwinner/phy-sun4i-usb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index afedb8cd1990..d1ccff527756 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -125,6 +125,7 @@ struct sun4i_usb_phy_cfg { bool dedicated_clocks; bool enable_pmu_unk1; bool phy0_dual_route; + int missing_phys; }; struct sun4i_usb_phy_data { @@ -645,6 +646,9 @@ static struct phy *sun4i_usb_phy_xlate(struct device *dev, if (args->args[0] >= data->cfg->num_phys) return ERR_PTR(-ENODEV); + if (data->cfg->missing_phys & BIT(args->args[0])) + return ERR_PTR(-ENODEV); + return data->phys[args->args[0]].phy; } @@ -740,6 +744,9 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) struct sun4i_usb_phy *phy = data->phys + i; char name[16]; + if (data->cfg->missing_phys & BIT(i)) + continue; + snprintf(name, sizeof(name), "usb%d_vbus", i); phy->vbus = devm_regulator_get_optional(dev, name); if (IS_ERR(phy->vbus)) { From 285187e29616daed36a77812a96f1666eb2d6c5d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Dec 2018 14:29:20 +0100 Subject: [PATCH 1737/2608] udf: Fix BUG on corrupted inode [ Upstream commit d288d95842f1503414b7eebce3773bac3390457e ] When inode is corrupted so that extent type is invalid, some functions (such as udf_truncate_extents()) will just BUG. Check that extent type is valid when loading the inode to memory. Reported-by: Anatoly Trosinenko Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/udf/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8dacf4f57414..28b9d7cca29b 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1357,6 +1357,12 @@ reread: iinfo->i_alloc_type = le16_to_cpu(fe->icbTag.flags) & ICBTAG_FLAG_AD_MASK; + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_SHORT && + iinfo->i_alloc_type != ICBTAG_FLAG_AD_LONG && + iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + ret = -EIO; + goto out; + } iinfo->i_unique = 0; iinfo->i_lenEAttr = 0; iinfo->i_lenExtents = 0; From cf96f234ef4f3ad6b43c968c77c307271537692a Mon Sep 17 00:00:00 2001 From: Joey Zhang Date: Mon, 10 Dec 2018 17:12:22 +0800 Subject: [PATCH 1738/2608] switchtec: Fix SWITCHTEC_IOCTL_EVENT_IDX_ALL flags overwrite [ Upstream commit e4a7dca5de625018b29417ecc39dc5037d9a5a36 ] In the ioctl_event_ctl() SWITCHTEC_IOCTL_EVENT_IDX_ALL case, we call event_ctl() several times with the same "ctl" struct. Each call clobbers ctl.flags, which leads to the problem that we may not actually enable or disable all events as the user requested. Preserve the event flag value with a temporary variable. Fixes: 52eabba5bcdb ("switchtec: Add IOCTLs to the Switchtec driver") Signed-off-by: Joey Zhang Signed-off-by: Wesley Sheng [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Signed-off-by: Sasha Levin --- drivers/pci/switch/switchtec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 620f5b995a12..e3aefdafae89 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1064,6 +1064,7 @@ static int ioctl_event_ctl(struct switchtec_dev *stdev, { int ret; int nr_idxs; + unsigned int event_flags; struct switchtec_ioctl_event_ctl ctl; if (copy_from_user(&ctl, uctl, sizeof(ctl))) @@ -1085,7 +1086,9 @@ static int ioctl_event_ctl(struct switchtec_dev *stdev, else return -EINVAL; + event_flags = ctl.flags; for (ctl.index = 0; ctl.index < nr_idxs; ctl.index++) { + ctl.flags = event_flags; ret = event_ctl(stdev, &ctl); if (ret < 0) return ret; From f1aae01e9087a5c6da79894fa821d0bd23bd85b3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 11 Dec 2018 19:20:52 -0800 Subject: [PATCH 1739/2608] selftests/bpf: use __bpf_constant_htons in test_prog.c [ Upstream commit a0517a0f7ef23550b4484c37e2b9c2d32abebf64 ] For some reason, my older GCC (< 4.8) isn't smart enough to optimize the !__builtin_constant_p() branch in bpf_htons, I see: error: implicit declaration of function '__builtin_bswap16' Let's use __bpf_constant_htons as suggested by Daniel Borkmann. I tried to use simple htons, but it produces the following: test_progs.c:54:17: error: braced-group within expression allowed only inside a function .eth.h_proto = htons(ETH_P_IP), Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_progs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 11ee25cea227..1903fb4f45d8 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -43,10 +43,10 @@ static struct { struct iphdr iph; struct tcphdr tcp; } __packed pkt_v4 = { - .eth.h_proto = bpf_htons(ETH_P_IP), + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = 6, - .iph.tot_len = bpf_htons(MAGIC_BYTES), + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, }; @@ -56,9 +56,9 @@ static struct { struct ipv6hdr iph; struct tcphdr tcp; } __packed pkt_v6 = { - .eth.h_proto = bpf_htons(ETH_P_IPV6), + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = 6, - .iph.payload_len = bpf_htons(MAGIC_BYTES), + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, }; From 94d4bd831683bb02d8c84f218c404bf0c08cc671 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Dec 2018 22:58:39 +0100 Subject: [PATCH 1740/2608] ARM: pxa: avoid section mismatch warning [ Upstream commit 88af3209aa0881aa5ffd99664b6080a4be5f24e5 ] WARNING: vmlinux.o(.text+0x19f90): Section mismatch in reference from the function littleton_init_lcd() to the function .init.text:pxa_set_fb_info() The function littleton_init_lcd() references the function __init pxa_set_fb_info(). This is often because littleton_init_lcd lacks a __init annotation or the annotation of pxa_set_fb_info is wrong. WARNING: vmlinux.o(.text+0xf824): Section mismatch in reference from the function zeus_register_ohci() to the function .init.text:pxa_set_ohci_info() The function zeus_register_ohci() references the function __init pxa_set_ohci_info(). This is often because zeus_register_ohci lacks a __init annotation or the annotation of pxa_set_ohci_info is wrong. WARNING: vmlinux.o(.text+0xf95c): Section mismatch in reference from the function cm_x300_init_u2d() to the function .init.text:pxa3xx_set_u2d_info() The function cm_x300_init_u2d() references the function __init pxa3xx_set_u2d_info(). This is often because cm_x300_init_u2d lacks a __init annotation or the annotation of pxa3xx_set_u2d_info is wrong. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- arch/arm/mach-pxa/cm-x300.c | 2 +- arch/arm/mach-pxa/littleton.c | 2 +- arch/arm/mach-pxa/zeus.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 868448d2cd82..38ab30869821 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -547,7 +547,7 @@ static struct pxa3xx_u2d_platform_data cm_x300_u2d_platform_data = { .exit = cm_x300_u2d_exit, }; -static void cm_x300_init_u2d(void) +static void __init cm_x300_init_u2d(void) { pxa3xx_set_u2d_info(&cm_x300_u2d_platform_data); } diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index fae38fdc8d8e..5cd6b4bd31e0 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -183,7 +183,7 @@ static struct pxafb_mach_info littleton_lcd_info = { .lcd_conn = LCD_COLOR_TFT_16BPP, }; -static void littleton_init_lcd(void) +static void __init littleton_init_lcd(void) { pxa_set_fb_info(NULL, &littleton_lcd_info); } diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index ecbcaee5a2d5..c293ea0a7eaf 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -558,7 +558,7 @@ static struct pxaohci_platform_data zeus_ohci_platform_data = { .flags = ENABLE_PORT_ALL | POWER_SENSE_LOW, }; -static void zeus_register_ohci(void) +static void __init zeus_register_ohci(void) { /* Port 2 is shared between host and client interface. */ UP2OCR = UP2OCR_HXOE | UP2OCR_HXS | UP2OCR_DMPDE | UP2OCR_DPPDE; From 6bf11a2faf40992bfc3f1b1088392f41e2759828 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 13 Dec 2018 00:08:38 -0200 Subject: [PATCH 1741/2608] ASoC: fsl: Fix SND_SOC_EUKREA_TLV320 build error on i.MX8M [ Upstream commit add6883619a9e3bf9658eaff1a547354131bbcd9 ] eukrea-tlv320.c machine driver runs on non-DT platforms and include header file in order to be able to use some machine_is_eukrea_xxx() macros. Building it for ARM64 causes the following build error: sound/soc/fsl/eukrea-tlv320.c:28:10: fatal error: asm/mach-types.h: No such file or directory Avoid this error by not allowing to build the SND_SOC_EUKREA_TLV320 driver when ARM64 is selected. This is needed in preparation for the i.MX8M support. Reported-by: kbuild test robot Signed-off-by: Fabio Estevam Acked-by: Shawn Guo Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 37f9b6201918..4087deeda7cf 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -221,7 +221,7 @@ config SND_SOC_PHYCORE_AC97 config SND_SOC_EUKREA_TLV320 tristate "Eukrea TLV320" - depends on ARCH_MXC && I2C + depends on ARCH_MXC && !ARM64 && I2C select SND_SOC_TLV320AIC23_I2C select SND_SOC_IMX_AUDMUX select SND_SOC_IMX_SSI From e8041b3dd34892597d0ed25fd6761d00d14a1c29 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:03 +1100 Subject: [PATCH 1742/2608] KVM: PPC: Book3S: Only report KVM_CAP_SPAPR_TCE_VFIO on powernv machines [ Upstream commit 693ac10a88a2219bde553b2e8460dbec97e594e6 ] The kvm capability KVM_CAP_SPAPR_TCE_VFIO is used to indicate the availability of in kernel tce acceleration for vfio. However it is currently the case that this is only available on a powernv machine, not for a pseries machine. Thus make this capability dependent on having the cpu feature CPU_FTR_HVMODE. [paulus@ozlabs.org - fixed compilation for Book E.] Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin --- arch/powerpc/kvm/powerpc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ecb45361095b..a35995a6b34a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -540,8 +540,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: - /* fallthrough */ + r = 1; + break; case KVM_CAP_SPAPR_TCE_VFIO: + r = !!cpu_has_feature(CPU_FTR_HVMODE); + break; case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: From b84daf000ce9cf99fe97ef80e959eaf628088e71 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Sun, 11 Nov 2018 21:23:54 +0100 Subject: [PATCH 1743/2608] mmc: bcm2835: Recover from MMC_SEND_EXT_CSD [ Upstream commit 07d405769afea5718529fc9e341f0b13b3189b6f ] If the user issues an "mmc extcsd read", the SD controller receives what it thinks is a SEND_IF_COND command with an unexpected data block. The resulting operations leave the FSM stuck in READWAIT, a state which persists until the MMC framework resets the controller, by which point the root filesystem is likely to have been unmounted. A less heavyweight solution is to detect the condition and nudge the FSM by asserting the (self-clearing) FORCE_DATA_MODE bit. Link: https://github.com/raspberrypi/linux/issues/2728 Signed-off-by: Phil Elwell Signed-off-by: Stefan Wahren Acked-by: Eric Anholt Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/bcm2835.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index 0d3b7473bc21..abf1f3c8b0c3 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -772,6 +772,8 @@ static void bcm2835_finish_command(struct bcm2835_host *host) if (!(sdhsts & SDHSTS_CRC7_ERROR) || (host->cmd->opcode != MMC_SEND_OP_COND)) { + u32 edm, fsm; + if (sdhsts & SDHSTS_CMD_TIME_OUT) { host->cmd->error = -ETIMEDOUT; } else { @@ -780,6 +782,13 @@ static void bcm2835_finish_command(struct bcm2835_host *host) bcm2835_dumpregs(host); host->cmd->error = -EILSEQ; } + edm = readl(host->ioaddr + SDEDM); + fsm = edm & SDEDM_FSM_MASK; + if (fsm == SDEDM_FSM_READWAIT || + fsm == SDEDM_FSM_WRITESTART1) + /* Kick the FSM out of its wait */ + writel(edm | SDEDM_FORCE_DATA_MODE, + host->ioaddr + SDEDM); bcm2835_finish_request(host); return; } From f569566a1f61bfc1452091863a2fed40e1aa344a Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Sun, 11 Nov 2018 21:23:53 +0100 Subject: [PATCH 1744/2608] mmc: bcm2835: reset host on timeout [ Upstream commit f6000a4eb34e6462bc0dd39809c1bb99f9633269 ] The bcm2835 mmc host tends to lock up for unknown reason so reset it on timeout. The upper mmc block layer tries retransimitting with single blocks which tends to work out after a long wait. This is better than giving up and leaving the machine broken for no obvious reason. Fixes: 660fc733bd74 ("mmc: bcm2835: Add new driver for the sdhost controller.") Signed-off-by: Michal Suchanek Signed-off-by: Stefan Wahren Acked-by: Eric Anholt Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/bcm2835.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index abf1f3c8b0c3..5301302fb531 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -286,6 +286,7 @@ static void bcm2835_reset(struct mmc_host *mmc) if (host->dma_chan) dmaengine_terminate_sync(host->dma_chan); + host->dma_chan = NULL; bcm2835_reset_internal(host); } @@ -846,6 +847,8 @@ static void bcm2835_timeout(struct work_struct *work) dev_err(dev, "timeout waiting for hardware interrupt.\n"); bcm2835_dumpregs(host); + bcm2835_reset(host->mmc); + if (host->data) { host->data->error = -ETIMEDOUT; bcm2835_finish_data(host); From 7c114e3dc2f9dfc95282000fab2a97f797d9df15 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 5 Nov 2018 16:45:04 +0800 Subject: [PATCH 1745/2608] memstick: Prevent memstick host from getting runtime suspended during card detection [ Upstream commit e03e303edf1c63e6dd455ccd568c74e93ef3ba8c ] We can use MEMSTICK_POWER_{ON,OFF} along with pm_runtime_{get,put} helpers to let memstick host support runtime pm. The rpm count may go down to zero before the memstick host powers on, so the host can be runtime suspended. So before doing card detection, increment the rpm count to avoid the host gets runtime suspended. Balance the rpm count after card detection is done. Signed-off-by: Kai-Heng Feng Tested-by: Oleksandr Natalenko Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/memstick/core/memstick.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 76382c858c35..1246d69ba187 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -18,6 +18,7 @@ #include #include #include +#include #define DRIVER_NAME "memstick" @@ -436,6 +437,7 @@ static void memstick_check(struct work_struct *work) struct memstick_dev *card; dev_dbg(&host->dev, "memstick_check started\n"); + pm_runtime_get_noresume(host->dev.parent); mutex_lock(&host->lock); if (!host->card) { if (memstick_power_on(host)) @@ -479,6 +481,7 @@ out_power_off: host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF); mutex_unlock(&host->lock); + pm_runtime_put(host->dev.parent); dev_dbg(&host->dev, "memstick_check finished\n"); } From 0749593286d6e2ae8e070beab686ce7963004ccb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 10 Dec 2018 10:56:24 +0200 Subject: [PATCH 1746/2608] mmc: sdhci-of-esdhc: Fix timeout checks [ Upstream commit ea6d027312111c6d96309ad1a684b33cb37e6764 ] Always check the wait condition before returning timeout. Signed-off-by: Adrian Hunter Reviewed-by: Yangbo Lu Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-of-esdhc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 8332f56e6c0d..7b7d077e40fd 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -481,8 +481,12 @@ static void esdhc_clock_enable(struct sdhci_host *host, bool enable) /* Wait max 20 ms */ timeout = ktime_add_ms(ktime_get(), 20); val = ESDHC_CLOCK_STABLE; - while (!(sdhci_readl(host, ESDHC_PRSSTAT) & val)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (sdhci_readl(host, ESDHC_PRSSTAT) & val) + break; + if (timedout) { pr_err("%s: Internal clock never stabilised.\n", mmc_hostname(host->mmc)); break; @@ -558,8 +562,12 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock) /* Wait max 20 ms */ timeout = ktime_add_ms(ktime_get(), 20); - while (!(sdhci_readl(host, ESDHC_PRSSTAT) & ESDHC_CLOCK_STABLE)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (sdhci_readl(host, ESDHC_PRSSTAT) & ESDHC_CLOCK_STABLE) + break; + if (timedout) { pr_err("%s: Internal clock never stabilised.\n", mmc_hostname(host->mmc)); return; From bfbf397caedb99b2bf499bad7b8a524624d4967c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 10 Dec 2018 10:56:26 +0200 Subject: [PATCH 1747/2608] mmc: sdhci-xenon: Fix timeout checks [ Upstream commit 0e6e7c2ff397e1bbebc882ca3132148aaaef1ddd ] Always check the wait condition before returning timeout. Signed-off-by: Adrian Hunter Reviewed-by: Zhoujie Wu Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-xenon-phy.c | 10 +++++++--- drivers/mmc/host/sdhci-xenon.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index ec8794335241..82051f2b7191 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -357,9 +357,13 @@ static int xenon_emmc_phy_enable_dll(struct sdhci_host *host) /* Wait max 32 ms */ timeout = ktime_add_ms(ktime_get(), 32); - while (!(sdhci_readw(host, XENON_SLOT_EXT_PRESENT_STATE) & - XENON_DLL_LOCK_STATE)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (sdhci_readw(host, XENON_SLOT_EXT_PRESENT_STATE) & + XENON_DLL_LOCK_STATE) + break; + if (timedout) { dev_err(mmc_dev(host->mmc), "Wait for DLL Lock time-out\n"); return -ETIMEDOUT; } diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 4d0791f6ec23..a0b5089b3274 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -34,9 +34,13 @@ static int xenon_enable_internal_clk(struct sdhci_host *host) sdhci_writel(host, reg, SDHCI_CLOCK_CONTROL); /* Wait max 20 ms */ timeout = ktime_add_ms(ktime_get(), 20); - while (!((reg = sdhci_readw(host, SDHCI_CLOCK_CONTROL)) - & SDHCI_CLOCK_INT_STABLE)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + reg = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + if (reg & SDHCI_CLOCK_INT_STABLE) + break; + if (timedout) { dev_err(mmc_dev(host->mmc), "Internal clock never stabilised.\n"); return -ETIMEDOUT; } From 1d150b52ff169252785a8f639e0676f574818547 Mon Sep 17 00:00:00 2001 From: Beomho Seo Date: Fri, 14 Dec 2018 12:34:08 +0100 Subject: [PATCH 1748/2608] tty: serial: samsung: Properly set flags in autoCTS mode [ Upstream commit 31e933645742ee6719d37573a27cce0761dcf92b ] Commit 391f93f2ec9f ("serial: core: Rework hw-assited flow control support") has changed the way the autoCTS mode is handled. According to that change, serial drivers which enable H/W autoCTS mode must set UPSTAT_AUTOCTS to prevent the serial core from inadvertently disabling TX. This patch adds proper handling of UPSTAT_AUTOCTS flag. Signed-off-by: Beomho Seo [mszyprow: rephrased commit message] Signed-off-by: Marek Szyprowski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/samsung.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index 57baa84ccf86..f4b8e4e17a86 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1343,11 +1343,14 @@ static void s3c24xx_serial_set_termios(struct uart_port *port, wr_regl(port, S3C2410_ULCON, ulcon); wr_regl(port, S3C2410_UBRDIV, quot); + port->status &= ~UPSTAT_AUTOCTS; + umcon = rd_regl(port, S3C2410_UMCON); if (termios->c_cflag & CRTSCTS) { umcon |= S3C2410_UMCOM_AFC; /* Disable RTS when RX FIFO contains 63 bytes */ umcon &= ~S3C2412_UMCON_AFC_8; + port->status = UPSTAT_AUTOCTS; } else { umcon &= ~S3C2410_UMCOM_AFC; } From 4e194026e74ab9f2e09c9d60e7cfe4f2d3a3e1cf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 22 Nov 2018 16:04:56 +0200 Subject: [PATCH 1749/2608] perf test: Fix perf_event_attr test failure [ Upstream commit 741dad88dde296999da30332157ca47f0543747d ] Fix inconsistent use of tabs and spaces error: # perf test 16 -v 16: Setup struct perf_event_attr : --- start --- test child forked, pid 20224 File "/usr/libexec/perf-core/tests/attr.py", line 119 log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) ^ TabError: inconsistent use of tabs and spaces in indentation test child finished with -1 ---- end ---- Setup struct perf_event_attr: FAILED! Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181122140456.16817-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/attr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index ff9b60b99f52..44090a9a19f3 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -116,7 +116,7 @@ class Event(dict): if not self.has_key(t) or not other.has_key(t): continue if not data_equal(self[t], other[t]): - log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) + log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) # Test file description needs to have following sections: # [config] From 766c50140a7757cf81bcd0464928021cf0b5308f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Dec 2018 11:02:57 -0300 Subject: [PATCH 1750/2608] perf header: Fix unchecked usage of strncpy() [ Upstream commit 7572588085a13d5db02bf159542189f52fdb507e ] The strncpy() function may leave the destination string buffer unterminated, better use strlcpy() that we have a __weak fallback implementation for systems without it. This fixes this warning on an Alpine Linux Edge system with gcc 8.2: util/header.c: In function 'perf_event__synthesize_event_update_unit': util/header.c:3586:2: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] strncpy(ev->data, evsel->unit, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ util/header.c:3579:16: note: length computed here size_t size = strlen(evsel->unit); ^~~~~~~~~~~~~~~~~~~ Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Fixes: a6e5281780d1 ("perf tools: Add event_update event unit type") Link: https://lkml.kernel.org/n/tip-fiikh5nay70bv4zskw2aa858@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1ceb332575bd..696f2654826b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3132,7 +3132,7 @@ perf_event__synthesize_event_update_unit(struct perf_tool *tool, if (ev == NULL) return -ENOMEM; - strncpy(ev->data, evsel->unit, size); + strlcpy(ev->data, evsel->unit, size + 1); err = process(tool, (union perf_event *)ev, NULL, NULL); free(ev); return err; From e7752f5caa8b157d69988ab838e5c4e0e6ccd61c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Dec 2018 11:50:08 -0300 Subject: [PATCH 1751/2608] perf probe: Fix unchecked usage of strncpy() [ Upstream commit bef0b8970f27da5ca223e522a174d03e2587761d ] The strncpy() function may leave the destination string buffer unterminated, better use strlcpy() that we have a __weak fallback implementation for systems without it. In this case the 'target' buffer is coming from a list of build-ids that are expected to have a len of at most (SBUILD_ID_SIZE - 1) chars, so probably we're safe, but since we're using strncpy() here, use strlcpy() instead to provide the intended safety checking without the using the problematic strncpy() function. This fixes this warning on an Alpine Linux Edge system with gcc 8.2: util/probe-file.c: In function 'probe_cache__open.isra.5': util/probe-file.c:427:3: error: 'strncpy' specified bound 41 equals destination size [-Werror=stringop-truncation] strncpy(sbuildid, target, SBUILD_ID_SIZE); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Cc: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Fixes: 1f3736c9c833 ("perf probe: Show all cached probes") Link: https://lkml.kernel.org/n/tip-l7n8ggc9kl38qtdlouke5yp5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/probe-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index cdf8d83a484c..6ab9230ce8ee 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -424,7 +424,7 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target, if (target && build_id_cache__cached(target)) { /* This is a cached buildid */ - strncpy(sbuildid, target, SBUILD_ID_SIZE); + strlcpy(sbuildid, target, SBUILD_ID_SIZE); dir_name = build_id_cache__linkname(sbuildid, NULL, 0); goto found; } From 65575cf1c800be3e3f6d6860fc10603be9552456 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Nov 2018 15:07:10 +0000 Subject: [PATCH 1752/2608] arm64: KVM: Skip MMIO insn after emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0d640732dbebed0f10f18526de21652931f0b2f2 ] When we emulate an MMIO instruction, we advance the CPU state within decode_hsr(), before emulating the instruction effects. Having this logic in decode_hsr() is opaque, and advancing the state before emulation is problematic. It gets in the way of applying consistent single-step logic, and it prevents us from being able to fail an MMIO instruction with a synchronous exception. Clean this up by only advancing the CPU state *after* the effects of the instruction are emulated. Cc: Peter Maydell Reviewed-by: Alex Bennée Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- virt/kvm/arm/mmio.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index dac7ceb1a677..08443a15e6be 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -117,6 +117,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); } + /* + * The MMIO instruction is emulated and should not be re-executed + * in the guest. + */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 0; } @@ -144,11 +150,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; - /* - * The MMIO instruction is emulated and should not be re-executed - * in the guest. - */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 0; } From 440814927873ccc2891ec42aa7e1d3cf927b0bc7 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 18 Dec 2018 07:58:04 -0600 Subject: [PATCH 1753/2608] usb: musb: dsps: fix otg state machine [ Upstream commit 6010abf2c2c0e382d7e8ee44bd11f343aae90cce ] Due to lack of ID pin interrupt event on AM335x devices, the musb dsps driver uses polling to detect usb device attach for dual-role port. But in the case if a micro-A cable adapter is attached without a USB device attached to the cable, the musb state machine gets stuck in a_wait_vrise state waiting for the MUSB_CONNECT interrupt which won't happen due to the usb device is not attached. The state is stuck in a_wait_vrise even after the micro-A cable is detached, which could cause VBUS retention if then the dual-role port is attached to a host port. To fix the problem, make a_wait_vrise as a transient state, then move the state to either a_wait_bcon for host port or a_idle state for dual-role port, if no usb device is attached to the port. Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/musb/musb_dsps.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index dbb482b7e0ba..b7d460adaa61 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -242,8 +242,13 @@ static int dsps_check_status(struct musb *musb, void *unused) switch (musb->xceiv->otg->state) { case OTG_STATE_A_WAIT_VRISE: - dsps_mod_timer_optional(glue); - break; + if (musb->port_mode == MUSB_HOST) { + musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON; + dsps_mod_timer_optional(glue); + break; + } + /* fall through */ + case OTG_STATE_A_WAIT_BCON: /* keep VBUS on for host-only mode */ if (musb->port_mode == MUSB_PORT_MODE_HOST) { From 04fed9d3e7d2fdce1217195fc5ddc08e8209579a Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Tue, 18 Dec 2018 08:42:27 -0800 Subject: [PATCH 1754/2608] percpu: convert spin_lock_irq to spin_lock_irqsave. [ Upstream commit 6ab7d47bcbf0144a8cb81536c2cead4cde18acfe ] From Michael Cree: "Bisection lead to commit b38d08f3181c ("percpu: restructure locking") as being the cause of lockups at initial boot on the kernel built for generic Alpha. On a suggestion by Tejun Heo that: So, the only thing I can think of is that it's calling spin_unlock_irq() while irq handling isn't set up yet. Can you please try the followings? 1. Convert all spin_[un]lock_irq() to spin_lock_irqsave/unlock_irqrestore()." Fixes: b38d08f3181c ("percpu: restructure locking") Reported-and-tested-by: Michael Cree Acked-by: Tejun Heo Signed-off-by: Dennis Zhou Signed-off-by: Sasha Levin --- mm/percpu-km.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/percpu-km.c b/mm/percpu-km.c index 0d88d7bd5706..c22d959105b6 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -50,6 +50,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; struct pcpu_chunk *chunk; struct page *pages; + unsigned long flags; int i; chunk = pcpu_alloc_chunk(gfp); @@ -68,9 +69,9 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) chunk->data = pages; chunk->base_addr = page_address(pages) - pcpu_group_offsets[0]; - spin_lock_irq(&pcpu_lock); + spin_lock_irqsave(&pcpu_lock, flags); pcpu_chunk_populated(chunk, 0, nr_pages, false); - spin_unlock_irq(&pcpu_lock); + spin_unlock_irqrestore(&pcpu_lock, flags); pcpu_stats_chunk_alloc(); trace_percpu_create_chunk(chunk->base_addr); From e93ea07f482a772740f6ae2169d9cbee2ee6dfa1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 10 Dec 2018 06:50:09 +0000 Subject: [PATCH 1755/2608] powerpc/uaccess: fix warning/error with access_ok() [ Upstream commit 05a4ab823983d9136a460b7b5e0d49ee709a6f86 ] With the following piece of code, the following compilation warning is encountered: if (_IOC_DIR(ioc) != _IOC_NONE) { int verify = _IOC_DIR(ioc) & _IOC_READ ? VERIFY_WRITE : VERIFY_READ; if (!access_ok(verify, ioarg, _IOC_SIZE(ioc))) { drivers/platform/test/dev.c: In function 'my_ioctl': drivers/platform/test/dev.c:219:7: warning: unused variable 'verify' [-Wunused-variable] int verify = _IOC_DIR(ioc) & _IOC_READ ? VERIFY_WRITE : VERIFY_READ; This patch fixes it by referencing 'type' in the macro allthough doing nothing with it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 565cead12be2..cf26e62b268d 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -54,7 +54,7 @@ #endif #define access_ok(type, addr, size) \ - (__chk_user_ptr(addr), \ + (__chk_user_ptr(addr), (void)(type), \ __access_ok((__force unsigned long)(addr), (size), get_fs())) /* From 2c2fb2411198b997ba2613bff49ec8cc60ddb511 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 15 Dec 2018 11:03:12 +0200 Subject: [PATCH 1756/2608] mac80211: fix radiotap vendor presence bitmap handling [ Upstream commit efc38dd7d5fa5c8cdd0c917c5d00947aa0539443 ] Due to the alignment handling, it actually matters where in the code we add the 4 bytes for the presence bitmap to the length; the first field is the timestamp with 8 byte alignment so we need to add the space for the extra vendor namespace presence bitmap *before* we do any alignment for the fields. Move the presence bitmap length accounting to the right place to fix the alignment for the data properly. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9e19ddbcb06e..c7ac1a480b1d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -141,6 +141,9 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, /* allocate extra bitmaps */ if (status->chains) len += 4 * hweight8(status->chains); + /* vendor presence bitmap */ + if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) + len += 4; if (ieee80211_have_rx_timestamp(status)) { len = ALIGN(len, 8); @@ -182,8 +185,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { struct ieee80211_vendor_radiotap *rtap = (void *)skb->data; - /* vendor presence bitmap */ - len += 4; /* alignment for fixed 6-byte vendor data header */ len = ALIGN(len, 2); /* vendor data header */ From d619610b097b50a830a6bf57c31bf0a953a8e3c1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 19 Dec 2018 14:45:09 +0800 Subject: [PATCH 1757/2608] xfrm6_tunnel: Fix spi check in __xfrm6_tunnel_alloc_spi [ Upstream commit fa89a4593b927b3f59c3b69379f31d3b22272e4e ] gcc warn this: net/ipv6/xfrm6_tunnel.c:143 __xfrm6_tunnel_alloc_spi() warn: always true condition '(spi <= 4294967295) => (0-u32max <= u32max)' 'spi' is u32, which always not greater than XFRM6_TUNNEL_SPI_MAX because of wrap around. So the second forloop will never reach. Signed-off-by: YueHaibing Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv6/xfrm6_tunnel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 4e438bc7ee87..c28e3eaad7c2 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -144,6 +144,9 @@ static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; + + if (spi == XFRM6_TUNNEL_SPI_MAX) + break; } for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) { index = __xfrm6_tunnel_spi_check(net, spi); From d57d6a4b8e3c7168445491603cda21f4c51814b3 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 27 Nov 2018 11:37:46 +0200 Subject: [PATCH 1758/2608] Bluetooth: Fix unnecessary error message for HCI request completion [ Upstream commit 1629db9c75342325868243d6bca5853017d91cf8 ] In case a command which completes in Command Status was sent using the hci_cmd_send-family of APIs there would be a misleading error in the hci_get_cmd_complete function, since the code would be trying to fetch the Command Complete parameters when there are none. Avoid the misleading error and silently bail out from the function in case the received event is a command status. Signed-off-by: Johan Hedberg Acked-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 01f211e31f47..363dc85bbc5c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5212,6 +5212,12 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return true; } + /* Check if request ended in Command Status - no way to retreive + * any extra parameters in this case. + */ + if (hdr->evt == HCI_EV_CMD_STATUS) + return false; + if (hdr->evt != HCI_EV_CMD_COMPLETE) { BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); return false; From 3167911bd59ffa9462ffa401ce8617807f3e6fa0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 19 Dec 2018 06:08:45 +0000 Subject: [PATCH 1759/2608] mlxsw: spectrum: Properly cleanup LAG uppers when removing port from LAG [ Upstream commit be2d6f421f680e01d58f7cd452646e0d8586d49b ] When a LAG device or a VLAN device on top of it is enslaved to a bridge, the driver propagates the CHANGEUPPER event to the LAG's slaves. This causes each physical port to increase the reference count of the internal representation of the bridge port by calling mlxsw_sp_port_bridge_join(). However, when a port is removed from a LAG, the corresponding leave() function is not called and the reference count is not decremented. This leads to ugly hacks such as mlxsw_sp_bridge_port_should_destroy() that try to understand if the bridge port should be destroyed even when its reference count is not 0. Instead, make sure that when a port is unlinked from a LAG it would see the same events as if the LAG (or its uppers) were unlinked from a bridge. The above is achieved by walking the LAG's uppers when a port is unlinked and calling mlxsw_sp_port_bridge_leave() for each upper that is enslaved to a bridge. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 23 ++++++++++++++++ .../mellanox/mlxsw/spectrum_switchdev.c | 27 +------------------ 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cf65b2ee8b95..7892e6b8d2e8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3907,6 +3907,25 @@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) dev_put(mlxsw_sp_port->dev); } +static void +mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct net_device *br_dev = netdev_master_upper_dev_get(lag_dev); + struct net_device *upper_dev; + struct list_head *iter; + + if (netif_is_bridge_port(lag_dev)) + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, br_dev); + + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!netif_is_bridge_port(upper_dev)) + continue; + br_dev = netdev_master_upper_dev_get(upper_dev); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, br_dev); + } +} + static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) { char sldr_pl[MLXSW_REG_SLDR_LEN]; @@ -4094,6 +4113,10 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, /* Any VLANs configured on the port are no longer valid */ mlxsw_sp_port_vlan_flush(mlxsw_sp_port); + /* Make the LAG and its directly linked uppers leave bridges they + * are memeber in + */ + mlxsw_sp_port_lag_uppers_cleanup(mlxsw_sp_port, lag_dev); if (lag->ref_count == 1) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 9052e93e1925..f33fb95c4189 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -291,30 +291,6 @@ mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port) kfree(bridge_port); } -static bool -mlxsw_sp_bridge_port_should_destroy(const struct mlxsw_sp_bridge_port * - bridge_port) -{ - struct net_device *dev = bridge_port->dev; - struct mlxsw_sp *mlxsw_sp; - - if (is_vlan_dev(dev)) - mlxsw_sp = mlxsw_sp_lower_get(vlan_dev_real_dev(dev)); - else - mlxsw_sp = mlxsw_sp_lower_get(dev); - - /* In case ports were pulled from out of a bridged LAG, then - * it's possible the reference count isn't zero, yet the bridge - * port should be destroyed, as it's no longer an upper of ours. - */ - if (!mlxsw_sp && list_empty(&bridge_port->vlans_list)) - return true; - else if (bridge_port->ref_count == 0) - return true; - else - return false; -} - static struct mlxsw_sp_bridge_port * mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, struct net_device *brport_dev) @@ -352,8 +328,7 @@ static void mlxsw_sp_bridge_port_put(struct mlxsw_sp_bridge *bridge, { struct mlxsw_sp_bridge_device *bridge_device; - bridge_port->ref_count--; - if (!mlxsw_sp_bridge_port_should_destroy(bridge_port)) + if (--bridge_port->ref_count != 0) return; bridge_device = bridge_port->bridge_device; mlxsw_sp_bridge_port_destroy(bridge_port); From 2575a9a6a11d622e1d8d12749abdef0a24e98d91 Mon Sep 17 00:00:00 2001 From: Mahesh Rajashekhara Date: Fri, 7 Dec 2018 16:28:29 -0600 Subject: [PATCH 1760/2608] scsi: smartpqi: correct host serial num for ssa [ Upstream commit b2346b5030cf9458f30a84028d9fe904b8c942a7 ] Reviewed-by: Scott Benesh Reviewed-by: Ajish Koshy Reviewed-by: Murthy Bhat Reviewed-by: Mahesh Rajashekhara Reviewed-by: Dave Carroll Reviewed-by: Scott Teel Reviewed-by: Kevin Barnett Signed-off-by: Mahesh Rajashekhara Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index bc15999f1c7c..cc27ae2e8a2d 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -653,6 +653,7 @@ struct bmic_host_wellness_driver_version { u8 driver_version_tag[2]; __le16 driver_version_length; char driver_version[32]; + u8 dont_write_tag[2]; u8 end_tag[2]; }; @@ -682,6 +683,8 @@ static int pqi_write_driver_version_to_host_wellness( strncpy(buffer->driver_version, "Linux " DRIVER_VERSION, sizeof(buffer->driver_version) - 1); buffer->driver_version[sizeof(buffer->driver_version) - 1] = '\0'; + buffer->dont_write_tag[0] = 'D'; + buffer->dont_write_tag[1] = 'W'; buffer->end_tag[0] = 'Z'; buffer->end_tag[1] = 'Z'; From 71641aadb02c7ed699c82499fb6b8a7c372fe852 Mon Sep 17 00:00:00 2001 From: Dave Carroll Date: Fri, 7 Dec 2018 16:29:45 -0600 Subject: [PATCH 1761/2608] scsi: smartpqi: correct volume status [ Upstream commit 7ff44499bafbd376115f0bb6b578d980f56ee13b ] - fix race condition when a unit is deleted after an RLL, and before we have gotten the LV_STATUS page of the unit. - In this case we will get a standard inquiry, rather than the desired page. This will result in a unit presented which no longer exists. - If we ask for LV_STATUS, insure we get LV_STATUS Reviewed-by: Murthy Bhat Reviewed-by: Mahesh Rajashekhara Reviewed-by: Scott Teel Reviewed-by: Kevin Barnett Signed-off-by: Dave Carroll Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index cc27ae2e8a2d..5ec2898d21cd 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -1184,6 +1184,9 @@ static void pqi_get_volume_status(struct pqi_ctrl_info *ctrl_info, if (rc) goto out; + if (vpd->page_code != CISS_VPD_LV_STATUS) + goto out; + page_length = offsetof(struct ciss_vpd_logical_volume_status, volume_status) + vpd->page_length; if (page_length < sizeof(*vpd)) From dce955b9db59e0534a814ad1faaf8680ce04ef9c Mon Sep 17 00:00:00 2001 From: Mahesh Rajashekhara Date: Tue, 18 Dec 2018 17:39:01 -0600 Subject: [PATCH 1762/2608] scsi: smartpqi: increase fw status register read timeout [ Upstream commit 65111785acccb836ec75263b03b0e33f21e74f47 ] Problem: - during the driver initialization, driver will poll fw for KERNEL_UP in a 30 seconds timeout. - if the firmware is not ready after 30 seconds, driver will not be loaded. Fix: - change timeout from 30 seconds to 3 minutes. Reported-by: Feng Li Reviewed-by: Ajish Koshy Reviewed-by: Murthy Bhat Reviewed-by: Dave Carroll Reviewed-by: Kevin Barnett Signed-off-by: Mahesh Rajashekhara Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_sis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c index 5141bd4c9f06..ca7dfb3a520f 100644 --- a/drivers/scsi/smartpqi/smartpqi_sis.c +++ b/drivers/scsi/smartpqi/smartpqi_sis.c @@ -59,7 +59,7 @@ #define SIS_CTRL_KERNEL_UP 0x80 #define SIS_CTRL_KERNEL_PANIC 0x100 -#define SIS_CTRL_READY_TIMEOUT_SECS 30 +#define SIS_CTRL_READY_TIMEOUT_SECS 180 #define SIS_CTRL_READY_RESUME_TIMEOUT_SECS 90 #define SIS_CTRL_READY_POLL_INTERVAL_MSECS 10 From 1938b4a9d294d20cba521350b8b56d3f4ab28210 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 14 Dec 2018 11:55:21 +0800 Subject: [PATCH 1763/2608] cw1200: Fix concurrency use-after-free bugs in cw1200_hw_scan() [ Upstream commit 4f68ef64cd7feb1220232bd8f501d8aad340a099 ] The function cw1200_bss_info_changed() and cw1200_hw_scan() can be concurrently executed. The two functions both access a possible shared variable "frame.skb". This shared variable is freed by dev_kfree_skb() in cw1200_upload_beacon(), which is called by cw1200_bss_info_changed(). The free operation is protected by a mutex lock "priv->conf_mutex" in cw1200_bss_info_changed(). In cw1200_hw_scan(), this shared variable is accessed without the protection of the mutex lock "priv->conf_mutex". Thus, concurrency use-after-free bugs may occur. To fix these bugs, the original calls to mutex_lock(&priv->conf_mutex) and mutex_unlock(&priv->conf_mutex) are moved to the places, which can protect the accesses to the shared variable. Signed-off-by: Jia-Ju Bai Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/st/cw1200/scan.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/st/cw1200/scan.c b/drivers/net/wireless/st/cw1200/scan.c index cc2ce60f4f09..f22c8ae15ad8 100644 --- a/drivers/net/wireless/st/cw1200/scan.c +++ b/drivers/net/wireless/st/cw1200/scan.c @@ -78,6 +78,10 @@ int cw1200_hw_scan(struct ieee80211_hw *hw, if (req->n_ssids > WSM_SCAN_MAX_NUM_OF_SSIDS) return -EINVAL; + /* will be unlocked in cw1200_scan_work() */ + down(&priv->scan.lock); + mutex_lock(&priv->conf_mutex); + frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0, req->ie_len); if (!frame.skb) @@ -86,19 +90,15 @@ int cw1200_hw_scan(struct ieee80211_hw *hw, if (req->ie_len) skb_put_data(frame.skb, req->ie, req->ie_len); - /* will be unlocked in cw1200_scan_work() */ - down(&priv->scan.lock); - mutex_lock(&priv->conf_mutex); - ret = wsm_set_template_frame(priv, &frame); if (!ret) { /* Host want to be the probe responder. */ ret = wsm_set_probe_responder(priv, true); } if (ret) { + dev_kfree_skb(frame.skb); mutex_unlock(&priv->conf_mutex); up(&priv->scan.lock); - dev_kfree_skb(frame.skb); return ret; } @@ -120,10 +120,9 @@ int cw1200_hw_scan(struct ieee80211_hw *hw, ++priv->scan.n_ssids; } - mutex_unlock(&priv->conf_mutex); - if (frame.skb) dev_kfree_skb(frame.skb); + mutex_unlock(&priv->conf_mutex); queue_work(priv->workqueue, &priv->scan.work); return 0; } From e1fe3f1e1c03bbbe5d558bd2105846526b5f54f8 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 9 Dec 2018 14:48:15 +0530 Subject: [PATCH 1764/2608] powerpc/perf: Fix thresholding counter data for unknown type [ Upstream commit 17cfccc91545682513541924245abb876d296063 ] MMCRA[34:36] and MMCRA[38:44] expose the thresholding counter value. Thresholding counter can be used to count latency cycles such as load miss to reload. But threshold counter value is not relevant when the sampled instruction type is unknown or reserved. Patch to fix the thresholding counter value to zero when sampled instruction type is unknown or reserved. Fixes: 170a315f41c6('powerpc/perf: Support to export MMCRA[TEC*] field to userspace') Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/perf/isa207-common.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 2efee3f196f5..cf9c35aa0cf4 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -228,8 +228,13 @@ void isa207_get_mem_weight(u64 *weight) u64 mmcra = mfspr(SPRN_MMCRA); u64 exp = MMCRA_THR_CTR_EXP(mmcra); u64 mantissa = MMCRA_THR_CTR_MANT(mmcra); + u64 sier = mfspr(SPRN_SIER); + u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT; - *weight = mantissa << (2 * exp); + if (val == 0 || val == 7) + *weight = 0; + else + *weight = mantissa << (2 * exp); } int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) From 440a5c61c49e4943b42a543c507ab75824ffaee5 Mon Sep 17 00:00:00 2001 From: Roland Kammerer Date: Thu, 20 Dec 2018 17:23:28 +0100 Subject: [PATCH 1765/2608] drbd: narrow rcu_read_lock in drbd_sync_handshake [ Upstream commit d29e89e34952a9ad02c77109c71a80043544296e ] So far there was the possibility that we called genlmsg_new(GFP_NOIO)/mutex_lock() while holding an rcu_read_lock(). This included cases like: drbd_sync_handshake (acquire the RCU lock) drbd_asb_recover_1p drbd_khelper drbd_bcast_event genlmsg_new(GFP_NOIO) --> may sleep drbd_sync_handshake (acquire the RCU lock) drbd_asb_recover_1p drbd_khelper notify_helper genlmsg_new(GFP_NOIO) --> may sleep drbd_sync_handshake (acquire the RCU lock) drbd_asb_recover_1p drbd_khelper notify_helper mutex_lock --> may sleep While using GFP_ATOMIC whould have been possible in the first two cases, the real fix is to narrow the rcu_read_lock. Reported-by: Jia-Ju Bai Reviewed-by: Lars Ellenberg Signed-off-by: Roland Kammerer Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_receiver.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 796eaf347dc0..143c5a666e25 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3361,7 +3361,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; struct net_conf *nc; - int hg, rule_nr, rr_conflict, tentative; + int hg, rule_nr, rr_conflict, tentative, always_asbp; mydisk = device->state.disk; if (mydisk == D_NEGOTIATING) @@ -3412,8 +3412,12 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, rcu_read_lock(); nc = rcu_dereference(peer_device->connection->net_conf); + always_asbp = nc->always_asbp; + rr_conflict = nc->rr_conflict; + tentative = nc->tentative; + rcu_read_unlock(); - if (hg == 100 || (hg == -100 && nc->always_asbp)) { + if (hg == 100 || (hg == -100 && always_asbp)) { int pcount = (device->state.role == R_PRIMARY) + (peer_role == R_PRIMARY); int forced = (hg == -100); @@ -3452,9 +3456,6 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, "Sync from %s node\n", (hg < 0) ? "peer" : "this"); } - rr_conflict = nc->rr_conflict; - tentative = nc->tentative; - rcu_read_unlock(); if (hg == -100) { /* FIXME this log message is not correct if we end up here From 42a04f73dbf0c94ddeaef246990cc97e9edd552b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 20 Dec 2018 17:23:32 +0100 Subject: [PATCH 1766/2608] drbd: disconnect, if the wrong UUIDs are attached on a connected peer [ Upstream commit b17b59602b6dcf8f97a7dc7bc489a48388d7063a ] With "on-no-data-accessible suspend-io", DRBD requires the next attach or connect to be to the very same data generation uuid tag it lost last. If we first lost connection to the peer, then later lost connection to our own disk, we would usually refuse to re-connect to the peer, because it presents the wrong data set. However, if the peer first connects without a disk, and then attached its disk, we accepted that same wrong data set, which would be "unexpected" by any user of that DRBD and cause "undefined results" (read: very likely data corruption). The fix is to forcefully disconnect as soon as we notice that the peer attached to the "wrong" dataset. Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 143c5a666e25..1aad373da50e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4139,7 +4139,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info kfree(device->p_uuid); device->p_uuid = p_uuid; - if (device->state.conn < C_CONNECTED && + if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) && device->state.disk < D_INCONSISTENT && device->state.role == R_PRIMARY && (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { From 9b2f23985fafc8e064cb293f07637a35d5357563 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 20 Dec 2018 17:23:41 +0100 Subject: [PATCH 1767/2608] drbd: skip spurious timeout (ping-timeo) when failing promote [ Upstream commit 9848b6ddd8c92305252f94592c5e278574e7a6ac ] If you try to promote a Secondary while connected to a Primary and allow-two-primaries is NOT set, we will wait for "ping-timeout" to give this node a chance to detect a dead primary, in case the cluster manager noticed faster than we did. But if we then are *still* connected to a Primary, we fail (after an additional timeout of ping-timout). This change skips the spurious second timeout. Most people won't notice really, since "ping-timeout" by default is half a second. But in some installations, ping-timeout may be 10 or 20 seconds or more, and spuriously delaying the error return becomes annoying. Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_nl.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a12f77e6891e..ad13ec66c8e4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -668,14 +668,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. */ - int timeo; - rcu_read_lock(); - nc = rcu_dereference(connection->net_conf); - timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; - rcu_read_unlock(); - schedule_timeout_interruptible(timeo); - if (try < max_tries) + if (try < max_tries) { + int timeo; try = max_tries - 1; + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); + } continue; } if (rv < SS_SUCCESS) { From 4634125557a2354a675f0bd0bb89a8a994b5b39e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 20 Dec 2018 17:23:43 +0100 Subject: [PATCH 1768/2608] drbd: Avoid Clang warning about pointless switch statment [ Upstream commit a52c5a16cf19d8a85831bb1b915a221dd4ffae3c ] There are several warnings from Clang about no case statement matching the constant 0: In file included from drivers/block/drbd/drbd_receiver.c:48: In file included from drivers/block/drbd/drbd_int.h:48: In file included from ./include/linux/drbd_genl_api.h:54: In file included from ./include/linux/genl_magic_struct.h:236: ./include/linux/drbd_genl.h:321:1: warning: no case matching constant switch condition '0' GENL_struct(DRBD_NLA_HELPER, 24, drbd_helper_info, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/genl_magic_struct.h:220:10: note: expanded from macro 'GENL_struct' switch (0) { ^ Silence this warning by adding a 'case 0:' statement. Additionally, adjust the alignment of the statements in the ct_assert_unique macro to avoid a checkpatch warning. This solution was originally sent by Arnd Bergmann with a default case statement: https://lore.kernel.org/patchwork/patch/756723/ Link: https://github.com/ClangBuiltLinux/linux/issues/43 Suggested-by: Lars Ellenberg Signed-off-by: Nathan Chancellor Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/genl_magic_struct.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 5972e4969197..eeae59d3ceb7 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -191,6 +191,7 @@ static inline void ct_assert_unique_operations(void) { switch (0) { #include GENL_MAGIC_INCLUDE_FILE + case 0: ; } } @@ -209,6 +210,7 @@ static inline void ct_assert_unique_top_level_attributes(void) { switch (0) { #include GENL_MAGIC_INCLUDE_FILE + case 0: ; } } @@ -218,7 +220,8 @@ static inline void ct_assert_unique_top_level_attributes(void) static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ { \ switch (0) { \ - s_fields \ + s_fields \ + case 0: \ ; \ } \ } From 63bb4d78244d703564ff2e43331c387f093d2a02 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Thu, 20 Dec 2018 19:13:07 +0100 Subject: [PATCH 1769/2608] video: clps711x-fb: release disp device node in probe() [ Upstream commit fdac751355cd76e049f628afe6acb8ff4b1399f7 ] clps711x_fb_probe() increments refcnt of disp device node by of_parse_phandle() and leaves it undecremented on both successful and error paths. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Cc: Alexander Shiyan Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/clps711x-fb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c index ff561073ee4e..42f909618f04 100644 --- a/drivers/video/fbdev/clps711x-fb.c +++ b/drivers/video/fbdev/clps711x-fb.c @@ -287,14 +287,17 @@ static int clps711x_fb_probe(struct platform_device *pdev) } ret = of_get_fb_videomode(disp, &cfb->mode, OF_USE_NATIVE_MODE); - if (ret) + if (ret) { + of_node_put(disp); goto out_fb_release; + } of_property_read_u32(disp, "ac-prescale", &cfb->ac_prescale); cfb->cmap_invert = of_property_read_bool(disp, "cmap-invert"); ret = of_property_read_u32(disp, "bits-per-pixel", &info->var.bits_per_pixel); + of_node_put(disp); if (ret) goto out_fb_release; From 36ef7512a76eb89ac85e2aedf1676e9e5f6b11be Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 19 Dec 2018 14:19:25 +0800 Subject: [PATCH 1770/2608] md: fix raid10 hang issue caused by barrier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e820d55cb99dd93ac2dc949cf486bb187e5cd70d ] When both regular IO and resync IO happen at the same time, and if we also need to split regular. Then we can see tasks hang due to barrier. 1. resync thread [ 1463.757205] INFO: task md1_resync:5215 blocked for more than 480 seconds. [ 1463.757207] Not tainted 4.19.5-1-default #1 [ 1463.757209] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1463.757212] md1_resync D 0 5215 2 0x80000000 [ 1463.757216] Call Trace: [ 1463.757223] ? __schedule+0x29a/0x880 [ 1463.757231] ? raise_barrier+0x8d/0x140 [raid10] [ 1463.757236] schedule+0x78/0x110 [ 1463.757243] raise_barrier+0x8d/0x140 [raid10] [ 1463.757248] ? wait_woken+0x80/0x80 [ 1463.757257] raid10_sync_request+0x1f6/0x1e30 [raid10] [ 1463.757265] ? _raw_spin_unlock_irq+0x22/0x40 [ 1463.757284] ? is_mddev_idle+0x125/0x137 [md_mod] [ 1463.757302] md_do_sync.cold.78+0x404/0x969 [md_mod] [ 1463.757311] ? wait_woken+0x80/0x80 [ 1463.757336] ? md_rdev_init+0xb0/0xb0 [md_mod] [ 1463.757351] md_thread+0xe9/0x140 [md_mod] [ 1463.757358] ? _raw_spin_unlock_irqrestore+0x2e/0x60 [ 1463.757364] ? __kthread_parkme+0x4c/0x70 [ 1463.757369] kthread+0x112/0x130 [ 1463.757374] ? kthread_create_worker_on_cpu+0x40/0x40 [ 1463.757380] ret_from_fork+0x3a/0x50 2. regular IO [ 1463.760679] INFO: task kworker/0:8:5367 blocked for more than 480 seconds. [ 1463.760683] Not tainted 4.19.5-1-default #1 [ 1463.760684] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1463.760687] kworker/0:8 D 0 5367 2 0x80000000 [ 1463.760718] Workqueue: md submit_flushes [md_mod] [ 1463.760721] Call Trace: [ 1463.760731] ? __schedule+0x29a/0x880 [ 1463.760741] ? wait_barrier+0xdd/0x170 [raid10] [ 1463.760746] schedule+0x78/0x110 [ 1463.760753] wait_barrier+0xdd/0x170 [raid10] [ 1463.760761] ? wait_woken+0x80/0x80 [ 1463.760768] raid10_write_request+0xf2/0x900 [raid10] [ 1463.760774] ? wait_woken+0x80/0x80 [ 1463.760778] ? mempool_alloc+0x55/0x160 [ 1463.760795] ? md_write_start+0xa9/0x270 [md_mod] [ 1463.760801] ? try_to_wake_up+0x44/0x470 [ 1463.760810] raid10_make_request+0xc1/0x120 [raid10] [ 1463.760816] ? wait_woken+0x80/0x80 [ 1463.760831] md_handle_request+0x121/0x190 [md_mod] [ 1463.760851] md_make_request+0x78/0x190 [md_mod] [ 1463.760860] generic_make_request+0x1c6/0x470 [ 1463.760870] raid10_write_request+0x77a/0x900 [raid10] [ 1463.760875] ? wait_woken+0x80/0x80 [ 1463.760879] ? mempool_alloc+0x55/0x160 [ 1463.760895] ? md_write_start+0xa9/0x270 [md_mod] [ 1463.760904] raid10_make_request+0xc1/0x120 [raid10] [ 1463.760910] ? wait_woken+0x80/0x80 [ 1463.760926] md_handle_request+0x121/0x190 [md_mod] [ 1463.760931] ? _raw_spin_unlock_irq+0x22/0x40 [ 1463.760936] ? finish_task_switch+0x74/0x260 [ 1463.760954] submit_flushes+0x21/0x40 [md_mod] So resync io is waiting for regular write io to complete to decrease nr_pending (conf->barrier++ is called before waiting). The regular write io splits another bio after call wait_barrier which call nr_pending++, then the splitted bio would continue with raid10_write_request -> wait_barrier, so the splitted bio has to wait for barrier to be zero, then deadlock happens as follows. resync io regular io raise_barrier wait_barrier generic_make_request wait_barrier To resolve the issue, we need to call allow_barrier to decrease nr_pending before generic_make_request since regular IO is not issued to underlying devices, and wait_barrier is called again to ensure no internal IO happening. Fixes: fc9977dd069e ("md/raid10: simplify the splitting of requests.") Reported-and-tested-by: Siniša Bandin Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin --- drivers/md/raid10.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 52ddfa0fca94..2ce079a0b0bd 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1190,7 +1190,9 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, struct bio *split = bio_split(bio, max_sectors, gfp, conf->bio_split); bio_chain(split, bio); + allow_barrier(conf); generic_make_request(bio); + wait_barrier(conf); bio = split; r10_bio->master_bio = bio; r10_bio->sectors = max_sectors; @@ -1479,7 +1481,9 @@ retry_write: struct bio *split = bio_split(bio, r10_bio->sectors, GFP_NOIO, conf->bio_split); bio_chain(split, bio); + allow_barrier(conf); generic_make_request(bio); + wait_barrier(conf); bio = split; r10_bio->master_bio = bio; } From 74b86d3e726fd4dc0dc7528d81061f6deb7733ac Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Thu, 20 Dec 2018 19:13:07 +0100 Subject: [PATCH 1771/2608] fbdev: fbmem: behave better with small rotated displays and many CPUs [ Upstream commit f75df8d4b4fabfad7e3cba2debfad12741c6fde7 ] Blitting an image with "negative" offsets is not working since there is no clipping. It hopefully just crashes. For the bootup logo, there is protection so that blitting does not happen as the image is drawn further and further to the right (ROTATE_UR) or further and further down (ROTATE_CW). There is however no protection when drawing in the opposite directions (ROTATE_UD and ROTATE_CCW). Add back this protection. The regression is 20-odd years old but the mindless warning-killing mentality displayed in commit 34bdb666f4b2 ("fbdev: fbmem: remove positive test on unsigned values") is also to blame, methinks. Fixes: 448d479747b8 ("fbdev: fb_do_show_logo() updates") Signed-off-by: Peter Rosin Cc: Tomi Valkeinen Cc: Fabian Frederick Cc: Geert Uytterhoeven cc: Geoff Levand Cc: James Simmons Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbmem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 11d73b5fc885..302cce7185e3 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -435,7 +435,9 @@ static void fb_do_show_logo(struct fb_info *info, struct fb_image *image, image->dx += image->width + 8; } } else if (rotate == FB_ROTATE_UD) { - for (x = 0; x < num; x++) { + u32 dx = image->dx; + + for (x = 0; x < num && image->dx <= dx; x++) { info->fbops->fb_imageblit(info, image); image->dx -= image->width + 8; } @@ -447,7 +449,9 @@ static void fb_do_show_logo(struct fb_info *info, struct fb_image *image, image->dy += image->height + 8; } } else if (rotate == FB_ROTATE_CCW) { - for (x = 0; x < num; x++) { + u32 dy = image->dy; + + for (x = 0; x < num && image->dy <= dy; x++) { info->fbops->fb_imageblit(info, image); image->dy -= image->height + 8; } From 8181b2f24d3a845dc01a0ad781fc3b0acc732fdb Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Fri, 23 Nov 2018 19:10:28 +0300 Subject: [PATCH 1772/2608] i40e: define proper net_device::neigh_priv_len [ Upstream commit 31389b53b3e0b535867af9090a5d19ec64768d55 ] Out of bound read reported by KASan. i40iw_net_event() reads unconditionally 16 bytes from neigh->primary_key while the memory allocated for "neighbour" struct is evaluated in neigh_alloc() as tbl->entry_size + dev->neigh_priv_len where "dev" is a net_device. But the driver does not setup dev->neigh_priv_len and we read beyond the neigh entry allocated memory, so the patch in the next mail fixes this. Signed-off-by: Konstantin Khorenko Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 904b42becd45..5d47a51e74eb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9772,6 +9772,9 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) ether_addr_copy(netdev->dev_addr, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); + /* i40iw_net_event() reads 16 bytes from neigh->primary_key */ + netdev->neigh_priv_len = sizeof(u32) * 4; + netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; /* Setup netdev TC information */ From 71d357aa1cda88994f11a98c8262f9103c304691 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 3 Dec 2018 13:54:38 +0800 Subject: [PATCH 1773/2608] igb: Fix an issue that PME is not enabled during runtime suspend [ Upstream commit 1fb3a7a75e2efcc83ef21f2434069cddd6fae6f5 ] I210 ethernet card doesn't wakeup when a cable gets plugged. It's because its PME is not set. Since commit 42eca2302146 ("PCI: Don't touch card regs after runtime suspend D3"), if the PCI state is saved, pci_pm_runtime_suspend() stops calling pci_finish_runtime_suspend(), which enables the PCI PME. To fix the issue, let's not to save PCI states when it's runtime suspend, to let the PCI subsystem enables PME. Fixes: 42eca2302146 ("PCI: Don't touch card regs after runtime suspend D3") Signed-off-by: Kai-Heng Feng Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 1c027f9d9af5..8892ea5cbb01 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7950,9 +7950,11 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, rtnl_unlock(); #ifdef CONFIG_PM - retval = pci_save_state(pdev); - if (retval) - return retval; + if (!runtime) { + retval = pci_save_state(pdev); + if (retval) + return retval; + } #endif status = rd32(E1000_STATUS); From 74a65f50b9edd3dbd59614df6b7726ca49ff7808 Mon Sep 17 00:00:00 2001 From: Lenny Szubowicz Date: Wed, 19 Dec 2018 11:50:52 -0500 Subject: [PATCH 1774/2608] ACPI/APEI: Clear GHES block_status before panic() [ Upstream commit 98cff8b23ed1c763a029ee81ea300df0d153d07d ] In __ghes_panic() clear the block status in the APEI generic error status block for that generic hardware error source before calling panic() to prevent a second panic() in the crash kernel for exactly the same fatal error. Otherwise ghes_probe(), running in the crash kernel, would see an unhandled error in the APEI generic error status block and panic again, thereby precluding any crash dump. Signed-off-by: Lenny Szubowicz Signed-off-by: David Arcari Tested-by: Tyler Baicar Acked-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/apei/ghes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f14695e744d0..5889f6407fea 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -675,6 +675,8 @@ static void __ghes_panic(struct ghes *ghes) { __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); + ghes_clear_estatus(ghes); + /* reboot to log the error! */ if (!panic_timeout) panic_timeout = ghes_panic_timeout; From 4ab6a0314538afb9421c6700aebed61fde7f378d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Thu, 20 Dec 2018 19:13:09 +0100 Subject: [PATCH 1775/2608] fbdev: fbcon: Fix unregister crash when more than one framebuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2122b40580dd9d0620398739c773d07a7b7939d0 ] When unregistering fbdev using unregister_framebuffer(), any bound console will unbind automatically. This is working fine if this is the only framebuffer, resulting in a switch to the dummy console. However if there is a fb0 and I unregister fb1 having a bound console, I eventually get a crash. The fastest way for me to trigger the crash is to do a reboot, resulting in this splat: [ 76.478825] WARNING: CPU: 0 PID: 527 at linux/kernel/workqueue.c:1442 __queue_work+0x2d4/0x41c [ 76.478849] Modules linked in: raspberrypi_hwmon gpio_backlight backlight bcm2835_rng rng_core [last unloaded: tinydrm] [ 76.478916] CPU: 0 PID: 527 Comm: systemd-udevd Not tainted 4.20.0-rc4+ #4 [ 76.478933] Hardware name: BCM2835 [ 76.478949] Backtrace: [ 76.478995] [] (dump_backtrace) from [] (show_stack+0x20/0x24) [ 76.479022] r6:00000000 r5:c0bc73be r4:00000000 r3:6fb5bf81 [ 76.479060] [] (show_stack) from [] (dump_stack+0x20/0x28) [ 76.479102] [] (dump_stack) from [] (__warn+0xec/0x12c) [ 76.479134] [] (__warn) from [] (warn_slowpath_null+0x4c/0x58) [ 76.479165] r9:c0eb6944 r8:00000001 r7:c0e927f8 r6:c0bc73be r5:000005a2 r4:c0139e84 [ 76.479197] [] (warn_slowpath_null) from [] (__queue_work+0x2d4/0x41c) [ 76.479222] r6:d7666a00 r5:c0e918ee r4:dbc4e700 [ 76.479251] [] (__queue_work) from [] (queue_work_on+0x60/0x88) [ 76.479281] r10:c0496bf8 r9:00000100 r8:c0e92ae0 r7:00000001 r6:d9403700 r5:d7666a00 [ 76.479298] r4:20000113 [ 76.479348] [] (queue_work_on) from [] (cursor_timer_handler+0x30/0x54) [ 76.479374] r7:d8a8fabc r6:c0e08088 r5:d8afdc5c r4:d8a8fabc [ 76.479413] [] (cursor_timer_handler) from [] (call_timer_fn+0x100/0x230) [ 76.479435] r4:c0e9192f r3:d758a340 [ 76.479465] [] (call_timer_fn) from [] (expire_timers+0x10c/0x12c) [ 76.479495] r10:40000000 r9:c0e9192f r8:c0e92ae0 r7:d8afdccc r6:c0e19280 r5:c0496bf8 [ 76.479513] r4:d8a8fabc [ 76.479541] [] (expire_timers) from [] (run_timer_softirq+0xa8/0x184) [ 76.479570] r9:00000001 r8:c0e19280 r7:00000000 r6:c0e08088 r5:c0e1a3e0 r4:c0e19280 [ 76.479603] [] (run_timer_softirq) from [] (__do_softirq+0x1ac/0x3fc) [ 76.479632] r10:c0e91680 r9:d8afc020 r8:0000000a r7:00000100 r6:00000001 r5:00000002 [ 76.479650] r4:c0eb65ec [ 76.479686] [] (__do_softirq) from [] (irq_exit+0xe8/0x168) [ 76.479716] r10:d8d1a9b0 r9:d8afc000 r8:00000001 r7:d949c000 r6:00000000 r5:c0e8b3f0 [ 76.479734] r4:00000000 [ 76.479764] [] (irq_exit) from [] (__handle_domain_irq+0x94/0xb0) [ 76.479793] [] (__handle_domain_irq) from [] (bcm2835_handle_irq+0x3c/0x48) [ 76.479823] r8:d8afdebc r7:d8afddfc r6:ffffffff r5:c0e089f8 r4:d8afddc8 r3:d8afddc8 [ 76.479851] [] (bcm2835_handle_irq) from [] (__irq_svc+0x70/0x98) The problem is in the console rebinding in fbcon_fb_unbind(). It uses the virtual console index as the new framebuffer index to bind the console(s) to. The correct way is to use the con2fb_map lookup table to find the framebuffer index. Fixes: cfafca8067c6 ("fbdev: fbcon: console unregistration from unregister_framebuffer") Signed-off-by: Noralf Trønnes Reviewed-by: Mikulas Patocka Acked-by: Daniel Vetter Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 04612f938bab..85787119bfbf 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -3041,7 +3041,7 @@ static int fbcon_fb_unbind(int idx) for (i = first_fb_vc; i <= last_fb_vc; i++) { if (con2fb_map[i] != idx && con2fb_map[i] != -1) { - new_idx = i; + new_idx = con2fb_map[i]; break; } } From 409985215a566e738e583295ea9a4f79a1d296d0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 28 Nov 2018 09:27:04 +0000 Subject: [PATCH 1776/2608] powerpc/mm: Fix reporting of kernel execute faults on the 8xx [ Upstream commit ffca395b11c4a5a6df6d6345f794b0e3d578e2d0 ] On the 8xx, no-execute is set via PPP bits in the PTE. Therefore a no-exec fault generates DSISR_PROTFAULT error bits, not DSISR_NOEXEC_OR_G. This patch adds DSISR_PROTFAULT in the test mask. Fixes: d3ca587404b3 ("powerpc/mm: Fix reporting of kernel execute faults") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6e1e39035380..52863deed65d 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -215,7 +215,9 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) static bool bad_kernel_fault(bool is_exec, unsigned long error_code, unsigned long address) { - if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) { + /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ + if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | + DSISR_PROTFAULT))) { printk_ratelimited(KERN_CRIT "kernel tried to execute" " exec-protected page (%lx) -" "exploit attempt? (uid: %d)\n", From 967b24de179fc81e4b9ef385e4632381d18e3e45 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 9 Dec 2018 20:50:50 +0100 Subject: [PATCH 1777/2608] pinctrl: meson: meson8: fix the GPIO function for the GPIOAO pins [ Upstream commit 42f9b48cc5402be11d2364275eb18c257d2a79e8 ] The GPIOAO pins (as well as the two exotic GPIO_BSD_EN and GPIO_TEST_N) only belong to the pin controller in the AO domain. With the current definition these pins cannot be referred to in .dts files as group (which is possible on GXBB and GXL for example). Add a separate "gpio_aobus" function to fix the mapping between the pin controller and the GPIO pins in the AO domain. This is similar to how the GXBB and GXL drivers implement this functionality. Fixes: 9dab1868ec0db4 ("pinctrl: amlogic: Make driver independent from two-domain configuration") Signed-off-by: Martin Blumenstingl Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/meson/pinctrl-meson8.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/meson/pinctrl-meson8.c b/drivers/pinctrl/meson/pinctrl-meson8.c index 970f6f14502c..591b01657378 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8.c +++ b/drivers/pinctrl/meson/pinctrl-meson8.c @@ -808,7 +808,9 @@ static const char * const gpio_groups[] = { "BOOT_5", "BOOT_6", "BOOT_7", "BOOT_8", "BOOT_9", "BOOT_10", "BOOT_11", "BOOT_12", "BOOT_13", "BOOT_14", "BOOT_15", "BOOT_16", "BOOT_17", "BOOT_18", +}; +static const char * const gpio_aobus_groups[] = { "GPIOAO_0", "GPIOAO_1", "GPIOAO_2", "GPIOAO_3", "GPIOAO_4", "GPIOAO_5", "GPIOAO_6", "GPIOAO_7", "GPIOAO_8", "GPIOAO_9", "GPIOAO_10", "GPIOAO_11", @@ -1030,6 +1032,7 @@ static struct meson_pmx_func meson8_cbus_functions[] = { }; static struct meson_pmx_func meson8_aobus_functions[] = { + FUNCTION(gpio_aobus), FUNCTION(uart_ao), FUNCTION(remote), FUNCTION(i2c_slave_ao), From 391e1989424d73cde3ab8762e0fcd5ced098b569 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 9 Dec 2018 20:50:51 +0100 Subject: [PATCH 1778/2608] pinctrl: meson: meson8b: fix the GPIO function for the GPIOAO pins [ Upstream commit 2b745ac3cceb8fc1d9985990c8241a821ea97e53 ] The GPIOAO pins (as well as the two exotic GPIO_BSD_EN and GPIO_TEST_N) only belong to the pin controller in the AO domain. With the current definition these pins cannot be referred to in .dts files as group (which is possible on GXBB and GXL for example). Add a separate "gpio_aobus" function to fix the mapping between the pin controller and the GPIO pins in the AO domain. This is similar to how the GXBB and GXL drivers implement this functionality. Fixes: 9dab1868ec0db4 ("pinctrl: amlogic: Make driver independent from two-domain configuration") Signed-off-by: Martin Blumenstingl Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/meson/pinctrl-meson8b.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index 71f216b5b0b9..a6fff215e60f 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -649,16 +649,18 @@ static const char * const gpio_groups[] = { "BOOT_10", "BOOT_11", "BOOT_12", "BOOT_13", "BOOT_14", "BOOT_15", "BOOT_16", "BOOT_17", "BOOT_18", - "GPIOAO_0", "GPIOAO_1", "GPIOAO_2", "GPIOAO_3", - "GPIOAO_4", "GPIOAO_5", "GPIOAO_6", "GPIOAO_7", - "GPIOAO_8", "GPIOAO_9", "GPIOAO_10", "GPIOAO_11", - "GPIOAO_12", "GPIOAO_13", "GPIO_BSD_EN", "GPIO_TEST_N", - "DIF_0_P", "DIF_0_N", "DIF_1_P", "DIF_1_N", "DIF_2_P", "DIF_2_N", "DIF_3_P", "DIF_3_N", "DIF_4_P", "DIF_4_N" }; +static const char * const gpio_aobus_groups[] = { + "GPIOAO_0", "GPIOAO_1", "GPIOAO_2", "GPIOAO_3", + "GPIOAO_4", "GPIOAO_5", "GPIOAO_6", "GPIOAO_7", + "GPIOAO_8", "GPIOAO_9", "GPIOAO_10", "GPIOAO_11", + "GPIOAO_12", "GPIOAO_13", "GPIO_BSD_EN", "GPIO_TEST_N" +}; + static const char * const sd_a_groups[] = { "sd_d0_a", "sd_d1_a", "sd_d2_a", "sd_d3_a", "sd_clk_a", "sd_cmd_a" @@ -874,6 +876,7 @@ static struct meson_pmx_func meson8b_cbus_functions[] = { }; static struct meson_pmx_func meson8b_aobus_functions[] = { + FUNCTION(gpio_aobus), FUNCTION(uart_ao), FUNCTION(uart_ao_b), FUNCTION(i2c_slave_ao), From 9698e2687b226b0e8d44e79c657870f6ac10b041 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 19 Dec 2018 12:06:13 +0100 Subject: [PATCH 1779/2608] KVM: x86: svm: report MSR_IA32_MCG_EXT_CTL as unsupported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e87555e550cef4941579cd879759a7c0dee24e68 ] AMD doesn't seem to implement MSR_IA32_MCG_EXT_CTL and svm code in kvm knows nothing about it, however, this MSR is among emulated_msrs and thus returned with KVM_GET_MSR_INDEX_LIST. The consequent KVM_GET_MSRS, of course, fails. Report the MSR as unsupported to not confuse userspace. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin --- arch/x86/kvm/svm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4dc79d139810..656ac12f5439 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5319,6 +5319,13 @@ static bool svm_cpu_has_accelerated_tpr(void) static bool svm_has_emulated_msr(int index) { + switch (index) { + case MSR_IA32_MCG_EXT_CTL: + return false; + default: + break; + } + return true; } From e7226c67a17dd86c57b5f15bb4daf4358948fb7d Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 20 Aug 2018 13:47:32 +0530 Subject: [PATCH 1780/2608] powerpc/fadump: Do not allow hot-remove memory from fadump reserved area. [ Upstream commit 0db6896ff6332ba694f1e61b93ae3b2640317633 ] For fadump to work successfully there should not be any holes in reserved memory ranges where kernel has asked firmware to move the content of old kernel memory in event of crash. Now that fadump uses CMA for reserved area, this memory area is now not protected from hot-remove operations unless it is cma allocated. Hence, fadump service can fail to re-register after the hot-remove operation, if hot-removed memory belongs to fadump reserved region. To avoid this make sure that memory from fadump reserved area is not hot-removable if fadump is registered. However, if user still wants to remove that memory, he can do so by manually stopping fadump service before hot-remove operation. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/fadump.h | 2 +- arch/powerpc/kernel/fadump.c | 10 ++++++++-- arch/powerpc/platforms/pseries/hotplug-memory.c | 7 +++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 1e7a33592e29..15bc07a31c46 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -200,7 +200,7 @@ struct fad_crash_memory_ranges { unsigned long long size; }; -extern int is_fadump_boot_memory_area(u64 addr, ulong size); +extern int is_fadump_memory_area(u64 addr, ulong size); extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data); extern int fadump_reserve_mem(void); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 5a6470383ca3..62d7ef6508de 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -117,13 +117,19 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, /* * If fadump is registered, check if the memory provided - * falls within boot memory area. + * falls within boot memory area and reserved memory area. */ -int is_fadump_boot_memory_area(u64 addr, ulong size) +int is_fadump_memory_area(u64 addr, ulong size) { + u64 d_start = fw_dump.reserve_dump_area_start; + u64 d_end = d_start + fw_dump.reserve_dump_area_size; + if (!fw_dump.dump_registered) return 0; + if (((addr + size) > d_start) && (addr <= d_end)) + return 1; + return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 1d48ab424bd9..93e09f108ca1 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -441,8 +441,11 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) phys_addr = lmb->base_addr; #ifdef CONFIG_FA_DUMP - /* Don't hot-remove memory that falls in fadump boot memory area */ - if (is_fadump_boot_memory_area(phys_addr, block_sz)) + /* + * Don't hot-remove memory that falls in fadump boot memory area + * and memory that is reserved for capturing old kernel memory. + */ + if (is_fadump_memory_area(phys_addr, block_sz)) return false; #endif From f70123c6d3accc024445eca6b9ee01c1fe2b80d8 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 14 Dec 2018 14:34:43 -0800 Subject: [PATCH 1781/2608] kvm: Change offset in kvm_write_guest_offset_cached to unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7a86dab8cf2f0fdf508f3555dddfc236623bff60 ] Since the offset is added directly to the hva from the gfn_to_hva_cache, a negative offset could result in an out of bounds write. The existing BUG_ON only checks for addresses beyond the end of the gfn_to_hva_cache, not for addresses before the start of the gfn_to_hva_cache. Note that all current call sites have non-negative offsets. Fixes: 4ec6e8636256 ("kvm: Introduce kvm_write_guest_offset_cached()") Reported-by: Cfir Cohen Signed-off-by: Jim Mattson Reviewed-by: Cfir Cohen Reviewed-by: Peter Shier Reviewed-by: Krish Sadhukhan Reviewed-by: Sean Christopherson Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin --- include/linux/kvm_host.h | 3 ++- virt/kvm/kvm_main.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b6962ae6237e..4f7f19c1dc0a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -685,7 +685,8 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len); + void *data, unsigned int offset, + unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4f35f0dfe681..bbc34e87d88f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1962,7 +1962,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len) + void *data, unsigned int offset, + unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); int r; From d9783bcebb990207bc510c55664118a524218de4 Mon Sep 17 00:00:00 2001 From: Chris Perl Date: Mon, 17 Dec 2018 10:56:38 -0500 Subject: [PATCH 1782/2608] NFS: nfs_compare_mount_options always compare auth flavors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 594d1644cd59447f4fceb592448d5cd09eb09b5e ] This patch removes the check from nfs_compare_mount_options to see if a `sec' option was passed for the current mount before comparing auth flavors and instead just always compares auth flavors. Consider the following scenario: You have a server with the address 192.168.1.1 and two exports /export/a and /export/b. The first export supports `sys' and `krb5' security, the second just `sys'. Assume you start with no mounts from the server. The following results in EIOs being returned as the kernel nfs client incorrectly thinks it can share the underlying `struct nfs_server's: $ mkdir /tmp/{a,b} $ sudo mount -t nfs -o vers=3,sec=krb5 192.168.1.1:/export/a /tmp/a $ sudo mount -t nfs -o vers=3 192.168.1.1:/export/b /tmp/b $ df >/dev/null df: ‘/tmp/b’: Input/output error Signed-off-by: Chris Perl Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 38de09b08e96..3c4aeb83e1c4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2401,8 +2401,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->acdirmax != b->acdirmax) goto Ebusy; - if (b->auth_info.flavor_len > 0 && - clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) goto Ebusy; return 1; Ebusy: From 58ddc0c67d113a1081e3c017682b475b75a6bf2c Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 21 Dec 2018 13:01:33 -0600 Subject: [PATCH 1783/2608] hwmon: (lm80) fix a missing check of the status of SMBus read [ Upstream commit c9c63915519b1def7043b184680f33c24cd49d7b ] If lm80_read_value() fails, it returns a negative number instead of the correct read data. Therefore, we should avoid using the data if it fails. The fix checks if lm80_read_value() fails, and if so, returns with the error number. Signed-off-by: Kangjie Lu [groeck: One variable for return values is enough] Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/lm80.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index 08e3945a6fbf..04f9df0d2341 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -360,9 +360,11 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, struct i2c_client *client = data->client; unsigned long min, val; u8 reg; - int err = kstrtoul(buf, 10, &val); - if (err < 0) - return err; + int rv; + + rv = kstrtoul(buf, 10, &val); + if (rv < 0) + return rv; /* Save fan_min */ mutex_lock(&data->update_lock); @@ -390,8 +392,11 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, return -EINVAL; } - reg = (lm80_read_value(client, LM80_REG_FANDIV) & - ~(3 << (2 * (nr + 1)))) | (data->fan_div[nr] << (2 * (nr + 1))); + rv = lm80_read_value(client, LM80_REG_FANDIV); + if (rv < 0) + return rv; + reg = (rv & ~(3 << (2 * (nr + 1)))) + | (data->fan_div[nr] << (2 * (nr + 1))); lm80_write_value(client, LM80_REG_FANDIV, reg); /* Restore fan_min */ From 1812be7e56c7c4447c2facdc52d81a872158a957 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 21 Dec 2018 13:10:39 -0600 Subject: [PATCH 1784/2608] hwmon: (lm80) fix a missing check of bus read in lm80 probe [ Upstream commit 9aa3aa15f4c2f74f47afd6c5db4b420fadf3f315 ] In lm80_probe(), if lm80_read_value() fails, it returns a negative error number which is stored to data->fan[f_min] and will be further used. We should avoid using the data if the read fails. The fix checks if lm80_read_value() fails, and if so, returns with the error number. Signed-off-by: Kangjie Lu Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/lm80.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index 04f9df0d2341..0e30fa00204c 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -628,6 +628,7 @@ static int lm80_probe(struct i2c_client *client, struct device *dev = &client->dev; struct device *hwmon_dev; struct lm80_data *data; + int rv; data = devm_kzalloc(dev, sizeof(struct lm80_data), GFP_KERNEL); if (!data) @@ -640,8 +641,14 @@ static int lm80_probe(struct i2c_client *client, lm80_init_client(client); /* A few vars need to be filled upon startup */ - data->fan[f_min][0] = lm80_read_value(client, LM80_REG_FAN_MIN(1)); - data->fan[f_min][1] = lm80_read_value(client, LM80_REG_FAN_MIN(2)); + rv = lm80_read_value(client, LM80_REG_FAN_MIN(1)); + if (rv < 0) + return rv; + data->fan[f_min][0] = rv; + rv = lm80_read_value(client, LM80_REG_FAN_MIN(2)); + if (rv < 0) + return rv; + data->fan[f_min][1] = rv; hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, lm80_groups); From 4a38ed76fbeca874c20914e0515f08c8fc7ee0af Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 19 Oct 2018 15:21:08 +1100 Subject: [PATCH 1785/2608] seq_buf: Make seq_buf_puts() null-terminate the buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0464ed24380905d640030d368cd84a4e4d1e15e2 ] Currently seq_buf_puts() will happily create a non null-terminated string for you in the buffer. This is particularly dangerous if the buffer is on the stack. For example: char buf[8]; char secret = "secret"; struct seq_buf s; seq_buf_init(&s, buf, sizeof(buf)); seq_buf_puts(&s, "foo"); printk("Message is %s\n", buf); Can result in: Message is fooªªªªªsecret We could require all users to memset() their buffer to zero before use. But that seems likely to be forgotten and lead to bugs. Instead we can change seq_buf_puts() to always leave the buffer in a null-terminated state. The only downside is that this makes the buffer 1 character smaller for seq_buf_puts(), but that seems like a good trade off. Link: http://lkml.kernel.org/r/20181019042109.8064-1-mpe@ellerman.id.au Acked-by: Kees Cook Signed-off-by: Michael Ellerman Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- lib/seq_buf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 11f2ae0f9099..6aabb609dd87 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -144,9 +144,13 @@ int seq_buf_puts(struct seq_buf *s, const char *str) WARN_ON(s->size == 0); + /* Add 1 to len for the trailing null byte which must be there */ + len += 1; + if (seq_buf_can_fit(s, len)) { memcpy(s->buffer + s->len, str, len); - s->len += len; + /* Don't count the trailing null byte against the capacity */ + s->len += len - 1; return 0; } seq_buf_set_overflow(s); From 0515902af807e35a61a59eb58632180015f6521d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 10 Dec 2018 16:49:29 -0700 Subject: [PATCH 1786/2608] crypto: ux500 - Use proper enum in cryp_set_dma_transfer [ Upstream commit 9d880c5945c748d8edcac30965f3349a602158c4 ] Clang warns when one enumerated type is implicitly converted to another: drivers/crypto/ux500/cryp/cryp_core.c:559:5: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] direction, DMA_CTRL_ACK); ^~~~~~~~~ drivers/crypto/ux500/cryp/cryp_core.c:583:5: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] direction, ^~~~~~~~~ 2 warnings generated. dmaengine_prep_slave_sg expects an enum from dma_transfer_direction. Because we know the value of the dma_data_direction enum from the switch statement, we can just use the proper value from dma_transfer_direction so there is no more conversion. DMA_TO_DEVICE = DMA_MEM_TO_DEV = 1 DMA_FROM_DEVICE = DMA_DEV_TO_MEM = 2 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/ux500/cryp/cryp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 790f7cadc1ed..efebc484e371 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -555,7 +555,7 @@ static int cryp_set_dma_transfer(struct cryp_ctx *ctx, desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg_src, ctx->device->dma.sg_src_len, - direction, DMA_CTRL_ACK); + DMA_MEM_TO_DEV, DMA_CTRL_ACK); break; case DMA_FROM_DEVICE: @@ -579,7 +579,7 @@ static int cryp_set_dma_transfer(struct cryp_ctx *ctx, desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg_dst, ctx->device->dma.sg_dst_len, - direction, + DMA_DEV_TO_MEM, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); From 8a3c04ccce589ba538f7f6641989f5cb234629ea Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 10 Dec 2018 16:49:54 -0700 Subject: [PATCH 1787/2608] crypto: ux500 - Use proper enum in hash_set_dma_transfer [ Upstream commit 5ac93f808338f4dd465402e91869702eb87db241 ] Clang warns when one enumerated type is implicitly converted to another: drivers/crypto/ux500/hash/hash_core.c:169:4: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); ^~~~~~~~~ 1 warning generated. dmaengine_prep_slave_sg expects an enum from dma_transfer_direction. We know that the only direction supported by this function is DMA_TO_DEVICE because of the check at the top of this function so we can just use the equivalent value from dma_transfer_direction. DMA_TO_DEVICE = DMA_MEM_TO_DEV = 1 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/ux500/hash/hash_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 9acccad26928..17c8e2b28c42 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -165,7 +165,7 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg, __func__); desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg, ctx->device->dma.sg_len, - direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); + DMA_MEM_TO_DEV, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); if (!desc) { dev_err(ctx->device->dev, "%s: dmaengine_prep_slave_sg() failed!\n", __func__); From b0d08e077d8148a45a108acc602e5b0ac865a84f Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 17 Dec 2018 10:47:48 +0100 Subject: [PATCH 1788/2608] MIPS: ralink: Select CONFIG_CPU_MIPSR2_IRQ_VI on MT7620/8 [ Upstream commit 0b15394475e3bcaf35ca4bf22fc55d56df67224e ] Testing has shown, that when using mainline U-Boot on MT7688 based boards, the system may hang or crash while mounting the root-fs. The main issue here is that mainline U-Boot configures EBase to a value near the end of system memory. And with CONFIG_CPU_MIPSR2_IRQ_VI disabled, trap_init() will not allocate a new area to place the exception handler. The original value will be used and the handler will be copied to this location, which might already be used by some userspace application. The MT7688 supports VI - its config3 register is 0x00002420, so VInt (Bit 5) is set. But without setting CONFIG_CPU_MIPSR2_IRQ_VI this bit will not be evaluated to result in "cpu_has_vi" being set. This patch now selects CONFIG_CPU_MIPSR2_IRQ_VI on MT7620/8 which results trap_init() to allocate some memory for the exception handler. Please note that this issue was not seen with the Mediatek U-Boot version, as it does not touch EBase (stays at default of 0x8000.0000). This is strictly also not correct as the kernel (_text) resides here. Signed-off-by: Stefan Roese [paul.burton@mips.com: s/beeing/being/] Signed-off-by: Paul Burton Cc: John Crispin Cc: Daniel Schwierzeck Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin --- arch/mips/ralink/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig index f26736b7080b..fae36f0371d3 100644 --- a/arch/mips/ralink/Kconfig +++ b/arch/mips/ralink/Kconfig @@ -39,6 +39,7 @@ choice config SOC_MT7620 bool "MT7620/8" + select CPU_MIPSR2_IRQ_VI select HW_HAS_PCI config SOC_MT7621 From 254c1913cb652d4227fcff87d3518a22e9001062 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 13 Dec 2018 08:06:16 +1000 Subject: [PATCH 1789/2608] cifs: check ntwrk_buf_start for NULL before dereferencing it [ Upstream commit 59a63e479ce36a3f24444c3a36efe82b78e4a8e0 ] RHBZ: 1021460 There is an issue where when multiple threads open/close the same directory ntwrk_buf_start might end up being NULL, causing the call to smbCalcSize later to oops with a NULL deref. The real bug is why this happens and why this can become NULL for an open cfile, which should not be allowed. This patch tries to avoid a oops until the time when we fix the underlying issue. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/readdir.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ef24b4527459..68183872bf8b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -655,7 +655,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, /* scan and find it */ int i; char *cur_ent; - char *end_of_smb = cfile->srch_inf.ntwrk_buf_start + + char *end_of_smb; + + if (cfile->srch_inf.ntwrk_buf_start == NULL) { + cifs_dbg(VFS, "ntwrk_buf_start is NULL during readdir\n"); + return -EIO; + } + + end_of_smb = cfile->srch_inf.ntwrk_buf_start + server->ops->calc_smb_size( cfile->srch_inf.ntwrk_buf_start); From e8f13f32f436e3fae2b43a433ec5c677e0e2f5f9 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Wed, 5 Dec 2018 12:37:41 +0000 Subject: [PATCH 1790/2608] um: Avoid marking pages with "changed protection" [ Upstream commit 8892d8545f2d0342b9c550defbfb165db237044b ] Changing protection is a very high cost operation in UML because in addition to an extra syscall it also interrupts mmap merge sequences generated by the tlb. While the condition is not particularly common it is worth avoiding. Signed-off-by: Anton Ivanov Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/include/asm/pgtable.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 7485398d0737..9c04562310b3 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -197,12 +197,17 @@ static inline pte_t pte_mkold(pte_t pte) static inline pte_t pte_wrprotect(pte_t pte) { - pte_clear_bits(pte, _PAGE_RW); + if (likely(pte_get_bits(pte, _PAGE_RW))) + pte_clear_bits(pte, _PAGE_RW); + else + return pte; return(pte_mknewprot(pte)); } static inline pte_t pte_mkread(pte_t pte) { + if (unlikely(pte_get_bits(pte, _PAGE_USER))) + return pte; pte_set_bits(pte, _PAGE_USER); return(pte_mknewprot(pte)); } @@ -221,6 +226,8 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkwrite(pte_t pte) { + if (unlikely(pte_get_bits(pte, _PAGE_RW))) + return pte; pte_set_bits(pte, _PAGE_RW); return(pte_mknewprot(pte)); } From ba87bdade1366d650e00ff7b1524dba8bd4d71f5 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 01:56:14 -0600 Subject: [PATCH 1791/2608] niu: fix missing checks of niu_pci_eeprom_read [ Upstream commit 26fd962bde0b15e54234fe762d86bc0349df1de4 ] niu_pci_eeprom_read() may fail, so we should check its return value before using the read data. Signed-off-by: Kangjie Lu Acked-by: Shannon Nelson Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/sun/niu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index e92f41d20a2c..411a69bea1d4 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8119,6 +8119,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) start += 3; prop_len = niu_pci_eeprom_read(np, start + 4); + if (prop_len < 0) + return prop_len; err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64); if (err < 0) return err; @@ -8163,8 +8165,12 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) netif_printk(np, probe, KERN_DEBUG, np->dev, "VPD_SCAN: Reading in property [%s] len[%d]\n", namebuf, prop_len); - for (i = 0; i < prop_len; i++) - *prop_buf++ = niu_pci_eeprom_read(np, off + i); + for (i = 0; i < prop_len; i++) { + err = niu_pci_eeprom_read(np, off + i); + if (err >= 0) + *prop_buf = err; + ++prop_buf; + } } start += len; From 0f4d87ae4236363d3a3992630e8fbcca3dd56b8d Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 18 Dec 2018 16:39:24 +0530 Subject: [PATCH 1792/2608] f2fs: fix sbi->extent_list corruption issue [ Upstream commit e4589fa545e0020dbbc3c9bde35f35f949901392 ] When there is a failure in f2fs_fill_super() after/during the recovery of fsync'd nodes, it frees the current sbi and retries again. This time the mount is successful, but the files that got recovered before retry, still holds the extent tree, whose extent nodes list is corrupted since sbi and sbi->extent_list is freed up. The list_del corruption issue is observed when the file system is getting unmounted and when those recoverd files extent node is being freed up in the below context. list_del corruption. prev->next should be fffffff1e1ef5480, but was (null) <...> kernel BUG at kernel/msm-4.14/lib/list_debug.c:53! lr : __list_del_entry_valid+0x94/0xb4 pc : __list_del_entry_valid+0x94/0xb4 <...> Call trace: __list_del_entry_valid+0x94/0xb4 __release_extent_node+0xb0/0x114 __free_extent_tree+0x58/0x7c f2fs_shrink_extent_tree+0xdc/0x3b0 f2fs_leave_shrinker+0x28/0x7c f2fs_put_super+0xfc/0x1e0 generic_shutdown_super+0x70/0xf4 kill_block_super+0x2c/0x5c kill_f2fs_super+0x44/0x50 deactivate_locked_super+0x60/0x8c deactivate_super+0x68/0x74 cleanup_mnt+0x40/0x78 __cleanup_mnt+0x1c/0x28 task_work_run+0x48/0xd0 do_notify_resume+0x678/0xe98 work_pending+0x8/0x14 Fix this by not creating extents for those recovered files if shrinker is not registered yet. Once mount is successful and shrinker is registered, those files can have extents again. Signed-off-by: Sahitya Tummala Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 11 ++++++++++- fs/f2fs/shrinker.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f1a44696036..634165fb64f1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2294,10 +2294,19 @@ static inline bool is_dot_dotdot(const struct qstr *str) static inline bool f2fs_may_extent_tree(struct inode *inode) { - if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, EXTENT_CACHE) || is_inode_flag_set(inode, FI_NO_EXTENT)) return false; + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. + */ + if (list_empty(&sbi->s_list)) + return false; + return S_ISREG(inode->i_mode); } diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 5c60fc28ec75..ec71d2e29a15 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -138,6 +138,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); spin_lock(&f2fs_list_lock); - list_del(&sbi->s_list); + list_del_init(&sbi->s_list); spin_unlock(&f2fs_list_lock); } From 4c317b2ffd74fb510f1009003ecd97ebd60cfdcd Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Thu, 13 Dec 2018 15:17:37 +0100 Subject: [PATCH 1793/2608] cgroup: fix parsing empty mount option string [ Upstream commit e250d91d65750a0c0c62483ac4f9f357e7317617 ] This fixes the case where all mount options specified are consumed by an LSM and all that's left is an empty string. In this case cgroupfs should accept the string and not fail. How to reproduce (with SELinux enabled): # umount /sys/fs/cgroup/unified # mount -o context=system_u:object_r:cgroup_t:s0 -t cgroup2 cgroup2 /sys/fs/cgroup/unified mount: /sys/fs/cgroup/unified: wrong fs type, bad option, bad superblock on cgroup2, missing codepage or helper program, or other error. # dmesg | tail -n 1 [ 31.575952] cgroup: cgroup2: unknown option "" Fixes: 67e9c74b8a87 ("cgroup: replace __DEVEL__sane_behavior with cgroup2 fs type") [NOTE: should apply on top of commit 5136f6365ce3 ("cgroup: implement "nsdelegate" mount option"), older versions need manual rebase] Suggested-by: Stephen Smalley Signed-off-by: Ondrej Mosnacek Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/cgroup/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 109c32c56de7..21bbfc09e395 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1692,7 +1692,7 @@ static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) *root_flags = 0; - if (!data) + if (!data || *data == '\0') return 0; while ((token = strsep(&data, ",")) != NULL) { From 6c16674b6bae46fb371bf5bb4243f02c3b14a53f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 28 Dec 2018 00:31:25 -0800 Subject: [PATCH 1794/2608] scripts/decode_stacktrace: only strip base path when a prefix of the path [ Upstream commit 67a28de47faa83585dd644bd4c31e5a1d9346c50 ] Running something like: decodecode vmlinux . leads to interested results where not only the leading "." gets stripped from the displayed paths, but also anywhere in the string, displaying something like: kvm_vcpu_check_block (arch/arm64/kvm/virt/kvm/kvm_mainc:2141) which doesn't help further processing. Fix it by only stripping the base path if it is a prefix of the path. Link: http://lkml.kernel.org/r/20181210174659.31054-3-marc.zyngier@arm.com Signed-off-by: Marc Zyngier Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- scripts/decode_stacktrace.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index 64220e36ce3b..98a7d63a723e 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -78,7 +78,7 @@ parse_symbol() { fi # Strip out the base of the path - code=${code//$basepath/""} + code=${code//^$basepath/""} # In the case of inlines, move everything to same line code=${code//$'\n'/' '} From 46afec640faa6ede9417db87c98d6addf77c2173 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 28 Dec 2018 00:32:57 -0800 Subject: [PATCH 1795/2608] ocfs2: don't clear bh uptodate for block read [ Upstream commit 70306d9dce75abde855cefaf32b3f71eed8602a3 ] For sync io read in ocfs2_read_blocks_sync(), first clear bh uptodate flag and submit the io, second wait io done, last check whether bh uptodate, if not return io error. If two sync io for the same bh were issued, it could be the first io done and set uptodate flag, but just before check that flag, the second io came in and cleared uptodate, then ocfs2_read_blocks_sync() for the first io will return IO error. Indeed it's not necessary to clear uptodate flag, as the io end handler end_buffer_read_sync() will set or clear it based on io succeed or failed. The following message was found from a nfs server but the underlying storage returned no error. [4106438.567376] (nfsd,7146,3):ocfs2_get_suballoc_slot_bit:2780 ERROR: read block 1238823695 failed -5 [4106438.567569] (nfsd,7146,3):ocfs2_get_suballoc_slot_bit:2812 ERROR: status = -5 [4106438.567611] (nfsd,7146,3):ocfs2_test_inode_bit:2894 ERROR: get alloc slot and bit failed -5 [4106438.567643] (nfsd,7146,3):ocfs2_test_inode_bit:2932 ERROR: status = -5 [4106438.567675] (nfsd,7146,3):ocfs2_get_dentry:94 ERROR: test inode bit failed -5 Same issue in non sync read ocfs2_read_blocks(), fixed it as well. Link: http://lkml.kernel.org/r/20181121020023.3034-4-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Changwei Ge Reviewed-by: Yiwen Jiang Cc: Joel Becker Cc: Joseph Qi Cc: Jun Piao Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/buffer_head_io.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 1d098c3c00e0..9f8250df99f1 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -152,7 +152,6 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, #endif } - clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, 0, bh); @@ -306,7 +305,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, continue; } - clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ if (validate) set_buffer_needs_validate(bh); From 9a0234891ee9849d60aed3865e8ce5dd914c799c Mon Sep 17 00:00:00 2001 From: Larry Chen Date: Fri, 28 Dec 2018 00:32:46 -0800 Subject: [PATCH 1796/2608] ocfs2: improve ocfs2 Makefile [ Upstream commit 9e6aea22802b5684c7e1d69822aeb0844dd01953 ] Included file path was hard-wired in the ocfs2 makefile, which might causes some confusion when compiling ocfs2 as an external module. Say if we compile ocfs2 module as following. cp -r /kernel/tree/fs/ocfs2 /other/dir/ocfs2 cd /other/dir/ocfs2 make -C /path/to/kernel_source M=`pwd` modules Acutally, the compiler wil try to find included file in /kernel/tree/fs/ocfs2, rather than the directory /other/dir/ocfs2. To fix this little bug, we introduce the var $(src) provided by kbuild. $(src) means the absolute path of the running kbuild file. Link: http://lkml.kernel.org/r/20181108085546.15149-1-lchen@suse.com Signed-off-by: Larry Chen Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/Makefile | 2 +- fs/ocfs2/dlm/Makefile | 2 +- fs/ocfs2/dlmfs/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 99ee093182cb..cc9b32b9db7c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-y := -Ifs/ocfs2 +ccflags-y := -I$(src) obj-$(CONFIG_OCFS2_FS) += \ ocfs2.o \ diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index bd1aab1f49a4..ef2854422a6e 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile @@ -1,4 +1,4 @@ -ccflags-y := -Ifs/ocfs2 +ccflags-y := -I$(src)/.. obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile index eed3db8c5b49..33431a0296a3 100644 --- a/fs/ocfs2/dlmfs/Makefile +++ b/fs/ocfs2/dlmfs/Makefile @@ -1,4 +1,4 @@ -ccflags-y := -Ifs/ocfs2 +ccflags-y := -I$(src)/.. obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o From 02594fb480000b3d16f27675412b8922471dab55 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 26 Dec 2018 22:09:34 +0800 Subject: [PATCH 1797/2608] isdn: hisax: hfc_pci: Fix a possible concurrency use-after-free bug in HFCPCI_l1hw() [ Upstream commit 7418e6520f22a2e35815122fa5a53d5bbfa2c10f ] In drivers/isdn/hisax/hfc_pci.c, the functions hfcpci_interrupt() and HFCPCI_l1hw() may be concurrently executed. HFCPCI_l1hw() line 1173: if (!cs->tx_skb) hfcpci_interrupt() line 942: spin_lock_irqsave(); line 1066: dev_kfree_skb_irq(cs->tx_skb); Thus, a possible concurrency use-after-free bug may occur in HFCPCI_l1hw(). To fix these bugs, the calls to spin_lock_irqsave() and spin_unlock_irqrestore() are added in HFCPCI_l1hw(), to protect the access to cs->tx_skb. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/hisax/hfc_pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index f9ca35cc32b1..b42d27a4c950 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -1169,11 +1169,13 @@ HFCPCI_l1hw(struct PStack *st, int pr, void *arg) if (cs->debug & L1_DEB_LAPD) debugl1(cs, "-> PH_REQUEST_PULL"); #endif + spin_lock_irqsave(&cs->lock, flags); if (!cs->tx_skb) { test_and_clear_bit(FLG_L1_PULL_REQ, &st->l1.Flags); st->l1.l1l2(st, PH_PULL | CONFIRM, NULL); } else test_and_set_bit(FLG_L1_PULL_REQ, &st->l1.Flags); + spin_unlock_irqrestore(&cs->lock, flags); break; case (HW_RESET | REQUEST): spin_lock_irqsave(&cs->lock, flags); From e5727e4feb221f40d70b8c91e116543c491c83c1 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 26 Dec 2018 20:15:13 -0600 Subject: [PATCH 1798/2608] gdrom: fix a memory leak bug [ Upstream commit 093c48213ee37c3c3ff1cf5ac1aa2a9d8bc66017 ] In probe_gdrom(), the buffer pointed by 'gd.cd_info' is allocated through kzalloc() and is used to hold the information of the gdrom device. To register and unregister the device, the pointer 'gd.cd_info' is passed to the functions register_cdrom() and unregister_cdrom(), respectively. However, this buffer is not freed after it is used, which can cause a memory leak bug. This patch simply frees the buffer 'gd.cd_info' in exit_gdrom() to fix the above issue. Signed-off-by: Wenwen Wang Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/cdrom/gdrom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index ae3a7537cf0f..72cd96a8eb19 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -889,6 +889,7 @@ static void __exit exit_gdrom(void) platform_device_unregister(pd); platform_driver_unregister(&gdrom_driver); kfree(gd.toc); + kfree(gd.cd_info); } module_init(init_gdrom); From 922c8a5ef7ad6863e7235634c44c94f657fbdebc Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 27 Dec 2018 18:29:09 -0600 Subject: [PATCH 1799/2608] fsl/fman: Use GFP_ATOMIC in {memac,tgec}_add_hash_mac_address() [ Upstream commit 0d9c9a238faf925823bde866182c663b6d734f2e ] These functions are called from atomic context: [ 9.150239] BUG: sleeping function called from invalid context at /home/scott/git/linux/mm/slab.h:421 [ 9.158159] in_atomic(): 1, irqs_disabled(): 0, pid: 4432, name: ip [ 9.163128] CPU: 8 PID: 4432 Comm: ip Not tainted 4.20.0-rc2-00169-g63d86876f324 #29 [ 9.163130] Call Trace: [ 9.170701] [c0000002e899a980] [c0000000009c1068] .dump_stack+0xa8/0xec (unreliable) [ 9.177140] [c0000002e899aa10] [c00000000007a7b4] .___might_sleep+0x138/0x164 [ 9.184440] [c0000002e899aa80] [c0000000001d5bac] .kmem_cache_alloc_trace+0x238/0x30c [ 9.191216] [c0000002e899ab40] [c00000000065ea1c] .memac_add_hash_mac_address+0x104/0x198 [ 9.199464] [c0000002e899abd0] [c00000000065a788] .set_multi+0x1c8/0x218 [ 9.206242] [c0000002e899ac80] [c0000000006615ec] .dpaa_set_rx_mode+0xdc/0x17c [ 9.213544] [c0000002e899ad00] [c00000000083d2b0] .__dev_set_rx_mode+0x80/0xd4 [ 9.219535] [c0000002e899ad90] [c00000000083d334] .dev_set_rx_mode+0x30/0x54 [ 9.225271] [c0000002e899ae10] [c00000000083d4a0] .__dev_open+0x148/0x1c8 [ 9.230751] [c0000002e899aeb0] [c00000000083d934] .__dev_change_flags+0x19c/0x1e0 [ 9.230755] [c0000002e899af60] [c00000000083d9a4] .dev_change_flags+0x2c/0x80 [ 9.242752] [c0000002e899aff0] [c0000000008554ec] .do_setlink+0x350/0xf08 [ 9.248228] [c0000002e899b170] [c000000000857ad0] .rtnl_newlink+0x588/0x7e0 [ 9.253965] [c0000002e899b740] [c000000000852424] .rtnetlink_rcv_msg+0x3e0/0x498 [ 9.261440] [c0000002e899b820] [c000000000884790] .netlink_rcv_skb+0x134/0x14c [ 9.267607] [c0000002e899b8e0] [c000000000851840] .rtnetlink_rcv+0x18/0x2c [ 9.274558] [c0000002e899b950] [c000000000883c8c] .netlink_unicast+0x214/0x318 [ 9.281163] [c0000002e899ba00] [c000000000884220] .netlink_sendmsg+0x348/0x444 [ 9.287076] [c0000002e899bae0] [c00000000080d13c] .sock_sendmsg+0x2c/0x54 [ 9.287080] [c0000002e899bb50] [c0000000008106c0] .___sys_sendmsg+0x2d0/0x2d8 [ 9.298375] [c0000002e899bd30] [c000000000811a80] .__sys_sendmsg+0x5c/0xb0 [ 9.303939] [c0000002e899be20] [c0000000000006b0] system_call+0x60/0x6c Signed-off-by: Scott Wood Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/fman_memac.c | 2 +- drivers/net/ethernet/freescale/fman/fman_tgec.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index c0296880feba..75ce773c21a6 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -927,7 +927,7 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr) hash = get_mac_addr_hash_code(addr) & HASH_CTRL_ADDR_MASK; /* Create element to be added to the driver hash table */ - hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL); + hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC); if (!hash_entry) return -ENOMEM; hash_entry->addr = addr; diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c index 4b0f3a50b293..e575259d20f4 100644 --- a/drivers/net/ethernet/freescale/fman/fman_tgec.c +++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c @@ -551,7 +551,7 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr) hash = (crc >> TGEC_HASH_MCAST_SHIFT) & TGEC_HASH_ADR_MSK; /* Create element to be added to the driver hash table */ - hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL); + hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC); if (!hash_entry) return -ENOMEM; hash_entry->addr = addr; From d541fb2af57d22220e8823a5307cf4cb06a25f2d Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 31 Dec 2018 16:44:09 +1100 Subject: [PATCH 1800/2608] block/swim3: Fix -EBUSY error when re-opening device after unmount [ Upstream commit 296dcc40f2f2e402facf7cd26cf3f2c8f4b17d47 ] When the block device is opened with FMODE_EXCL, ref_count is set to -1. This value doesn't get reset when the device is closed which means the device cannot be opened again. Fix this by checking for refcount <= 0 in the release method. Reported-and-tested-by: Stan Johnson Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Finn Thain Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/swim3.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 0d7527c6825a..2f7acdb830c3 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1027,7 +1027,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode) struct swim3 __iomem *sw = fs->swim3; mutex_lock(&swim3_mutex); - if (fs->ref_count > 0 && --fs->ref_count == 0) { + if (fs->ref_count > 0) + --fs->ref_count; + else if (fs->ref_count == -1) + fs->ref_count = 0; + if (fs->ref_count == 0) { swim3_action(fs, MOTOR_OFF); out_8(&sw->control_bic, 0xff); swim3_select(fs, RELAX); From 7bb72b163f989628bffa78bd2424b3c3a21f86c7 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Sun, 21 Oct 2018 23:58:48 +0200 Subject: [PATCH 1801/2608] thermal: bcm2835: enable hwmon explicitly [ Upstream commit d56c19d07e0bc3ceff366a49b7d7a2440c967b1b ] By defaul of-based thermal driver do not enable hwmon. This patch does this explicitly, so that the temperature can be read through the common hwmon sysfs. Signed-off-by: Matthias Brugger Acked-by: Stefan Wahren Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/thermal/broadcom/bcm2835_thermal.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c index 23ad4f9f2143..24b006a95142 100644 --- a/drivers/thermal/broadcom/bcm2835_thermal.c +++ b/drivers/thermal/broadcom/bcm2835_thermal.c @@ -27,6 +27,8 @@ #include #include +#include "../thermal_hwmon.h" + #define BCM2835_TS_TSENSCTL 0x00 #define BCM2835_TS_TSENSSTAT 0x04 @@ -275,6 +277,15 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tz); + /* + * Thermal_zone doesn't enable hwmon as default, + * enable it here + */ + tz->tzp->no_hwmon = false; + err = thermal_add_hwmon_sysfs(tz); + if (err) + goto err_tz; + bcm2835_thermal_debugfs(pdev); return 0; From a67a554d53d627cd12232c15ef2b159808410ccb Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 4 Dec 2018 19:38:28 -0800 Subject: [PATCH 1802/2608] kdb: Don't back trace on a cpu that didn't round up [ Upstream commit 162bc7f5afd75b72acbe3c5f3488ef7e64a3fe36 ] If you have a CPU that fails to round up and then run 'btc' you'll end up crashing in kdb becaue we dereferenced NULL. Let's add a check. It's wise to also set the task to NULL when leaving the debugger so that if we fail to round up on a later entry into the debugger we won't backtrace a stale task. Signed-off-by: Douglas Anderson Acked-by: Daniel Thompson Signed-off-by: Daniel Thompson Signed-off-by: Sasha Levin --- kernel/debug/debug_core.c | 4 ++++ kernel/debug/kdb/kdb_bt.c | 11 ++++++++++- kernel/debug/kdb/kdb_debugger.c | 7 ------- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 65c0f1363788..94aa9ae0007a 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -535,6 +535,8 @@ return_normal: arch_kgdb_ops.correct_hw_break(); if (trace_on) tracing_on(); + kgdb_info[cpu].debuggerinfo = NULL; + kgdb_info[cpu].task = NULL; kgdb_info[cpu].exception_state &= ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); kgdb_info[cpu].enter_kgdb--; @@ -667,6 +669,8 @@ kgdb_restore: if (trace_on) tracing_on(); + kgdb_info[cpu].debuggerinfo = NULL; + kgdb_info[cpu].task = NULL; kgdb_info[cpu].exception_state &= ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); kgdb_info[cpu].enter_kgdb--; diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 7921ae4fca8d..7e2379aa0a1e 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -186,7 +186,16 @@ kdb_bt(int argc, const char **argv) kdb_printf("btc: cpu status: "); kdb_parse("cpu\n"); for_each_online_cpu(cpu) { - sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); + void *kdb_tsk = KDB_TSK(cpu); + + /* If a CPU failed to round up we could be here */ + if (!kdb_tsk) { + kdb_printf("WARNING: no task for cpu %ld\n", + cpu); + continue; + } + + sprintf(buf, "btt 0x%px\n", kdb_tsk); kdb_parse(buf); touch_nmi_watchdog(); } diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index 15e1a7af5dd0..53a0df6e4d92 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -118,13 +118,6 @@ int kdb_stub(struct kgdb_state *ks) kdb_bp_remove(); KDB_STATE_CLEAR(DOING_SS); KDB_STATE_SET(PAGER); - /* zero out any offline cpu data */ - for_each_present_cpu(i) { - if (!cpu_online(i)) { - kgdb_info[i].debuggerinfo = NULL; - kgdb_info[i].task = NULL; - } - } if (ks->err_code == DIE_OOPS || reason == KDB_REASON_OOPS) { ks->pass_exception = 1; KDB_FLAG_SET(CATASTROPHIC); From 919fc06bf18b4e26d8d68f249f968b36c2e45dea Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 23 Dec 2018 23:26:44 -0800 Subject: [PATCH 1803/2608] thermal: generic-adc: Fix adc to temp interpolation [ Upstream commit 9d216211fded20fff301d0317af3238d8383634c ] First correct the edge case to return the last element if we're outside the range, rather than at the last element, so that interpolation is not omitted for points between the two last entries in the table. Then correct the formula to perform linear interpolation based the two points surrounding the read ADC value. The indices for temp are kept as "hi" and "lo" to pair with the adc indices, but there's no requirement that the temperature is provided in descendent order. mult_frac() is used to prevent issues with overflowing the int. Cc: Laxman Dewangan Signed-off-by: Bjorn Andersson Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/thermal/thermal-generic-adc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal-generic-adc.c b/drivers/thermal/thermal-generic-adc.c index 73f55d6a1721..ad601e5b4175 100644 --- a/drivers/thermal/thermal-generic-adc.c +++ b/drivers/thermal/thermal-generic-adc.c @@ -26,7 +26,7 @@ struct gadc_thermal_info { static int gadc_thermal_adc_to_temp(struct gadc_thermal_info *gti, int val) { - int temp, adc_hi, adc_lo; + int temp, temp_hi, temp_lo, adc_hi, adc_lo; int i; for (i = 0; i < gti->nlookup_table; i++) { @@ -36,13 +36,17 @@ static int gadc_thermal_adc_to_temp(struct gadc_thermal_info *gti, int val) if (i == 0) { temp = gti->lookup_table[0]; - } else if (i >= (gti->nlookup_table - 1)) { + } else if (i >= gti->nlookup_table) { temp = gti->lookup_table[2 * (gti->nlookup_table - 1)]; } else { adc_hi = gti->lookup_table[2 * i - 1]; adc_lo = gti->lookup_table[2 * i + 1]; - temp = gti->lookup_table[2 * i]; - temp -= ((val - adc_lo) * 1000) / (adc_hi - adc_lo); + + temp_hi = gti->lookup_table[2 * i - 2]; + temp_lo = gti->lookup_table[2 * i]; + + temp = temp_hi + mult_frac(temp_lo - temp_hi, val - adc_hi, + adc_lo - adc_hi); } return temp; From 0c15d464c84891fec9363041c4431b26c510caf9 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 24 Dec 2018 15:39:14 -0600 Subject: [PATCH 1804/2608] HID: lenovo: Add checks to fix of_led_classdev_register [ Upstream commit 6ae16dfb61bce538d48b7fe98160fada446056c5 ] In lenovo_probe_tpkbd(), the function of_led_classdev_register() could return an error value that is unchecked. The fix adds these checks. Signed-off-by: Aditya Pakki Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-lenovo.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 643b6eb54442..eacc76d2ab96 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -743,7 +743,9 @@ static int lenovo_probe_tpkbd(struct hid_device *hdev) data_pointer->led_mute.brightness_get = lenovo_led_brightness_get_tpkbd; data_pointer->led_mute.brightness_set = lenovo_led_brightness_set_tpkbd; data_pointer->led_mute.dev = dev; - led_classdev_register(dev, &data_pointer->led_mute); + ret = led_classdev_register(dev, &data_pointer->led_mute); + if (ret < 0) + goto err; data_pointer->led_micmute.name = name_micmute; data_pointer->led_micmute.brightness_get = @@ -751,7 +753,11 @@ static int lenovo_probe_tpkbd(struct hid_device *hdev) data_pointer->led_micmute.brightness_set = lenovo_led_brightness_set_tpkbd; data_pointer->led_micmute.dev = dev; - led_classdev_register(dev, &data_pointer->led_micmute); + ret = led_classdev_register(dev, &data_pointer->led_micmute); + if (ret < 0) { + led_classdev_unregister(&data_pointer->led_mute); + goto err; + } lenovo_features_set_tpkbd(hdev); From 53015f1e04aceb34380118085581b0bc83a5bf22 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 3 Jan 2019 15:26:31 -0800 Subject: [PATCH 1805/2608] kernel/hung_task.c: break RCU locks based on jiffies [ Upstream commit 304ae42739b108305f8d7b3eb3c1aec7c2b643a9 ] check_hung_uninterruptible_tasks() is currently calling rcu_lock_break() for every 1024 threads. But check_hung_task() is very slow if printk() was called, and is very fast otherwise. If many threads within some 1024 threads called printk(), the RCU grace period might be extended enough to trigger RCU stall warnings. Therefore, calling rcu_lock_break() for every some fixed jiffies will be safer. Link: http://lkml.kernel.org/r/1544800658-11423-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Acked-by: Paul E. McKenney Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Dmitry Vyukov Cc: "Rafael J. Wysocki" Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/hung_task.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 32b479468e4d..0cec1241e26f 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -33,7 +33,7 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; * is disabled during the critical section. It also controls the size of * the RCU grace period. So it needs to be upper-bound. */ -#define HUNG_TASK_BATCHING 1024 +#define HUNG_TASK_LOCK_BREAK (HZ / 10) /* * Zero means infinite timeout - no checking done: @@ -164,7 +164,7 @@ static bool rcu_lock_break(struct task_struct *g, struct task_struct *t) static void check_hung_uninterruptible_tasks(unsigned long timeout) { int max_count = sysctl_hung_task_check_count; - int batch_count = HUNG_TASK_BATCHING; + unsigned long last_break = jiffies; struct task_struct *g, *t; /* @@ -179,10 +179,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) for_each_process_thread(g, t) { if (!max_count--) goto unlock; - if (!--batch_count) { - batch_count = HUNG_TASK_BATCHING; + if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) { if (!rcu_lock_break(g, t)) goto unlock; + last_break = jiffies; } /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (t->state == TASK_UNINTERRUPTIBLE) From ff9c3ae8c88265322d986d7917ca14fb3f89f227 Mon Sep 17 00:00:00 2001 From: Cheng Lin Date: Thu, 3 Jan 2019 15:26:13 -0800 Subject: [PATCH 1806/2608] proc/sysctl: fix return error for proc_doulongvec_minmax() [ Upstream commit 09be178400829dddc1189b50a7888495dd26aa84 ] If the number of input parameters is less than the total parameters, an EINVAL error will be returned. For example, we use proc_doulongvec_minmax to pass up to two parameters with kern_table: { .procname = "monitor_signals", .data = &monitor_sigs, .maxlen = 2*sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, Reproduce: When passing two parameters, it's work normal. But passing only one parameter, an error "Invalid argument"(EINVAL) is returned. [root@cl150 ~]# echo 1 2 > /proc/sys/kernel/monitor_signals [root@cl150 ~]# cat /proc/sys/kernel/monitor_signals 1 2 [root@cl150 ~]# echo 3 > /proc/sys/kernel/monitor_signals -bash: echo: write error: Invalid argument [root@cl150 ~]# echo $? 1 [root@cl150 ~]# cat /proc/sys/kernel/monitor_signals 3 2 [root@cl150 ~]# The following is the result after apply this patch. No error is returned when the number of input parameters is less than the total parameters. [root@cl150 ~]# echo 1 2 > /proc/sys/kernel/monitor_signals [root@cl150 ~]# cat /proc/sys/kernel/monitor_signals 1 2 [root@cl150 ~]# echo 3 > /proc/sys/kernel/monitor_signals [root@cl150 ~]# echo $? 0 [root@cl150 ~]# cat /proc/sys/kernel/monitor_signals 3 2 [root@cl150 ~]# There are three processing functions dealing with digital parameters, __do_proc_dointvec/__do_proc_douintvec/__do_proc_doulongvec_minmax. This patch deals with __do_proc_doulongvec_minmax, just as __do_proc_dointvec does, adding a check for parameters 'left'. In __do_proc_douintvec, its code implementation explicitly does not support multiple inputs. static int __do_proc_douintvec(...){ ... /* * Arrays are not supported, keep this simple. *Do not* add * support for them. */ if (vleft != 1) { *lenp = 0; return -EINVAL; } ... } So, just __do_proc_doulongvec_minmax has the problem. And most use of proc_doulongvec_minmax/proc_doulongvec_ms_jiffies_minmax just have one parameter. Link: http://lkml.kernel.org/r/1544081775-15720-1-git-send-email-cheng.lin130@zte.com.cn Signed-off-by: Cheng Lin Acked-by: Luis Chamberlain Reviewed-by: Kees Cook Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/sysctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d330b1ce3b94..3ad00bf90b3d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2708,6 +2708,8 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int bool neg; left -= proc_skip_spaces(&p); + if (!left) + break; err = proc_get_long(&p, &left, &val, &neg, proc_wspace_sep, From 31a38a0c02359ca773d3c86756fd5324826fa866 Mon Sep 17 00:00:00 2001 From: "Liu, Chuansheng" Date: Thu, 3 Jan 2019 15:26:27 -0800 Subject: [PATCH 1807/2608] kernel/hung_task.c: force console verbose before panic [ Upstream commit 168e06f7937d96c7222037d8a05565e8a6eb00fe ] Based on commit 401c636a0eeb ("kernel/hung_task.c: show all hung tasks before panic"), we could get the call stack of hung task. However, if the console loglevel is not high, we still can not see the useful panic information in practice, and in most cases users don't set console loglevel to high level. This patch is to force console verbose before system panic, so that the real useful information can be seen in the console, instead of being like the following, which doesn't have hung task information. INFO: task init:1 blocked for more than 120 seconds. Tainted: G U W 4.19.0-quilt-2e5dc0ac-g51b6c21d76cc #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. Kernel panic - not syncing: hung_task: blocked tasks CPU: 2 PID: 479 Comm: khungtaskd Tainted: G U W 4.19.0-quilt-2e5dc0ac-g51b6c21d76cc #1 Call Trace: dump_stack+0x4f/0x65 panic+0xde/0x231 watchdog+0x290/0x410 kthread+0x12c/0x150 ret_from_fork+0x35/0x40 reboot: panic mode set: p,w Kernel Offset: 0x34000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Link: http://lkml.kernel.org/r/27240C0AC20F114CBF8149A2696CBE4A6015B675@SHSMSX101.ccr.corp.intel.com Signed-off-by: Chuansheng Liu Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/hung_task.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 0cec1241e26f..f9aaf4994062 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -103,8 +103,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) trace_sched_process_hang(t); - if (!sysctl_hung_task_warnings && !sysctl_hung_task_panic) - return; + if (sysctl_hung_task_panic) { + console_verbose(); + hung_task_show_lock = true; + hung_task_call_panic = true; + } /* * Ok, the task did not get scheduled for more than 2 minutes, @@ -126,11 +129,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) } touch_nmi_watchdog(); - - if (sysctl_hung_task_panic) { - hung_task_show_lock = true; - hung_task_call_panic = true; - } } /* From fad3ec7ce4fc7c2b862014357be2dd660e5a5d3d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 3 Jan 2019 15:27:09 -0800 Subject: [PATCH 1808/2608] fs/epoll: drop ovflist branch prediction [ Upstream commit 76699a67f3041ff4c7af6d6ee9be2bfbf1ffb671 ] The ep->ovflist is a secondary ready-list to temporarily store events that might occur when doing sproc without holding the ep->wq.lock. This accounts for every time we check for ready events and also send events back to userspace; both callbacks, particularly the latter because of copy_to_user, can account for a non-trivial time. As such, the unlikely() check to see if the pointer is being used, seems both misleading and sub-optimal. In fact, we go to an awful lot of trouble to sync both lists, and populating the ovflist is far from an uncommon scenario. For example, profiling a concurrent epoll_wait(2) benchmark, with CONFIG_PROFILE_ANNOTATED_BRANCHES shows that for a two threads a 33% incorrect rate was seen; and when incrementally increasing the number of epoll instances (which is used, for example for multiple queuing load balancing models), up to a 90% incorrect rate was seen. Similarly, by deleting the prediction, 3% throughput boost was seen across incremental threads. Link: http://lkml.kernel.org/r/20181108051006.18751-4-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Reviewed-by: Andrew Morton Cc: Al Viro Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/eventpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 2fabd19cdeea..c291bf61afb9 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1167,7 +1167,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * semantics). All the events that happen during that period of time are * chained in ep->ovflist and requeued later on. */ - if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { + if (ep->ovflist != EP_UNACTIVE_PTR) { if (epi->next == EP_UNACTIVE_PTR) { epi->next = ep->ovflist; ep->ovflist = epi; From 56ade33b1bf76c243cd0da96701b06e2c42d5a45 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 3 Jan 2019 15:28:07 -0800 Subject: [PATCH 1809/2608] exec: load_script: don't blindly truncate shebang string [ Upstream commit 8099b047ecc431518b9bb6bdbba3549bbecdc343 ] load_script() simply truncates bprm->buf and this is very wrong if the length of shebang string exceeds BINPRM_BUF_SIZE-2. This can silently truncate i_arg or (worse) we can execute the wrong binary if buf[2:126] happens to be the valid executable path. Change load_script() to return ENOEXEC if it can't find '\n' or zero in bprm->buf. Note that '\0' can come from either prepare_binprm()->memset() or from kernel_read(), we do not care. Link: http://lkml.kernel.org/r/20181112160931.GA28463@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Acked-by: Michal Hocko Cc: Ben Woodard Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/binfmt_script.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 7cde3f46ad26..d0078cbb718b 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -42,10 +42,14 @@ static int load_script(struct linux_binprm *bprm) fput(bprm->file); bprm->file = NULL; - bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; - if ((cp = strchr(bprm->buf, '\n')) == NULL) - cp = bprm->buf+BINPRM_BUF_SIZE-1; + for (cp = bprm->buf+2;; cp++) { + if (cp >= bprm->buf + BINPRM_BUF_SIZE) + return -ENOEXEC; + if (!*cp || (*cp == '\n')) + break; + } *cp = '\0'; + while (cp > bprm->buf) { cp--; if ((*cp == ' ') || (*cp == '\t')) From 179c71c5a21d3d614a58da600882984b77eaaa32 Mon Sep 17 00:00:00 2001 From: Du Changbin Date: Thu, 3 Jan 2019 15:28:27 -0800 Subject: [PATCH 1810/2608] scripts/gdb: fix lx-version string output [ Upstream commit b058809bfc8faeb7b7cae047666e23375a060059 ] A bug is present in GDB which causes early string termination when parsing variables. This has been reported [0], but we should ensure that we can support at least basic printing of the core kernel strings. For current gdb version (has been tested with 7.3 and 8.1), 'lx-version' only prints one character. (gdb) lx-version L(gdb) This can be fixed by casting 'linux_banner' as (char *). (gdb) lx-version Linux version 4.19.0-rc1+ (changbin@acer) (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3)) #21 SMP Sat Sep 1 21:43:30 CST 2018 [0] https://sourceware.org/bugzilla/show_bug.cgi?id=20077 [kbingham@kernel.org: add detail to commit message] Link: http://lkml.kernel.org/r/20181111162035.8356-1-kieran.bingham@ideasonboard.com Fixes: 2d061d999424 ("scripts/gdb: add version command") Signed-off-by: Du Changbin Signed-off-by: Kieran Bingham Acked-by: Jan Kiszka Cc: Jan Kiszka Cc: Jason Wessel Cc: Daniel Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- scripts/gdb/linux/proc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py index 086d27223c0c..0aebd7565b03 100644 --- a/scripts/gdb/linux/proc.py +++ b/scripts/gdb/linux/proc.py @@ -41,7 +41,7 @@ class LxVersion(gdb.Command): def invoke(self, arg, from_tty): # linux_banner should contain a newline - gdb.write(gdb.parse_and_eval("linux_banner").string()) + gdb.write(gdb.parse_and_eval("(char *)linux_banner").string()) LxVersion() From 304df171cc6610f120acb5b2cba5a9c179fbcfd0 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 2 Jan 2019 00:34:03 +0000 Subject: [PATCH 1811/2608] thermal: hwmon: inline helpers when CONFIG_THERMAL_HWMON is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 03334ba8b425b2ad275c8f390cf83c7b081c3095 upstream. Avoid warnings like this: thermal_hwmon.h:29:1: warning: ‘thermal_remove_hwmon_sysfs’ defined but not used [-Wunused-function] thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) Fixes: 0dd88793aacd ("thermal: hwmon: move hwmon support to single file") Reviewed-by: Geert Uytterhoeven Signed-off-by: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_hwmon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_hwmon.h b/drivers/thermal/thermal_hwmon.h index c798fdb2ae43..f97f76691bd0 100644 --- a/drivers/thermal/thermal_hwmon.h +++ b/drivers/thermal/thermal_hwmon.h @@ -34,13 +34,13 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz); void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz); #else -static int +static inline int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) { return 0; } -static void +static inline void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { } From 9b4631e508728e1e7064f31557e25b433cc97f4a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jan 2019 11:39:41 -0800 Subject: [PATCH 1812/2608] dccp: fool proof ccid_hc_[rt]x_parse_options() [ Upstream commit 9b1f19d810e92d6cdc68455fbc22d9f961a58ce1 ] Similarly to commit 276bdb82dedb ("dccp: check ccid before dereferencing") it is wise to test for a NULL ccid. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.0.0-rc3+ #37 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:ccid_hc_tx_parse_options net/dccp/ccid.h:205 [inline] RIP: 0010:dccp_parse_options+0x8d9/0x12b0 net/dccp/options.c:233 Code: c5 0f b6 75 b3 80 38 00 0f 85 d6 08 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 b8 4c 8b b8 f8 07 00 00 4c 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85 95 08 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b kobject: 'loop5' (0000000080f78fc1): kobject_uevent_env RSP: 0018:ffff8880a94df0b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8880858ac723 RCX: dffffc0000000000 RDX: 0000000000000100 RSI: 0000000000000007 RDI: 0000000000000001 RBP: ffff8880a94df140 R08: 0000000000000001 R09: ffff888061b83a80 R10: ffffed100c370752 R11: ffff888061b83a97 R12: 0000000000000026 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0defa33518 CR3: 000000008db5e000 CR4: 00000000001406e0 kobject: 'loop5' (0000000080f78fc1): fill_kobj_path: path = '/devices/virtual/block/loop5' DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dccp_rcv_state_process+0x2b6/0x1af6 net/dccp/input.c:654 dccp_v4_do_rcv+0x100/0x190 net/dccp/ipv4.c:688 sk_backlog_rcv include/net/sock.h:936 [inline] __sk_receive_skb+0x3a9/0xea0 net/core/sock.c:473 dccp_v4_rcv+0x10cb/0x1f80 net/dccp/ipv4.c:880 ip_protocol_deliver_rcu+0xb6/0xa20 net/ipv4/ip_input.c:208 ip_local_deliver_finish+0x23b/0x390 net/ipv4/ip_input.c:234 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_local_deliver+0x1f0/0x740 net/ipv4/ip_input.c:255 dst_input include/net/dst.h:450 [inline] ip_rcv_finish+0x1f4/0x2f0 net/ipv4/ip_input.c:414 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_rcv+0xed/0x620 net/ipv4/ip_input.c:524 __netif_receive_skb_one_core+0x160/0x210 net/core/dev.c:4973 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5083 process_backlog+0x206/0x750 net/core/dev.c:5923 napi_poll net/core/dev.c:6346 [inline] net_rx_action+0x76d/0x1930 net/core/dev.c:6412 __do_softirq+0x30b/0xb11 kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:654 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646 smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Modules linked in: ---[ end trace 58a0ba03bea2c376 ]--- RIP: 0010:ccid_hc_tx_parse_options net/dccp/ccid.h:205 [inline] RIP: 0010:dccp_parse_options+0x8d9/0x12b0 net/dccp/options.c:233 Code: c5 0f b6 75 b3 80 38 00 0f 85 d6 08 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 b8 4c 8b b8 f8 07 00 00 4c 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85 95 08 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b RSP: 0018:ffff8880a94df0b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8880858ac723 RCX: dffffc0000000000 RDX: 0000000000000100 RSI: 0000000000000007 RDI: 0000000000000001 RBP: ffff8880a94df140 R08: 0000000000000001 R09: ffff888061b83a80 R10: ffffed100c370752 R11: ffff888061b83a97 R12: 0000000000000026 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0defa33518 CR3: 0000000009871000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Gerrit Renker Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } From 25805f7e25a3a1f22b39e811a06582e2143054be Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Wed, 30 Jan 2019 06:59:00 -0800 Subject: [PATCH 1813/2608] enic: fix checksum validation for IPv6 [ Upstream commit 7596175e99b3d4bce28022193efd954c201a782a ] In case of IPv6 pkts, ipv4_csum_ok is 0. Because of this, driver does not set skb->ip_summed. So IPv6 rx checksum is not offloaded. Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 03f4fee1bbc9..ced348e15a63 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1393,7 +1393,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, * csum is correct or is zero. */ if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && - tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) { + tcp_udp_csum_ok && outer_csum_ok && + (ipv4_csum_ok || ipv6)) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = encap; } From aef376197f6a1f01b84130523d03a49bfb340bb8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 4 Feb 2019 11:20:29 +0100 Subject: [PATCH 1814/2608] net: dp83640: expire old TX-skb [ Upstream commit 53bc8d2af08654659abfadfd3e98eb9922ff787c ] During sendmsg() a cloned skb is saved via dp83640_txtstamp() in ->tx_queue. After the NIC sends this packet, the PHY will reply with a timestamp for that TX packet. If the cable is pulled at the right time I don't see that packet. It might gets flushed as part of queue shutdown on NIC's side. Once the link is up again then after the next sendmsg() we enqueue another skb in dp83640_txtstamp() and have two on the list. Then the PHY will send a reply and decode_txts() attaches it to the first skb on the list. No crash occurs since refcounting works but we are one packet behind. linuxptp/ptp4l usually closes the socket and opens a new one (in such a timeout case) so those "stale" replies never get there. However it does not resume normal operation anymore. Purge old skbs in decode_txts(). Fixes: cb646e2b02b2 ("ptp: Added a clock driver for the National Semiconductor PHYTER.") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Kurt Kanzenbach Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83640.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 26fbbd3ffe33..afebdc2f0b94 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -893,14 +893,14 @@ static void decode_txts(struct dp83640_private *dp83640, struct phy_txts *phy_txts) { struct skb_shared_hwtstamps shhwtstamps; + struct dp83640_skb_info *skb_info; struct sk_buff *skb; - u64 ns; u8 overflow; + u64 ns; /* We must already have the skb that triggered this. */ - +again: skb = skb_dequeue(&dp83640->tx_queue); - if (!skb) { pr_debug("have timestamp but tx_queue empty\n"); return; @@ -915,6 +915,11 @@ static void decode_txts(struct dp83640_private *dp83640, } return; } + skb_info = (struct dp83640_skb_info *)skb->cb; + if (time_after(jiffies, skb_info->tmo)) { + kfree_skb(skb); + goto again; + } ns = phy2txts(phy_txts); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); @@ -1466,6 +1471,7 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, static void dp83640_txtstamp(struct phy_device *phydev, struct sk_buff *skb, int type) { + struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb; struct dp83640_private *dp83640 = phydev->priv; switch (dp83640->hwts_tx_en) { @@ -1478,6 +1484,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, /* fall through */ case HWTSTAMP_TX_ON: skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT; skb_queue_tail(&dp83640->tx_queue, skb); break; From 7763f0cce072cef87ac68acc5ce28f46bfe15cf5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Feb 2019 08:36:06 -0800 Subject: [PATCH 1815/2608] rxrpc: bad unlock balance in rxrpc_recvmsg [ Upstream commit 6dce3c20ac429e7a651d728e375853370c796e8d ] When either "goto wait_interrupted;" or "goto wait_error;" paths are taken, socket lock has already been released. This patch fixes following syzbot splat : WARNING: bad unlock balance detected! 5.0.0-rc4+ #59 Not tainted ------------------------------------- syz-executor223/8256 is trying to release lock (sk_lock-AF_RXRPC) at: [] rxrpc_recvmsg+0x6d3/0x3099 net/rxrpc/recvmsg.c:598 but there are no more locks to release! other info that might help us debug this: 1 lock held by syz-executor223/8256: #0: 00000000fa9ed0f4 (slock-AF_RXRPC){+...}, at: spin_lock_bh include/linux/spinlock.h:334 [inline] #0: 00000000fa9ed0f4 (slock-AF_RXRPC){+...}, at: release_sock+0x20/0x1c0 net/core/sock.c:2798 stack backtrace: CPU: 1 PID: 8256 Comm: syz-executor223 Not tainted 5.0.0-rc4+ #59 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_unlock_imbalance_bug kernel/locking/lockdep.c:3391 [inline] print_unlock_imbalance_bug.cold+0x114/0x123 kernel/locking/lockdep.c:3368 __lock_release kernel/locking/lockdep.c:3601 [inline] lock_release+0x67e/0xa00 kernel/locking/lockdep.c:3860 sock_release_ownership include/net/sock.h:1471 [inline] release_sock+0x183/0x1c0 net/core/sock.c:2808 rxrpc_recvmsg+0x6d3/0x3099 net/rxrpc/recvmsg.c:598 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg net/socket.c:801 [inline] sock_recvmsg+0xd0/0x110 net/socket.c:797 __sys_recvfrom+0x1ff/0x350 net/socket.c:1845 __do_sys_recvfrom net/socket.c:1863 [inline] __se_sys_recvfrom net/socket.c:1859 [inline] __x64_sys_recvfrom+0xe1/0x1a0 net/socket.c:1859 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x446379 Code: e8 2c b3 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fe5da89fd98 EFLAGS: 00000246 ORIG_RAX: 000000000000002d RAX: ffffffffffffffda RBX: 00000000006dbc28 RCX: 0000000000446379 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00000000006dbc20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dbc2c R13: 0000000000000000 R14: 0000000000000000 R15: 20c49ba5e353f7cf Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: Eric Dumazet Cc: David Howells Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/recvmsg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index abcf48026d99..b74cde2fd214 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -588,6 +588,7 @@ error_requeue_call: } error_no_call: release_sock(&rx->sk); +error_trace: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; @@ -596,7 +597,7 @@ wait_interrupted: wait_error: finish_wait(sk_sleep(&rx->sk), &wait); call = NULL; - goto error_no_call; + goto error_trace; } /** From c9680f2a0390997959afb1c8f450b7a91e08b266 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Feb 2019 11:28:16 +0300 Subject: [PATCH 1816/2608] skge: potential memory corruption in skge_get_regs() [ Upstream commit 294c149a209c6196c2de85f512b52ef50f519949 ] The "p" buffer is 0x4000 bytes long. B3_RI_WTO_R1 is 0x190. The value of "regs->len" is in the 1-0x4000 range. The bug here is that "regs->len - B3_RI_WTO_R1" can be a negative value which would lead to memory corruption and an abrupt crash. Fixes: c3f8be961808 ("[PATCH] skge: expand ethtool debug register dump") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/skge.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index eef35bf3e849..5d00be3aac73 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -152,8 +152,10 @@ static void skge_get_regs(struct net_device *dev, struct ethtool_regs *regs, memset(p, 0, regs->len); memcpy_fromio(p, io, B3_RAM_ADDR); - memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, - regs->len - B3_RI_WTO_R1); + if (regs->len > B3_RI_WTO_R1) { + memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, + regs->len - B3_RI_WTO_R1); + } } /* Wake on Lan only supported on Yukon chips with rev 1 or above */ From b28b106b1fda3e05986e4d945cac0c6f24c5e85a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Jan 2019 08:47:10 -0800 Subject: [PATCH 1817/2608] rds: fix refcount bug in rds_sock_addref [ Upstream commit 6fa19f5637a6c22bc0999596bcc83bdcac8a4fa6 ] syzbot was able to catch a bug in rds [1] The issue here is that the socket might be found in a hash table but that its refcount has already be set to 0 by another cpu. We need to use refcount_inc_not_zero() to be safe here. [1] refcount_t: increment on 0; use-after-free. WARNING: CPU: 1 PID: 23129 at lib/refcount.c:153 refcount_inc_checked lib/refcount.c:153 [inline] WARNING: CPU: 1 PID: 23129 at lib/refcount.c:153 refcount_inc_checked+0x61/0x70 lib/refcount.c:151 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 23129 Comm: syz-executor3 Not tainted 5.0.0-rc4+ #53 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1db/0x2d0 lib/dump_stack.c:113 panic+0x2cb/0x65c kernel/panic.c:214 __warn.cold+0x20/0x48 kernel/panic.c:571 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] fixup_bug arch/x86/kernel/traps.c:173 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:271 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:290 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:refcount_inc_checked lib/refcount.c:153 [inline] RIP: 0010:refcount_inc_checked+0x61/0x70 lib/refcount.c:151 Code: 1d 51 63 c8 06 31 ff 89 de e8 eb 1b f2 fd 84 db 75 dd e8 a2 1a f2 fd 48 c7 c7 60 9f 81 88 c6 05 31 63 c8 06 01 e8 af 65 bb fd <0f> 0b eb c1 90 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 54 49 RSP: 0018:ffff8880a0cbf1e8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc90006113000 RDX: 000000000001047d RSI: ffffffff81685776 RDI: 0000000000000005 RBP: ffff8880a0cbf1f8 R08: ffff888097c9e100 R09: ffffed1015ce5021 R10: ffffed1015ce5020 R11: ffff8880ae728107 R12: ffff8880723c20c0 R13: ffff8880723c24b0 R14: dffffc0000000000 R15: ffffed1014197e64 sock_hold include/net/sock.h:647 [inline] rds_sock_addref+0x19/0x20 net/rds/af_rds.c:675 rds_find_bound+0x97c/0x1080 net/rds/bind.c:82 rds_recv_incoming+0x3be/0x1430 net/rds/recv.c:362 rds_loop_xmit+0xf3/0x2a0 net/rds/loop.c:96 rds_send_xmit+0x1355/0x2a10 net/rds/send.c:355 rds_sendmsg+0x323c/0x44e0 net/rds/send.c:1368 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:631 __sys_sendto+0x387/0x5f0 net/socket.c:1788 __do_sys_sendto net/socket.c:1800 [inline] __se_sys_sendto net/socket.c:1796 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1796 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458089 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc266df8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 0000000000458089 RDX: 0000000000000000 RSI: 00000000204b3fff RDI: 0000000000000005 RBP: 000000000073bf00 R08: 00000000202b4000 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc266df96d4 R13: 00000000004c56e4 R14: 00000000004d94a8 R15: 00000000ffffffff Fixes: cc4dfb7f70a3 ("rds: fix two RCU related problems") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Sowmini Varadhan Cc: Santosh Shilimkar Cc: rds-devel@oss.oracle.com Cc: Cong Wang Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/bind.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rds/bind.c b/net/rds/bind.c index 48257d3a4201..4f1427c3452d 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -62,10 +62,10 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port) rcu_read_lock(); rs = rhashtable_lookup(&bind_hash_table, &key, ht_parms); - if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) - rds_sock_addref(rs); - else + if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) || + !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt))) rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, From 578c6d28fd781b8bcea85c01a1b6569176f09e71 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 1 Feb 2019 13:23:38 -0800 Subject: [PATCH 1818/2608] net: systemport: Fix WoL with password after deep sleep [ Upstream commit 8dfb8d2cceb76b74ad5b58cc65c75994329b4d5e ] Broadcom STB chips support a deep sleep mode where all register contents are lost. Because we were stashing the MagicPacket password into some of these registers a suspend into that deep sleep then a resumption would not lead to being able to wake-up from MagicPacket with password again. Fix this by keeping a software copy of the password and program it during suspend. Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 25 +++++++++------------- drivers/net/ethernet/broadcom/bcmsysport.h | 2 ++ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 45462557e51c..ed3edb17fd09 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -519,7 +519,6 @@ static void bcm_sysport_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcm_sysport_priv *priv = netdev_priv(dev); - u32 reg; wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE; wol->wolopts = priv->wolopts; @@ -527,11 +526,7 @@ static void bcm_sysport_get_wol(struct net_device *dev, if (!(priv->wolopts & WAKE_MAGICSECURE)) return; - /* Return the programmed SecureOn password */ - reg = umac_readl(priv, UMAC_PSW_MS); - put_unaligned_be16(reg, &wol->sopass[0]); - reg = umac_readl(priv, UMAC_PSW_LS); - put_unaligned_be32(reg, &wol->sopass[2]); + memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } static int bcm_sysport_set_wol(struct net_device *dev, @@ -547,13 +542,8 @@ static int bcm_sysport_set_wol(struct net_device *dev, if (wol->wolopts & ~supported) return -EINVAL; - /* Program the SecureOn password */ - if (wol->wolopts & WAKE_MAGICSECURE) { - umac_writel(priv, get_unaligned_be16(&wol->sopass[0]), - UMAC_PSW_MS); - umac_writel(priv, get_unaligned_be32(&wol->sopass[2]), - UMAC_PSW_LS); - } + if (wol->wolopts & WAKE_MAGICSECURE) + memcpy(priv->sopass, wol->sopass, sizeof(priv->sopass)); /* Flag the device and relevant IRQ as wakeup capable */ if (wol->wolopts) { @@ -2221,12 +2211,17 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) unsigned int timeout = 1000; u32 reg; - /* Password has already been programmed */ reg = umac_readl(priv, UMAC_MPD_CTRL); reg |= MPD_EN; reg &= ~PSW_EN; - if (priv->wolopts & WAKE_MAGICSECURE) + if (priv->wolopts & WAKE_MAGICSECURE) { + /* Program the SecureOn password */ + umac_writel(priv, get_unaligned_be16(&priv->sopass[0]), + UMAC_PSW_MS); + umac_writel(priv, get_unaligned_be32(&priv->sopass[2]), + UMAC_PSW_LS); reg |= PSW_EN; + } umac_writel(priv, reg, UMAC_MPD_CTRL); /* Make sure RBUF entered WoL mode as result */ diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 86ae751ccb5c..3df4a48b8eac 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -11,6 +11,7 @@ #ifndef __BCM_SYSPORT_H #define __BCM_SYSPORT_H +#include #include /* Receive/transmit descriptor format */ @@ -754,6 +755,7 @@ struct bcm_sysport_priv { unsigned int crc_fwd:1; u16 rev; u32 wolopts; + u8 sopass[SOPASS_MAX]; unsigned int wol_irq_disabled:1; /* MIB related fields */ From cc4ac4602a4e4c42c3b3eed5e11daa8bf7019db9 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 3 Dec 2018 22:14:04 -0800 Subject: [PATCH 1819/2608] net/mlx5e: Force CHECKSUM_UNNECESSARY for short ethernet frames [ Upstream commit e8c8b53ccaff568fef4c13a6ccaf08bf241aa01a ] When an ethernet frame is padded to meet the minimum ethernet frame size, the padding octets are not covered by the hardware checksum. Fortunately the padding octets are usually zero's, which don't affect checksum. However, we have a switch which pads non-zero octets, this causes kernel hardware checksum fault repeatedly. Prior to: commit '88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE ...")' skb checksum was forced to be CHECKSUM_NONE when padding is detected. After it, we need to keep skb->csum updated, like what we do for RXFCS. However, fixing up CHECKSUM_COMPLETE requires to verify and parse IP headers, it is not worthy the effort as the packets are so small that CHECKSUM_COMPLETE can't save anything. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), Cc: Eric Dumazet Cc: Tariq Toukan Cc: Nikola Ciprich Signed-off-by: Cong Wang Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 8b7b52c7512e..eec7c2ef067a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -646,6 +646,8 @@ static u32 mlx5e_get_fcs(const struct sk_buff *skb) return __get_unaligned_cpu32(fcs_bytes); } +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -661,6 +663,17 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to be + * CHECKSUM_UNNECESSARY even if they are not padded. + */ + if (short_frame(skb->len)) + goto csum_unnecessary; + if (is_first_ethertype_ip(skb)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); @@ -672,6 +685,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } +csum_unnecessary: if (likely((cqe->hds_ip_ext & CQE_L3_OK) && (cqe->hds_ip_ext & CQE_L4_OK))) { skb->ip_summed = CHECKSUM_UNNECESSARY; From 6aab49c5c7ac19350434e2e1144d7822e2adc8b8 Mon Sep 17 00:00:00 2001 From: Rundong Ge Date: Sat, 2 Feb 2019 14:29:35 +0000 Subject: [PATCH 1820/2608] net: dsa: slave: Don't propagate flag changes on down slave interfaces [ Upstream commit 17ab4f61b8cd6f9c38e9d0b935d86d73b5d0d2b5 ] The unbalance of master's promiscuity or allmulti will happen after ifdown and ifup a slave interface which is in a bridge. When we ifdown a slave interface , both the 'dsa_slave_close' and 'dsa_slave_change_rx_flags' will clear the master's flags. The flags of master will be decrease twice. In the other hand, if we ifup the slave interface again, since the slave's flags were cleared the 'dsa_slave_open' won't set the master's flag, only 'dsa_slave_change_rx_flags' that triggered by 'br_add_if' will set the master's flags. The flags of master is increase once. Only propagating flag changes when a slave interface is up makes sure this does not happen. The 'vlan_dev_change_rx_flags' had the same problem and was fixed, and changes here follows that fix. Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support") Signed-off-by: Rundong Ge Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/slave.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 242e74b9d454..b14d530a32b1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -156,10 +156,14 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = dsa_master_netdev(p); - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + } } static void dsa_slave_set_rx_mode(struct net_device *dev) From bbc0621ff3ed26b06769345a75c1802d53822efb Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 5 Feb 2019 16:29:40 +0000 Subject: [PATCH 1821/2608] ALSA: compress: Fix stop handling on compressed capture streams commit 4f2ab5e1d13d6aa77c55f4914659784efd776eb4 upstream. It is normal user behaviour to start, stop, then start a stream again without closing it. Currently this works for compressed playback streams but not capture ones. The states on a compressed capture stream go directly from OPEN to PREPARED, unlike a playback stream which moves to SETUP and waits for a write of data before moving to PREPARED. Currently however, when a stop is sent the state is set to SETUP for both types of streams. This leaves a capture stream in the situation where a new start can't be sent as that requires the state to be PREPARED and a new set_params can't be sent as that requires the state to be OPEN. The only option being to close the stream, and then reopen. Correct this issues by allowing snd_compr_drain_notify to set the state depending on the stream direction, as we already do in set_params. Fixes: 49bb6402f1aa ("ALSA: compress_core: Add support for capture streams") Signed-off-by: Charles Keepax Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/sound/compress_driver.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index 9924bc9cbc7c..392bac18398b 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -186,7 +186,11 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - stream->runtime->state = SNDRV_PCM_STATE_SETUP; + if (stream->direction == SND_COMPRESS_PLAYBACK) + stream->runtime->state = SNDRV_PCM_STATE_SETUP; + else + stream->runtime->state = SNDRV_PCM_STATE_PREPARED; + wake_up(&stream->runtime->sleep); } From c201e435e15f3595e508f630644ed0424cfc6666 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Jan 2019 17:46:03 +0100 Subject: [PATCH 1822/2608] ALSA: hda - Serialize codec registrations commit 305a0ade180981686eec1f92aa6252a7c6ebb1cf upstream. In the current code, the codec registration may happen both at the codec bind time and the end of the controller probe time. In a rare occasion, they race with each other, leading to Oops due to the still uninitialized card device. This patch introduces a simple flag to prevent the codec registration at the codec bind time as long as the controller probe is going on. The controller probe invokes snd_card_register() that does the whole registration task, and we don't need to register each piece beforehand. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_bind.c | 3 ++- sound/pci/hda/hda_codec.h | 1 + sound/pci/hda/hda_intel.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index d361bb77ca00..8db1890605f6 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -109,7 +109,8 @@ static int hda_codec_driver_probe(struct device *dev) err = snd_hda_codec_build_controls(codec); if (err < 0) goto error_module; - if (codec->card->registered) { + /* only register after the bus probe finished; otherwise it's racy */ + if (!codec->bus->bus_probing && codec->card->registered) { err = snd_card_register(codec->card); if (err < 0) goto error_module; diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 681c360f29f9..3812238e00d5 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -68,6 +68,7 @@ struct hda_bus { unsigned int response_reset:1; /* controller was reset */ unsigned int in_reset:1; /* during reset operation */ unsigned int no_response_fallback:1; /* don't fallback at RIRB error */ + unsigned int bus_probing :1; /* during probing process */ int primary_dig_out_type; /* primary digital out PCM type */ unsigned int mixer_assigned; /* codec addr for mixer name */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index d8e80b6f5a6b..afa591cf840a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2236,6 +2236,7 @@ static int azx_probe_continue(struct azx *chip) int val; int err; + to_hda_bus(bus)->bus_probing = 1; hda->probe_continued = 1; /* bind with i915 if needed */ @@ -2341,6 +2342,7 @@ i915_power_fail: if (err < 0) hda->init_failed = 1; complete_all(&hda->probe_wait); + to_hda_bus(bus)->bus_probing = 0; return err; } From 65f222bb370ebc02d5035a4d890df0bfd7343c0d Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 12 Jan 2019 02:39:05 +0100 Subject: [PATCH 1823/2608] fuse: call pipe_buf_release() under pipe lock commit 9509941e9c534920ccc4771ae70bd6cbbe79df1c upstream. Some of the pipe_buf_release() handlers seem to assume that the pipe is locked - in particular, anon_pipe_buf_release() accesses pipe->tmp_page without taking any extra locks. From a glance through the callers of pipe_buf_release(), it looks like FUSE is the only one that calls pipe_buf_release() without having the pipe locked. This bug should only lead to a memory leak, nothing terrible. Fixes: dd3bb14f44a6 ("fuse: support splice() writing to fuse device") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f7280c44cd4b..b29861854de4 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2024,8 +2024,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ret = fuse_dev_do_write(fud, &cs, len); + pipe_lock(pipe); for (idx = 0; idx < nbuf; idx++) pipe_buf_release(pipe, &bufs[idx]); + pipe_unlock(pipe); out: kfree(bufs); From b928e93d864ccaa136e3e75959385291b8bb0880 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Jan 2019 10:27:59 +0100 Subject: [PATCH 1824/2608] fuse: decrement NR_WRITEBACK_TEMP on the right page commit a2ebba824106dabe79937a9f29a875f837e1b6d4 upstream. NR_WRITEBACK_TEMP is accounted on the temporary page in the request, not the page cache page. Fixes: 8b284dc47291 ("fuse: writepages: handle same page rewrites") Cc: # v3.13 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 52514a64dcd6..19ea122a7d03 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1777,7 +1777,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, spin_unlock(&fc->lock); dec_wb_stat(&bdi->wb, WB_WRITEBACK); - dec_node_page_state(page, NR_WRITEBACK_TEMP); + dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP); wb_writeout_inc(&bdi->wb); fuse_writepage_free(fc, new_req); fuse_request_free(new_req); From 266a69895b89b8dd5f7501cd3de07fb935a1862c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Jan 2019 10:27:59 +0100 Subject: [PATCH 1825/2608] fuse: handle zero sized retrieve correctly commit 97e1532ef81acb31c30f9e75bf00306c33a77812 upstream. Dereferencing req->page_descs[0] will Oops if req->max_pages is zero. Reported-by: syzbot+c1e36d30ee3416289cc0@syzkaller.appspotmail.com Tested-by: syzbot+c1e36d30ee3416289cc0@syzkaller.appspotmail.com Fixes: b2430d7567a3 ("fuse: add per-page descriptor to fuse_req") Cc: # v3.9 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b29861854de4..63fd33383413 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1691,7 +1691,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.h.nodeid = outarg->nodeid; req->in.numargs = 2; req->in.argpages = 1; - req->page_descs[0].offset = offset; req->end = fuse_retrieve_end; index = outarg->offset >> PAGE_SHIFT; @@ -1706,6 +1705,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, this_num = min_t(unsigned, num, PAGE_SIZE - offset); req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].offset = offset; req->page_descs[req->num_pages].length = this_num; req->num_pages++; From 63ca78580e6ee1e5e5c8d4f00e2c4860211caf77 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 23 Jan 2019 09:26:00 +0100 Subject: [PATCH 1826/2608] dmaengine: bcm2835: Fix interrupt race on RT commit f7da7782aba92593f7b82f03d2409a1c5f4db91b upstream. If IRQ handlers are threaded (either because CONFIG_PREEMPT_RT_BASE is enabled or "threadirqs" was passed on the command line) and if system load is sufficiently high that wakeup latency of IRQ threads degrades, SPI DMA transactions on the BCM2835 occasionally break like this: ks8851 spi0.0: SPI transfer timed out bcm2835-dma 3f007000.dma: DMA transfer could not be terminated ks8851 spi0.0 eth2: ks8851_rdfifo: spi_sync() failed The root cause is an assumption made by the DMA driver which is documented in a code comment in bcm2835_dma_terminate_all(): /* * Stop DMA activity: we assume the callback will not be called * after bcm_dma_abort() returns (even if it does, it will see * c->desc is NULL and exit.) */ That assumption falls apart if the IRQ handler bcm2835_dma_callback() is threaded: A client may terminate a descriptor and issue a new one before the IRQ handler had a chance to run. In fact the IRQ handler may miss an *arbitrary* number of descriptors. The result is the following race condition: 1. A descriptor finishes, its interrupt is deferred to the IRQ thread. 2. A client calls dma_terminate_async() which sets channel->desc = NULL. 3. The client issues a new descriptor. Because channel->desc is NULL, bcm2835_dma_issue_pending() immediately starts the descriptor. 4. Finally the IRQ thread runs and writes BCM2835_DMA_INT to the CS register to acknowledge the interrupt. This clears the ACTIVE flag, so the newly issued descriptor is paused in the middle of the transaction. Because channel->desc is not NULL, the IRQ thread finalizes the descriptor and tries to start the next one. I see two possible solutions: The first is to call synchronize_irq() in bcm2835_dma_issue_pending() to wait until the IRQ thread has finished before issuing a new descriptor. The downside of this approach is unnecessary latency if clients desire rapidly terminating and re-issuing descriptors and don't have any use for an IRQ callback. (The SPI TX DMA channel is a case in point.) A better alternative is to make the IRQ thread recognize that it has missed descriptors and avoid finalizing the newly issued descriptor. So first of all, set the ACTIVE flag when acknowledging the interrupt. This keeps a newly issued descriptor running. If the descriptor was finished, the channel remains idle despite the ACTIVE flag being set. However the ACTIVE flag can then no longer be used to check whether the channel is idle, so instead check whether the register containing the current control block address is zero and finalize the current descriptor only if so. That way, there is no impact on latency and throughput if the client doesn't care for the interrupt: Only minimal additional overhead is introduced for non-cyclic descriptors as one further MMIO read is necessary per interrupt to check for idleness of the channel. Cyclic descriptors are sped up slightly by removing one MMIO write per interrupt. Fixes: 96286b576690 ("dmaengine: Add support for BCM2835") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.14+ Cc: Frank Pavlic Cc: Martin Sperl Cc: Florian Meier Cc: Clive Messer Cc: Matthias Reichl Tested-by: Stefan Wahren Acked-by: Florian Kauer Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/bcm2835-dma.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 6204cc32d09c..e5e37583967d 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -421,7 +421,12 @@ static int bcm2835_dma_abort(void __iomem *chan_base) long int timeout = 10000; cs = readl(chan_base + BCM2835_DMA_CS); - if (!(cs & BCM2835_DMA_ACTIVE)) + + /* + * A zero control block address means the channel is idle. + * (The ACTIVE flag in the CS register is not a reliable indicator.) + */ + if (!readl(chan_base + BCM2835_DMA_ADDR)) return 0; /* Write 0 to the active bit - Pause the DMA */ @@ -485,8 +490,15 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) spin_lock_irqsave(&c->vc.lock, flags); - /* Acknowledge interrupt */ - writel(BCM2835_DMA_INT, c->chan_base + BCM2835_DMA_CS); + /* + * Clear the INT flag to receive further interrupts. Keep the channel + * active in case the descriptor is cyclic or in case the client has + * already terminated the descriptor and issued a new one. (May happen + * if this IRQ handler is threaded.) If the channel is finished, it + * will remain idle despite the ACTIVE flag being set. + */ + writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); d = c->desc; @@ -494,11 +506,7 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) if (d->cyclic) { /* call the cyclic callback */ vchan_cyclic_callback(&d->vd); - - /* Keep the DMA engine running */ - writel(BCM2835_DMA_ACTIVE, - c->chan_base + BCM2835_DMA_CS); - } else { + } else if (!readl(c->chan_base + BCM2835_DMA_ADDR)) { vchan_cookie_complete(&c->desc->vd); bcm2835_dma_start_desc(c); } @@ -806,11 +814,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) list_del_init(&c->node); spin_unlock(&d->lock); - /* - * Stop DMA activity: we assume the callback will not be called - * after bcm_dma_abort() returns (even if it does, it will see - * c->desc is NULL and exit.) - */ + /* stop DMA activity */ if (c->desc) { bcm2835_dma_desc_free(&c->desc->vd); c->desc = NULL; @@ -818,8 +822,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) /* Wait for stopping */ while (--timeout) { - if (!(readl(c->chan_base + BCM2835_DMA_CS) & - BCM2835_DMA_ACTIVE)) + if (!readl(c->chan_base + BCM2835_DMA_ADDR)) break; cpu_relax(); From f9f256b174b24b710e3b84760276a47177b1967a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 23 Jan 2019 09:26:00 +0100 Subject: [PATCH 1827/2608] dmaengine: bcm2835: Fix abort of transactions commit 9e528c799d17a4ac37d788c81440b50377dd592d upstream. There are multiple issues with bcm2835_dma_abort() (which is called on termination of a transaction): * The algorithm to abort the transaction first pauses the channel by clearing the ACTIVE flag in the CS register, then waits for the PAUSED flag to clear. Page 49 of the spec documents the latter as follows: "Indicates if the DMA is currently paused and not transferring data. This will occur if the active bit has been cleared [...]" https://www.raspberrypi.org/app/uploads/2012/02/BCM2835-ARM-Peripherals.pdf So the function is entering an infinite loop because it is waiting for PAUSED to clear which is always set due to the function having cleared the ACTIVE flag. The only thing that's saving it from itself is the upper bound of 10000 loop iterations. The code comment says that the intention is to "wait for any current AXI transfer to complete", so the author probably wanted to check the WAITING_FOR_OUTSTANDING_WRITES flag instead. Amend the function accordingly. * The CS register is only read at the beginning of the function. It needs to be read again after pausing the channel and before checking for outstanding writes, otherwise writes which were issued between the register read at the beginning of the function and pausing the channel may not be waited for. * The function seeks to abort the transfer by writing 0 to the NEXTCONBK register and setting the ABORT and ACTIVE flags. Thereby, the 0 in NEXTCONBK is sought to be loaded into the CONBLK_AD register. However experimentation has shown this approach to not work: The CONBLK_AD register remains the same as before and the CS register contains 0x00000030 (PAUSED | DREQ_STOPS_DMA). In other words, the control block is not aborted but merely paused and it will be resumed once the next DMA transaction is started. That is absolutely not the desired behavior. A simpler approach is to set the channel's RESET flag instead. This reliably zeroes the NEXTCONBK as well as the CS register. It requires less code and only a single MMIO write. This is also what popular user space DMA drivers do, e.g.: https://github.com/metachris/RPIO/blob/master/source/c_pwm/pwm.c Note that the spec is contradictory whether the NEXTCONBK register is writeable at all. On the one hand, page 41 claims: "The value loaded into the NEXTCONBK register can be overwritten so that the linked list of Control Block data structures can be dynamically altered. However it is only safe to do this when the DMA is paused." On the other hand, page 40 specifies: "Only three registers in each channel's register set are directly writeable (CS, CONBLK_AD and DEBUG). The other registers (TI, SOURCE_AD, DEST_AD, TXFR_LEN, STRIDE & NEXTCONBK), are automatically loaded from a Control Block data structure held in external memory." Fixes: 96286b576690 ("dmaengine: Add support for BCM2835") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.14+ Cc: Frank Pavlic Cc: Martin Sperl Cc: Florian Meier Cc: Clive Messer Cc: Matthias Reichl Tested-by: Stefan Wahren Acked-by: Florian Kauer Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/bcm2835-dma.c | 41 +++++++++------------------------------ 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index e5e37583967d..6ba53bbd0e16 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -415,13 +415,11 @@ static void bcm2835_dma_fill_cb_chain_with_sg( } } -static int bcm2835_dma_abort(void __iomem *chan_base) +static int bcm2835_dma_abort(struct bcm2835_chan *c) { - unsigned long cs; + void __iomem *chan_base = c->chan_base; long int timeout = 10000; - cs = readl(chan_base + BCM2835_DMA_CS); - /* * A zero control block address means the channel is idle. * (The ACTIVE flag in the CS register is not a reliable indicator.) @@ -433,25 +431,16 @@ static int bcm2835_dma_abort(void __iomem *chan_base) writel(0, chan_base + BCM2835_DMA_CS); /* Wait for any current AXI transfer to complete */ - while ((cs & BCM2835_DMA_ISPAUSED) && --timeout) { + while ((readl(chan_base + BCM2835_DMA_CS) & + BCM2835_DMA_WAITING_FOR_WRITES) && --timeout) cpu_relax(); - cs = readl(chan_base + BCM2835_DMA_CS); - } - /* We'll un-pause when we set of our next DMA */ + /* Peripheral might be stuck and fail to signal AXI write responses */ if (!timeout) - return -ETIMEDOUT; - - if (!(cs & BCM2835_DMA_ACTIVE)) - return 0; - - /* Terminate the control block chain */ - writel(0, chan_base + BCM2835_DMA_NEXTCB); - - /* Abort the whole DMA */ - writel(BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE, - chan_base + BCM2835_DMA_CS); + dev_err(c->vc.chan.device->dev, + "failed to complete outstanding writes\n"); + writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS); return 0; } @@ -804,7 +793,6 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); struct bcm2835_dmadev *d = to_bcm2835_dma_dev(c->vc.chan.device); unsigned long flags; - int timeout = 10000; LIST_HEAD(head); spin_lock_irqsave(&c->vc.lock, flags); @@ -818,18 +806,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) if (c->desc) { bcm2835_dma_desc_free(&c->desc->vd); c->desc = NULL; - bcm2835_dma_abort(c->chan_base); - - /* Wait for stopping */ - while (--timeout) { - if (!readl(c->chan_base + BCM2835_DMA_ADDR)) - break; - - cpu_relax(); - } - - if (!timeout) - dev_err(d->ddev.dev, "DMA transfer could not be terminated\n"); + bcm2835_dma_abort(c); } vchan_get_all_descriptors(&c->vc, &head); From 26dd015c0e1fd6b2249262ef5d3977e8fab14641 Mon Sep 17 00:00:00 2001 From: Leonid Iziumtsev Date: Tue, 15 Jan 2019 17:15:23 +0000 Subject: [PATCH 1828/2608] dmaengine: imx-dma: fix wrong callback invoke commit 341198eda723c8c1cddbb006a89ad9e362502ea2 upstream. Once the "ld_queue" list is not empty, next descriptor will migrate into "ld_active" list. The "desc" variable will be overwritten during that transition. And later the dmaengine_desc_get_callback_invoke() will use it as an argument. As result we invoke wrong callback. That behaviour was in place since: commit fcaaba6c7136 ("dmaengine: imx-dma: fix callback path in tasklet"). But after commit 4cd13c21b207 ("softirq: Let ksoftirqd do its job") things got worse, since possible delay between tasklet_schedule() from DMA irq handler and actual tasklet function execution got bigger. And that gave more time for new DMA request to be submitted and to be put into "ld_queue" list. It has been noticed that DMA issue is causing problems for "mxc-mmc" driver. While stressing the system with heavy network traffic and writing/reading to/from sd card simultaneously the timeout may happen: 10013000.sdhci: mxcmci_watchdog: read time out (status = 0x30004900) That often lead to file system corruption. Signed-off-by: Leonid Iziumtsev Signed-off-by: Vinod Koul Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/dma/imx-dma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index f681df8f0ed3..cb37730f9272 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -623,7 +623,7 @@ static void imxdma_tasklet(unsigned long data) { struct imxdma_channel *imxdmac = (void *)data; struct imxdma_engine *imxdma = imxdmac->imxdma; - struct imxdma_desc *desc; + struct imxdma_desc *desc, *next_desc; unsigned long flags; spin_lock_irqsave(&imxdma->lock, flags); @@ -653,10 +653,10 @@ static void imxdma_tasklet(unsigned long data) list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free); if (!list_empty(&imxdmac->ld_queue)) { - desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc, - node); + next_desc = list_first_entry(&imxdmac->ld_queue, + struct imxdma_desc, node); list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active); - if (imxdma_xfer_desc(desc) < 0) + if (imxdma_xfer_desc(next_desc) < 0) dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n", __func__, imxdmac->channel); } From 3d8343b78e31c14a16a3c8ba1a6c04ae884d0adb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Jan 2019 23:15:12 +0100 Subject: [PATCH 1829/2608] futex: Handle early deadlock return correctly commit 1a1fb985f2e2b85ec0d3dc2e519ee48389ec2434 upstream. commit 56222b212e8e ("futex: Drop hb->lock before enqueueing on the rtmutex") changed the locking rules in the futex code so that the hash bucket lock is not longer held while the waiter is enqueued into the rtmutex wait list. This made the lock and the unlock path symmetric, but unfortunately the possible early exit from __rt_mutex_proxy_start() due to a detected deadlock was not updated accordingly. That allows a concurrent unlocker to observe inconsitent state which triggers the warning in the unlock path. futex_lock_pi() futex_unlock_pi() lock(hb->lock) queue(hb_waiter) lock(hb->lock) lock(rtmutex->wait_lock) unlock(hb->lock) // acquired hb->lock hb_waiter = futex_top_waiter() lock(rtmutex->wait_lock) __rt_mutex_proxy_start() ---> fail remove(rtmutex_waiter); ---> returns -EDEADLOCK unlock(rtmutex->wait_lock) // acquired wait_lock wake_futex_pi() rt_mutex_next_owner() --> returns NULL --> WARN lock(hb->lock) unqueue(hb_waiter) The problem is caused by the remove(rtmutex_waiter) in the failure case of __rt_mutex_proxy_start() as this lets the unlocker observe a waiter in the hash bucket but no waiter on the rtmutex, i.e. inconsistent state. The original commit handles this correctly for the other early return cases (timeout, signal) by delaying the removal of the rtmutex waiter until the returning task reacquired the hash bucket lock. Treat the failure case of __rt_mutex_proxy_start() in the same way and let the existing cleanup code handle the eventual handover of the rtmutex gracefully. The regular rt_mutex_proxy_start() gains the rtmutex waiter removal for the failure case, so that the other callsites are still operating correctly. Add proper comments to the code so all these details are fully documented. Thanks to Peter for helping with the analysis and writing the really valuable code comments. Fixes: 56222b212e8e ("futex: Drop hb->lock before enqueueing on the rtmutex") Reported-by: Heiko Carstens Co-developed-by: Peter Zijlstra Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Tested-by: Heiko Carstens Cc: Martin Schwidefsky Cc: linux-s390@vger.kernel.org Cc: Stefan Liebler Cc: Sebastian Sewior Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1901292311410.1950@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 28 ++++++++++++++++++---------- kernel/locking/rtmutex.c | 37 ++++++++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 046cd780d057..abe04a2bb5b9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2811,35 +2811,39 @@ retry_private: * and BUG when futex_unlock_pi() interleaves with this. * * Therefore acquire wait_lock while holding hb->lock, but drop the - * latter before calling rt_mutex_start_proxy_lock(). This still fully - * serializes against futex_unlock_pi() as that does the exact same - * lock handoff sequence. + * latter before calling __rt_mutex_start_proxy_lock(). This + * interleaves with futex_unlock_pi() -- which does a similar lock + * handoff -- such that the latter can observe the futex_q::pi_state + * before __rt_mutex_start_proxy_lock() is done. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); spin_unlock(q.lock_ptr); + /* + * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter + * such that futex_unlock_pi() is guaranteed to observe the waiter when + * it sees the futex_q::pi_state. + */ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); if (ret) { if (ret == 1) ret = 0; - - spin_lock(q.lock_ptr); - goto no_block; + goto cleanup; } - if (unlikely(to)) hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); +cleanup: spin_lock(q.lock_ptr); /* - * If we failed to acquire the lock (signal/timeout), we must + * If we failed to acquire the lock (deadlock/signal/timeout), we must * first acquire the hb->lock before removing the lock from the - * rt_mutex waitqueue, such that we can keep the hb and rt_mutex - * wait lists consistent. + * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait + * lists consistent. * * In particular; it is important that futex_unlock_pi() can not * observe this inconsistency. @@ -2963,6 +2967,10 @@ retry: * there is no point where we hold neither; and therefore * wake_futex_pi() must observe a state consistent with what we * observed. + * + * In particular; this forces __rt_mutex_start_proxy() to + * complete such that we're guaranteed to observe the + * rt_waiter. Also see the WARN in wake_futex_pi(). */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 4ad35718f123..71c554a9e17f 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, rt_mutex_set_owner(lock, NULL); } +/** + * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: does _NOT_ remove the @waiter on failure; must either call + * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for PI-futex support. + */ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task) { int ret; + lockdep_assert_held(&lock->wait_lock); + if (try_to_take_rt_mutex(lock, task, NULL)) return 1; @@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, ret = 0; } - if (unlikely(ret)) - remove_waiter(lock, waiter); - debug_rt_mutex_print_deadlock(waiter); return ret; @@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter + * on failure. + * * Returns: * 0 - task blocked on lock * 1 - acquired the lock for task, caller should wake it up * <0 - error * - * Special API call for FUTEX_REQUEUE_PI support. + * Special API call for PI-futex support. */ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, @@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, raw_spin_lock_irq(&lock->wait_lock); ret = __rt_mutex_start_proxy_lock(lock, waiter, task); + if (unlikely(ret)) + remove_waiter(lock, waiter); raw_spin_unlock_irq(&lock->wait_lock); return ret; @@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, * @lock: the rt_mutex we were woken on * @waiter: the pre-initialized rt_mutex_waiter * - * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). + * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or + * rt_mutex_wait_proxy_lock(). * * Unless we acquired the lock; we're still enqueued on the wait-list and can * in fact still be granted ownership until we're removed. Therefore we can From 2f2456fe6ae34cc2baa40cba73a1755b05c6adf9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Jan 2019 10:02:33 +0000 Subject: [PATCH 1830/2608] irqchip/gic-v3-its: Plug allocation race for devices sharing a DevID commit 9791ec7df0e7b4d80706ccea8f24b6542f6059e9 upstream. On systems or VMs where multiple devices share a single DevID (because they sit behind a PCI bridge, or because the HW is broken in funky ways), we reuse the save its_device structure in order to reflect this. It turns out that there is a distinct lack of locking when looking up the its_device, and two device being probed concurrently can result in double allocations. That's obviously not nice. A solution for this is to have a per-ITS mutex that serializes device allocation. A similar issue exists on the freeing side, which can run concurrently with the allocation. On top of now taking the appropriate lock, we also make sure that a shared device is never freed, as we have no way to currently track the life cycle of such object. Reported-by: Zheng Xiang Tested-by: Zheng Xiang Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7638ca03fb1f..d8ecc90ed1b5 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -87,9 +87,14 @@ struct its_baser { * The ITS structure - contains most of the infrastructure, with the * top-level MSI domain, the command queue, the collections, and the * list of devices writing to it. + * + * dev_alloc_lock has to be taken for device allocations, while the + * spinlock must be taken to parse data structures such as the device + * list. */ struct its_node { raw_spinlock_t lock; + struct mutex dev_alloc_lock; struct list_head entry; void __iomem *base; phys_addr_t phys_base; @@ -138,6 +143,7 @@ struct its_device { void *itt; u32 nr_ites; u32 device_id; + bool shared; }; static struct { @@ -2109,6 +2115,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, struct its_device *its_dev; struct msi_domain_info *msi_info; u32 dev_id; + int err = 0; /* * We ignore "dev" entierely, and rely on the dev_id that has @@ -2131,6 +2138,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, return -EINVAL; } + mutex_lock(&its->dev_alloc_lock); its_dev = its_find_device(its, dev_id); if (its_dev) { /* @@ -2138,18 +2146,22 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, * another alias (PCI bridge of some sort). No need to * create the device. */ + its_dev->shared = true; pr_debug("Reusing ITT for devID %x\n", dev_id); goto out; } its_dev = its_create_device(its, dev_id, nvec, true); - if (!its_dev) - return -ENOMEM; + if (!its_dev) { + err = -ENOMEM; + goto out; + } pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec)); out: + mutex_unlock(&its->dev_alloc_lock); info->scratchpad[0].ptr = its_dev; - return 0; + return err; } static struct msi_domain_ops its_msi_domain_ops = { @@ -2252,6 +2264,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, { struct irq_data *d = irq_domain_get_irq_data(domain, virq); struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_node *its = its_dev->its; int i; for (i = 0; i < nr_irqs; i++) { @@ -2266,8 +2279,14 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_reset_irq_data(data); } - /* If all interrupts have been freed, start mopping the floor */ - if (bitmap_empty(its_dev->event_map.lpi_map, + mutex_lock(&its->dev_alloc_lock); + + /* + * If all interrupts have been freed, start mopping the + * floor. This is conditionned on the device not being shared. + */ + if (!its_dev->shared && + bitmap_empty(its_dev->event_map.lpi_map, its_dev->event_map.nr_lpis)) { its_lpi_free_chunks(its_dev->event_map.lpi_map, its_dev->event_map.lpi_base, @@ -2279,6 +2298,8 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, its_free_device(its_dev); } + mutex_unlock(&its->dev_alloc_lock); + irq_domain_free_irqs_parent(domain, virq, nr_irqs); } @@ -2966,6 +2987,7 @@ static int __init its_probe_one(struct resource *res, } raw_spin_lock_init(&its->lock); + mutex_init(&its->dev_alloc_lock); INIT_LIST_HEAD(&its->entry); INIT_LIST_HEAD(&its->its_device_list); typer = gic_read_typer(its_base + GITS_TYPER); From fb151544446b7f51d08b83dc30546298d7bdc666 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 16 Jan 2019 11:54:07 -0600 Subject: [PATCH 1831/2608] usb: phy: am335x: fix race condition in _probe commit a53469a68eb886e84dd8b69a1458a623d3591793 upstream. power off the phy should be done before populate the phy. Otherwise, am335x_init() could be called by the phy owner to power on the phy first, then am335x_phy_probe() turns off the phy again without the caller knowing it. Fixes: 2fc711d76352 ("usb: phy: am335x: Enable USB remote wakeup using PHY wakeup") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-am335x.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/phy/phy-am335x.c b/drivers/usb/phy/phy-am335x.c index 7e5aece769da..cb1382a52765 100644 --- a/drivers/usb/phy/phy-am335x.c +++ b/drivers/usb/phy/phy-am335x.c @@ -60,9 +60,6 @@ static int am335x_phy_probe(struct platform_device *pdev) if (ret) return ret; - ret = usb_add_phy_dev(&am_phy->usb_phy_gen.phy); - if (ret) - return ret; am_phy->usb_phy_gen.phy.init = am335x_init; am_phy->usb_phy_gen.phy.shutdown = am335x_shutdown; @@ -81,7 +78,7 @@ static int am335x_phy_probe(struct platform_device *pdev) device_set_wakeup_enable(dev, false); phy_ctrl_power(am_phy->phy_ctrl, am_phy->id, am_phy->dr_mode, false); - return 0; + return usb_add_phy_dev(&am_phy->usb_phy_gen.phy); } static int am335x_phy_remove(struct platform_device *pdev) From 09145ec8138ee67ba4565058b171c743ee2c1713 Mon Sep 17 00:00:00 2001 From: Tejas Joglekar Date: Tue, 22 Jan 2019 13:26:51 +0530 Subject: [PATCH 1832/2608] usb: dwc3: gadget: Handle 0 xfer length for OUT EP commit 1e19cdc8060227b0802bda6bc0bd22b23679ba32 upstream. For OUT endpoints, zero-length transfers require MaxPacketSize buffer as per the DWC_usb3 programming guide 3.30a section 4.2.3.3. This patch fixes this by explicitly checking zero length transfer to correctly pad up to MaxPacketSize. Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize") Cc: stable@vger.kernel.org Signed-off-by: Tejas Joglekar Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 5b34994b7b08..2f96d2d0addd 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1114,7 +1114,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep, unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); unsigned int rem = length % maxp; - if (rem && usb_endpoint_dir_out(dep->endpoint.desc)) { + if ((!length || rem) && usb_endpoint_dir_out(dep->endpoint.desc)) { struct dwc3 *dwc = dep->dwc; struct dwc3_trb *trb; From 565e332bfc70bb7b4c74020d90f53bdefc709616 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 22 Jan 2019 15:28:08 -0600 Subject: [PATCH 1833/2608] usb: gadget: udc: net2272: Fix bitwise and boolean operations commit 07c69f1148da7de3978686d3af9263325d9d60bd upstream. (!x & y) strikes again. Fix bitwise and boolean operations by enclosing the expression: intcsr & (1 << NET2272_PCI_IRQ) in parentheses, before applying the boolean operator '!'. Notice that this code has been there since 2011. So, it would be helpful if someone can double-check this. This issue was detected with the help of Coccinelle. Fixes: ceb80363b2ec ("USB: net2272: driver for PLX NET2272 USB device controller") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/net2272.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 8f85a51bd2b3..e0759a826b60 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -2096,7 +2096,7 @@ static irqreturn_t net2272_irq(int irq, void *_dev) #if defined(PLX_PCI_RDK2) /* see if PCI int for us by checking irqstat */ intcsr = readl(dev->rdk2.fpga_base_addr + RDK2_IRQSTAT); - if (!intcsr & (1 << NET2272_PCI_IRQ)) { + if (!(intcsr & (1 << NET2272_PCI_IRQ))) { spin_unlock(&dev->lock); return IRQ_NONE; } From f8b500d1097d05d99209732e4034e0d2bff07185 Mon Sep 17 00:00:00 2001 From: Paul Elder Date: Wed, 30 Jan 2019 08:13:21 -0600 Subject: [PATCH 1834/2608] usb: gadget: musb: fix short isoc packets with inventra dma commit c418fd6c01fbc5516a2cd1eaf1df1ec86869028a upstream. Handling short packets (length < max packet size) in the Inventra DMA engine in the MUSB driver causes the MUSB DMA controller to hang. An example of a problem that is caused by this problem is when streaming video out of a UVC gadget, only the first video frame is transferred. For short packets (mode-0 or mode-1 DMA), MUSB_TXCSR_TXPKTRDY must be set manually by the driver. This was previously done in musb_g_tx (musb_gadget.c), but incorrectly (all csr flags were cleared, and only MUSB_TXCSR_MODE and MUSB_TXCSR_TXPKTRDY were set). Fixing that problem allows some requests to be transferred correctly, but multiple requests were often put together in one USB packet, and caused problems if the packet size was not a multiple of 4. Instead, set MUSB_TXCSR_TXPKTRDY in dma_controller_irq (musbhsdma.c), just like host mode transfers. This topic was originally tackled by Nicolas Boichat [0] [1] and is discussed further at [2] as part of his GSoC project [3]. [0] https://groups.google.com/forum/?hl=en#!topic/beagleboard-gsoc/k8Azwfp75CU [1] https://gitorious.org/beagleboard-usbsniffer/beagleboard-usbsniffer-kernel/commit/b0be3b6cc195ba732189b04f1d43ec843c3e54c9?p=beagleboard-usbsniffer:beagleboard-usbsniffer-kernel.git;a=patch;h=b0be3b6cc195ba732189b04f1d43ec843c3e54c9 [2] http://beagleboard-usbsniffer.blogspot.com/2010/07/musb-isochronous-transfers-fixed.html [3] http://elinux.org/BeagleBoard/GSoC/USBSniffer Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support") Signed-off-by: Paul Elder Signed-off-by: Bin Liu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_gadget.c | 13 +------------ drivers/usb/musb/musbhsdma.c | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 87f932d4b72c..1e431634589d 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -477,13 +477,10 @@ void musb_g_tx(struct musb *musb, u8 epnum) } if (request) { - u8 is_dma = 0; - bool short_packet = false; trace_musb_req_tx(req); if (dma && (csr & MUSB_TXCSR_DMAENAB)) { - is_dma = 1; csr |= MUSB_TXCSR_P_WZC_BITS; csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_P_UNDERRUN | MUSB_TXCSR_TXPKTRDY | MUSB_TXCSR_AUTOSET); @@ -501,16 +498,8 @@ void musb_g_tx(struct musb *musb, u8 epnum) */ if ((request->zero && request->length) && (request->length % musb_ep->packet_sz == 0) - && (request->actual == request->length)) - short_packet = true; + && (request->actual == request->length)) { - if ((musb_dma_inventra(musb) || musb_dma_ux500(musb)) && - (is_dma && (!dma->desired_mode || - (request->actual & - (musb_ep->packet_sz - 1))))) - short_packet = true; - - if (short_packet) { /* * On DMA completion, FIFO may not be * available yet... diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c index 3620073da58c..512108e22d2b 100644 --- a/drivers/usb/musb/musbhsdma.c +++ b/drivers/usb/musb/musbhsdma.c @@ -320,12 +320,10 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data) channel->status = MUSB_DMA_STATUS_FREE; /* completed */ - if ((devctl & MUSB_DEVCTL_HM) - && (musb_channel->transmit) - && ((channel->desired_mode == 0) - || (channel->actual_len & - (musb_channel->max_packet_sz - 1))) - ) { + if (musb_channel->transmit && + (!channel->desired_mode || + (channel->actual_len % + musb_channel->max_packet_sz))) { u8 epnum = musb_channel->epnum; int offset = musb->io.ep_offset(epnum, MUSB_TXCSR); @@ -337,11 +335,14 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data) */ musb_ep_select(mbase, epnum); txcsr = musb_readw(mbase, offset); - txcsr &= ~(MUSB_TXCSR_DMAENAB + if (channel->desired_mode == 1) { + txcsr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_AUTOSET); - musb_writew(mbase, offset, txcsr); - /* Send out the packet */ - txcsr &= ~MUSB_TXCSR_DMAMODE; + musb_writew(mbase, offset, txcsr); + /* Send out the packet */ + txcsr &= ~MUSB_TXCSR_DMAMODE; + txcsr |= MUSB_TXCSR_DMAENAB; + } txcsr |= MUSB_TXCSR_TXPKTRDY; musb_writew(mbase, offset, txcsr); } From 04f2d3f8be890be594ee936d8184af54662864b3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 30 Jan 2019 10:49:34 +0100 Subject: [PATCH 1835/2608] staging: speakup: fix tty-operation NULL derefs commit a1960e0f1639cb1f7a3d94521760fc73091f6640 upstream. The send_xchar() and tiocmset() tty operations are optional. Add the missing sanity checks to prevent user-space triggerable NULL-pointer dereferences. Fixes: 6b9ad1c742bf ("staging: speakup: add send_xchar, tiocmset and input functionality for tty") Cc: stable # 4.13 Cc: Okash Khawaja Cc: Samuel Thibault Signed-off-by: Johan Hovold Reviewed-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/spk_ttyio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c index 4d7d8f2f66ea..71edd3cfe684 100644 --- a/drivers/staging/speakup/spk_ttyio.c +++ b/drivers/staging/speakup/spk_ttyio.c @@ -246,7 +246,8 @@ static void spk_ttyio_send_xchar(char ch) return; } - speakup_tty->ops->send_xchar(speakup_tty, ch); + if (speakup_tty->ops->send_xchar) + speakup_tty->ops->send_xchar(speakup_tty, ch); mutex_unlock(&speakup_tty_mutex); } @@ -258,7 +259,8 @@ static void spk_ttyio_tiocmset(unsigned int set, unsigned int clear) return; } - speakup_tty->ops->tiocmset(speakup_tty, set, clear); + if (speakup_tty->ops->tiocmset) + speakup_tty->ops->tiocmset(speakup_tty, set, clear); mutex_unlock(&speakup_tty_mutex); } From 9a3c75fb448ca6f24007123f869da77d1a8c6fde Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Wed, 30 Jan 2019 17:56:51 +0530 Subject: [PATCH 1836/2608] scsi: cxlflash: Prevent deadlock when adapter probe fails commit bb61b843ffd46978d7ca5095453e572714934eeb upstream. Presently when an error is encountered during probe of the cxlflash adapter, a deadlock is seen with cpu thread stuck inside cxlflash_remove(). Below is the trace of the deadlock as logged by khungtaskd: cxlflash 0006:00:00.0: cxlflash_probe: init_afu failed rc=-16 INFO: task kworker/80:1:890 blocked for more than 120 seconds. Not tainted 5.0.0-rc4-capi2-kexec+ #2 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/80:1 D 0 890 2 0x00000808 Workqueue: events work_for_cpu_fn Call Trace: 0x4d72136320 (unreliable) __switch_to+0x2cc/0x460 __schedule+0x2bc/0xac0 schedule+0x40/0xb0 cxlflash_remove+0xec/0x640 [cxlflash] cxlflash_probe+0x370/0x8f0 [cxlflash] local_pci_probe+0x6c/0x140 work_for_cpu_fn+0x38/0x60 process_one_work+0x260/0x530 worker_thread+0x280/0x5d0 kthread+0x1a8/0x1b0 ret_from_kernel_thread+0x5c/0x80 INFO: task systemd-udevd:5160 blocked for more than 120 seconds. The deadlock occurs as cxlflash_remove() is called from cxlflash_probe() without setting 'cxlflash_cfg->state' to STATE_PROBED and the probe thread starts to wait on 'cxlflash_cfg->reset_waitq'. Since the device was never successfully probed the 'cxlflash_cfg->state' never changes from STATE_PROBING hence the deadlock occurs. We fix this deadlock by setting the variable 'cxlflash_cfg->state' to STATE_PROBED in case an error occurs during cxlflash_probe() and just before calling cxlflash_remove(). Cc: stable@vger.kernel.org Fixes: c21e0bbfc485("cxlflash: Base support for IBM CXL Flash Adapter") Signed-off-by: Vaibhav Jain Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxlflash/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 737314cac8d8..b37149e48c5c 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -3659,6 +3659,7 @@ static int cxlflash_probe(struct pci_dev *pdev, host->max_cmd_len = CXLFLASH_MAX_CDB_LEN; cfg = shost_priv(host); + cfg->state = STATE_PROBING; cfg->host = host; rc = alloc_mem(cfg); if (rc) { @@ -3741,6 +3742,7 @@ out: return rc; out_remove: + cfg->state = STATE_PROBED; cxlflash_remove(pdev); goto out; } From c39ebf3bde084e1e1b0e1566e62b2099b4e2cfb7 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 30 Jan 2019 16:42:12 -0800 Subject: [PATCH 1837/2608] scsi: aic94xx: fix module loading commit 42caa0edabd6a0a392ec36a5f0943924e4954311 upstream. The aic94xx driver is currently failing to load with errors like sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:03.0/0000:02:00.3/0000:07:02.0/revision' Because the PCI code had recently added a file named 'revision' to every PCI device. Fix this by renaming the aic94xx revision file to aic_revision. This is safe to do for us because as far as I can tell, there's nothing in userspace relying on the current aic94xx revision file so it can be renamed without breaking anything. Fixes: 702ed3be1b1b (PCI: Create revision file in sysfs) Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aic94xx/aic94xx_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 4a4746cc6745..eb5ee0ec5a2f 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -281,7 +281,7 @@ static ssize_t asd_show_dev_rev(struct device *dev, return snprintf(buf, PAGE_SIZE, "%s\n", asd_dev_rev[asd_ha->revision_id]); } -static DEVICE_ATTR(revision, S_IRUGO, asd_show_dev_rev, NULL); +static DEVICE_ATTR(aic_revision, S_IRUGO, asd_show_dev_rev, NULL); static ssize_t asd_show_dev_bios_build(struct device *dev, struct device_attribute *attr,char *buf) @@ -478,7 +478,7 @@ static int asd_create_dev_attrs(struct asd_ha_struct *asd_ha) { int err; - err = device_create_file(&asd_ha->pcidev->dev, &dev_attr_revision); + err = device_create_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); if (err) return err; @@ -500,13 +500,13 @@ err_update_bios: err_biosb: device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build); err_rev: - device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision); + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); return err; } static void asd_remove_dev_attrs(struct asd_ha_struct *asd_ha) { - device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision); + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_pcba_sn); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_update_bios); From ef1b3d4893cec543305d30e8160df8c096135950 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Jan 2019 18:41:16 +0100 Subject: [PATCH 1838/2608] KVM: x86: work around leak of uninitialized stack contents (CVE-2019-7222) commit 353c0956a618a07ba4bbe7ad00ff29fe70e8412a upstream. Bugzilla: 1671930 Emulation of certain instructions (VMXON, VMCLEAR, VMPTRLD, VMWRITE with memory operand, INVEPT, INVVPID) can incorrectly inject a page fault when passed an operand that points to an MMIO address. The page fault will use uninitialized kernel stack memory as the CR2 and error code. The right behavior would be to abort the VM with a KVM_EXIT_INTERNAL_ERROR exit to userspace; however, it is not an easy fix, so for now just ensure that the error code and CR2 are zero. Embargoed until Feb 7th 2019. Reported-by: Felix Wilhelm Cc: stable@kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 867c22f8d59b..b0e7621ddf01 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4611,6 +4611,13 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + /* + * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED + * is returned, but our callers are not ready for that and they blindly + * call kvm_inject_page_fault. Ensure that they at least do not leak + * uninitialized kernel stack memory into cr2 and error code. + */ + memset(exception, 0, sizeof(*exception)); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } From 8c1b11bc3555b5d1207b0e179cbdd8b945e71e69 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 26 Jan 2019 01:54:33 +0100 Subject: [PATCH 1839/2608] kvm: fix kvm_ioctl_create_device() reference counting (CVE-2019-6974) commit cfa39381173d5f969daf43582c95ad679189cbc9 upstream. kvm_ioctl_create_device() does the following: 1. creates a device that holds a reference to the VM object (with a borrowed reference, the VM's refcount has not been bumped yet) 2. initializes the device 3. transfers the reference to the device to the caller's file descriptor table 4. calls kvm_get_kvm() to turn the borrowed reference to the VM into a real reference The ownership transfer in step 3 must not happen before the reference to the VM becomes a proper, non-borrowed reference, which only happens in step 4. After step 3, an attacker can close the file descriptor and drop the borrowed reference, which can cause the refcount of the kvm object to drop to zero. This means that we need to grab a reference for the device before anon_inode_getfd(), otherwise the VM can disappear from under us. Fixes: 852b6d57dc7f ("kvm: add device control API") Cc: stable@kernel.org Signed-off-by: Jann Horn Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bbc34e87d88f..9b79818758dc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2912,8 +2912,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm, if (ops->init) ops->init(dev); + kvm_get_kvm(kvm); ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { + kvm_put_kvm(kvm); mutex_lock(&kvm->lock); list_del(&dev->vm_node); mutex_unlock(&kvm->lock); @@ -2921,7 +2923,6 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } - kvm_get_kvm(kvm); cd->fd = ret; return 0; } From 1c965b1b5ecc2c9e1d59b2514cedb6f7483a0241 Mon Sep 17 00:00:00 2001 From: Peter Shier Date: Thu, 11 Oct 2018 11:46:46 -0700 Subject: [PATCH 1840/2608] KVM: nVMX: unconditionally cancel preemption timer in free_nested (CVE-2019-7221) commit ecec76885bcfe3294685dc363fd1273df0d5d65f upstream. Bugzilla: 1671904 There are multiple code paths where an hrtimer may have been started to emulate an L1 VMX preemption timer that can result in a call to free_nested without an intervening L2 exit where the hrtimer is normally cancelled. Unconditionally cancel in free_nested to cover all cases. Embargoed until Feb 7th 2019. Signed-off-by: Peter Shier Reported-by: Jim Mattson Reviewed-by: Jim Mattson Reported-by: Felix Wilhelm Cc: stable@kernel.org Message-Id: <20181011184646.154065-1-pshier@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 16bb8e35605e..66feeae4c206 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7708,6 +7708,7 @@ static void free_nested(struct vcpu_vmx *vmx) if (!vmx->nested.vmxon) return; + hrtimer_cancel(&vmx->nested.preemption_timer); vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; From 5e1f1c1f5d00ffba3600ba1ffb3a1fc7dae0a375 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 30 Jan 2019 07:13:58 -0600 Subject: [PATCH 1841/2608] cpu/hotplug: Fix "SMT disabled by BIOS" detection for KVM commit b284909abad48b07d3071a9fc9b5692b3e64914b upstream. With the following commit: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") ... the hotplug code attempted to detect when SMT was disabled by BIOS, in which case it reported SMT as permanently disabled. However, that code broke a virt hotplug scenario, where the guest is booted with only primary CPU threads, and a sibling is brought online later. The problem is that there doesn't seem to be a way to reliably distinguish between the HW "SMT disabled by BIOS" case and the virt "sibling not yet brought online" case. So the above-mentioned commit was a bit misguided, as it permanently disabled SMT for both cases, preventing future virt sibling hotplugs. Going back and reviewing the original problems which were attempted to be solved by that commit, when SMT was disabled in BIOS: 1) /sys/devices/system/cpu/smt/control showed "on" instead of "notsupported"; and 2) vmx_vm_init() was incorrectly showing the L1TF_MSG_SMT warning. I'd propose that we instead consider #1 above to not actually be a problem. Because, at least in the virt case, it's possible that SMT wasn't disabled by BIOS and a sibling thread could be brought online later. So it makes sense to just always default the smt control to "on" to allow for that possibility (assuming cpuid indicates that the CPU supports SMT). The real problem is #2, which has a simple fix: change vmx_vm_init() to query the actual current SMT state -- i.e., whether any siblings are currently online -- instead of looking at the SMT "control" sysfs value. So fix it by: a) reverting the original "fix" and its followup fix: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") bc2d8d262cba ("cpu/hotplug: Fix SMT supported evaluation") and b) changing vmx_vm_init() to query the actual current SMT state -- instead of the sysfs control value -- to determine whether the L1TF warning is needed. This also requires the 'sched_smt_present' variable to exported, instead of 'cpu_smt_control'. Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") Reported-by: Igor Mammedov Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Joe Mario Cc: Jiri Kosina Cc: Peter Zijlstra Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/e3a85d585da28cc333ecbc1e78ee9216e6da9396.1548794349.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/vmx.c | 3 ++- include/linux/cpu.h | 2 -- kernel/cpu.c | 33 ++++----------------------------- kernel/sched/fair.c | 1 + kernel/smp.c | 2 -- 6 files changed, 8 insertions(+), 35 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 004e60470a77..ec7aedba3d74 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -68,7 +68,7 @@ void __init check_bugs(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology_early(); + cpu_smt_check_topology(); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 66feeae4c206..1f5de4314291 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -10120,7 +10121,7 @@ static int vmx_vm_init(struct kvm *kvm) * Warn upon starting the first VM in a potentially * insecure environment. */ - if (cpu_smt_control == CPU_SMT_ENABLED) + if (sched_smt_active()) pr_warn_once(L1TF_MSG_SMT); if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) pr_warn_once(L1TF_MSG_L1D); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2a378d261914..c7712e042aba 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -188,12 +188,10 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology_early(void); extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology_early(void) { } static inline void cpu_smt_check_topology(void) { } #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 5c907d96e3dd..0171754db32b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -356,9 +356,6 @@ void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; -EXPORT_SYMBOL_GPL(cpu_smt_control); - -static bool cpu_smt_available __read_mostly; void __init cpu_smt_disable(bool force) { @@ -376,25 +373,11 @@ void __init cpu_smt_disable(bool force) /* * The decision whether SMT is supported can only be done after the full - * CPU identification. Called from architecture code before non boot CPUs - * are brought up. - */ -void __init cpu_smt_check_topology_early(void) -{ - if (!topology_smt_supported()) - cpu_smt_control = CPU_SMT_NOT_SUPPORTED; -} - -/* - * If SMT was disabled by BIOS, detect it here, after the CPUs have been - * brought online. This ensures the smt/l1tf sysfs entries are consistent - * with reality. cpu_smt_available is set to true during the bringup of non - * boot CPUs when a SMT sibling is detected. Note, this may overwrite - * cpu_smt_control's previous setting. + * CPU identification. Called from architecture code. */ void __init cpu_smt_check_topology(void) { - if (!cpu_smt_available) + if (!topology_smt_supported()) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; } @@ -407,18 +390,10 @@ early_param("nosmt", smt_cmdline_disable); static inline bool cpu_smt_allowed(unsigned int cpu) { - if (topology_is_primary_thread(cpu)) + if (cpu_smt_control == CPU_SMT_ENABLED) return true; - /* - * If the CPU is not a 'primary' thread and the booted_once bit is - * set then the processor has SMT support. Store this information - * for the late check of SMT support in cpu_smt_check_topology(). - */ - if (per_cpu(cpuhp_state, cpu).booted_once) - cpu_smt_available = true; - - if (cpu_smt_control == CPU_SMT_ENABLED) + if (topology_is_primary_thread(cpu)) return true; /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f33b24080b1c..4d54c1fe9623 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5651,6 +5651,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) #ifdef CONFIG_SCHED_SMT DEFINE_STATIC_KEY_FALSE(sched_smt_present); +EXPORT_SYMBOL_GPL(sched_smt_present); static inline void set_idle_cores(int cpu, int val) { diff --git a/kernel/smp.c b/kernel/smp.c index 2d1da290f144..c94dd85c8d41 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -584,8 +584,6 @@ void __init smp_init(void) num_nodes, (num_nodes > 1 ? "s" : ""), num_cpus, (num_cpus > 1 ? "s" : "")); - /* Final decision about SMT support */ - cpu_smt_check_topology(); /* Any cleanup work */ smp_cpus_done(setup_max_cpus); } From 5d4dbc4593e9a338b0a9ced6be756627fe773276 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Sun, 27 Jan 2019 06:53:14 -0800 Subject: [PATCH 1842/2608] perf/x86/intel/uncore: Add Node ID mask commit 9e63a7894fd302082cf3627fe90844421a6cbe7f upstream. Some PCI uncore PMUs cannot be registered on an 8-socket system (HPE Superdome Flex). To understand which Socket the PCI uncore PMUs belongs to, perf retrieves the local Node ID of the uncore device from CPUNODEID(0xC0) of the PCI configuration space, and the mapping between Socket ID and Node ID from GIDNIDMAP(0xD4). The Socket ID can be calculated accordingly. The local Node ID is only available at bit 2:0, but current code doesn't mask it. If a BIOS doesn't clear the rest of the bits, an incorrect Node ID will be fetched. Filter the Node ID by adding a mask. Reported-by: Song Liu Tested-by: Song Liu Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: # v3.7+ Fixes: 7c94ee2e0917 ("perf/x86: Add Intel Nehalem and Sandy Bridge-EP uncore support") Link: https://lkml.kernel.org/r/1548600794-33162-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/uncore_snbep.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index a68aba8a482f..6b66285c6ced 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1221,6 +1221,8 @@ static struct pci_driver snbep_uncore_pci_driver = { .id_table = snbep_uncore_pci_ids, }; +#define NODE_ID_MASK 0x7 + /* * build pci bus to socket mapping */ @@ -1242,7 +1244,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); if (err) break; - nodeid = config; + nodeid = config & NODE_ID_MASK; /* get the Node ID mapping */ err = pci_read_config_dword(ubox_dev, idmap_loc, &config); if (err) From 96ae22dc8cd1c817cdd94a9bfaf72a7068c499ae Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 31 Jan 2019 16:33:41 -0800 Subject: [PATCH 1843/2608] x86/MCE: Initialize mce.bank in the case of a fatal error in mce_no_way_out() commit d28af26faa0b1daf3c692603d46bc4687c16f19e upstream. Internal injection testing crashed with a console log that said: mce: [Hardware Error]: CPU 7: Machine Check Exception: f Bank 0: bd80000000100134 This caused a lot of head scratching because the MCACOD (bits 15:0) of that status is a signature from an L1 data cache error. But Linux says that it found it in "Bank 0", which on this model CPU only reports L1 instruction cache errors. The answer was that Linux doesn't initialize "m->bank" in the case that it finds a fatal error in the mce_no_way_out() pre-scan of banks. If this was a local machine check, then this partially initialized struct mce is being passed to mce_panic(). Fix is simple: just initialize m->bank in the case of a fatal error. Fixes: 40c36e2741d7 ("x86/mce: Fix incorrect "Machine check from unknown source" message") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Vishal Verma Cc: x86-ml Cc: stable@vger.kernel.org # v4.18 Note pre-v5.0 arch/x86/kernel/cpu/mce/core.c was called arch/x86/kernel/cpu/mcheck/mce.c Link: https://lkml.kernel.org/r/20190201003341.10638-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 98e4e4dc4a3b..54874e2b1d32 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -773,6 +773,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, quirk_no_way_out(i, m, regs); if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; From bbf850876fcff432c2ed1f6f7c4e144c2c22bd82 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 10 Jan 2019 14:27:45 +0000 Subject: [PATCH 1844/2608] perf/core: Don't WARN() for impossible ring-buffer sizes commit 9dff0aa95a324e262ffb03f425d00e4751f3294e upstream. The perf tool uses /proc/sys/kernel/perf_event_mlock_kb to determine how large its ringbuffer mmap should be. This can be configured to arbitrary values, which can be larger than the maximum possible allocation from kmalloc. When this is configured to a suitably large value (e.g. thanks to the perf fuzzer), attempting to use perf record triggers a WARN_ON_ONCE() in __alloc_pages_nodemask(): WARNING: CPU: 2 PID: 5666 at mm/page_alloc.c:4511 __alloc_pages_nodemask+0x3f8/0xbc8 Let's avoid this by checking that the requested allocation is possible before calling kzalloc. Reported-by: Julien Thierry Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Julien Thierry Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Link: https://lkml.kernel.org/r/20190110142745.25495-1-mark.rutland@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/ring_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index c573c7339223..8b311340b241 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -719,6 +719,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) size = sizeof(struct ring_buffer); size += nr_pages * sizeof(void *); + if (order_base_2(size) >= MAX_ORDER) + goto fail; + rb = kzalloc(size, GFP_KERNEL); if (!rb) goto fail; From ee5470d3e1d8b6262cd7ac8b40d8242b2802d95f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 22 Jan 2019 17:34:39 -0600 Subject: [PATCH 1845/2608] perf tests evsel-tp-sched: Fix bitwise operator commit 489338a717a0dfbbd5a3fabccf172b78f0ac9015 upstream. Notice that the use of the bitwise OR operator '|' always leads to true in this particular case, which seems a bit suspicious due to the context in which this expression is being used. Fix this by using bitwise AND operator '&' instead. This bug was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Fixes: 6a6cd11d4e57 ("perf test: Add test for the sched tracepoint format fields") Link: http://lkml.kernel.org/r/20190122233439.GA5868@embeddedor Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/evsel-tp-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 699561fa512c..67bcbf876776 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, return -1; } - is_signed = !!(field->flags | FIELD_IS_SIGNED); + is_signed = !!(field->flags & FIELD_IS_SIGNED); if (should_be_signed && !is_signed) { pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n", evsel->name, name, is_signed, should_be_signed); From cc616243765b9aea9baea1ed8d6adb39318a0c6b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 17:43:16 +0800 Subject: [PATCH 1846/2608] serial: fix race between flush_to_ldisc and tty_open commit fedb5760648a291e949f2380d383b5b2d2749b5e upstream. There still is a race window after the commit b027e2298bd588 ("tty: fix data race between tty_init_dev and flush of buf"), and we encountered this crash issue if receive_buf call comes before tty initialization completes in tty_open and tty->driver_data may be NULL. CPU0 CPU1 ---- ---- tty_open tty_init_dev tty_ldisc_unlock schedule flush_to_ldisc receive_buf tty_port_default_receive_buf tty_ldisc_receive_buf n_tty_receive_buf_common __receive_buf uart_flush_chars uart_start /*tty->driver_data is NULL*/ tty->ops->open /*init tty->driver_data*/ it can be fixed by extending ldisc semaphore lock in tty_init_dev to driver_data initialized completely after tty->ops->open(), but this will lead to get lock on one function and unlock in some other function, and hard to maintain, so fix this race only by checking tty->driver_data when receiving, and return if tty->driver_data is NULL, and n_tty_receive_buf_common maybe calls uart_unthrottle, so add the same check. Because the tty layer knows nothing about the driver associated with the device, the tty layer can not do anything here, it is up to the tty driver itself to check for this type of race. Fix up the serial driver to correctly check to see if it is finished binding with the device when being called, and if not, abort the tty calls. [Description and problem report and testing from Li RongQing, I rewrote the patch to be in the serial layer, not in the tty core - gregkh] Reported-by: Li RongQing Tested-by: Li RongQing Signed-off-by: Wang Li Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 94ac6c6e8fb8..51a58c367953 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -143,6 +143,9 @@ static void uart_start(struct tty_struct *tty) struct uart_port *port; unsigned long flags; + if (!state) + return; + port = uart_port_lock(state, flags); __uart_start(tty); uart_port_unlock(port, flags); @@ -2415,6 +2418,9 @@ static void uart_poll_put_char(struct tty_driver *driver, int line, char ch) struct uart_state *state = drv->state + line; struct uart_port *port; + if (!state) + return; + port = uart_port_ref(state); if (!port) return; From f43699b8f7ad220dd95dcab59a58e14efa6f6968 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 24 Jan 2019 23:51:21 +0200 Subject: [PATCH 1847/2608] serial: 8250_pci: Make PCI class test non fatal commit 824d17c57b0abbcb9128fb3f7327fae14761914b upstream. As has been reported the National Instruments serial cards have broken PCI class. The commit 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") made the PCI class check mandatory for the case when device is listed in a quirk list. Make PCI class test non fatal to allow broken card be enumerated. Fixes: 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") Cc: stable Reported-by: Guan Yung Tseng Tested-by: Guan Yung Tseng Tested-by: KHUENY.Gerhard Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 4986b4aebe80..790375b5eeb2 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3425,6 +3425,11 @@ static int serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) { int num_iomem, num_port, first_port = -1, i; + int rc; + + rc = serial_pci_is_class_communication(dev); + if (rc) + return rc; /* * Should we try to make guesses for multiport serial devices later? @@ -3652,10 +3657,6 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) board = &pci_boards[ent->driver_data]; - rc = serial_pci_is_class_communication(dev); - if (rc) - return rc; - rc = serial_pci_is_blacklisted(dev); if (rc) return rc; From e74cfcb89e8f306da93230c373e4a667d670538c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 18 Oct 2017 16:17:18 -0400 Subject: [PATCH 1848/2608] nfsd4: fix cached replies to solo SEQUENCE compounds commit 085def3ade52f2ffe3e31f42e98c27dcc222dd37 upstream. Currently our handling of 4.1+ requests without "cachethis" set is confusing and not quite correct. Suppose a client sends a compound consisting of only a single SEQUENCE op, and it matches the seqid in a session slot (so it's a retry), but the previous request with that seqid did not have "cachethis" set. The obvious thing to do might be to return NFS4ERR_RETRY_UNCACHED_REP, but the protocol only allows that to be returned on the op following the SEQUENCE, and there is no such op in this case. The protocol permits us to cache replies even if the client didn't ask us to. And it's easy to do so in the case of solo SEQUENCE compounds. So, when we get a solo SEQUENCE, we can either return the previously cached reply or NFSERR_SEQ_FALSE_RETRY if we notice it differs in some way from the original call. Currently, we're returning a corrupt reply in the case a solo SEQUENCE matches a previous compound with more ops. This actually matters because the Linux client recently started doing this as a way to recover from lost replies to idempotent operations in the case the process doing the original reply was killed: in that case it's difficult to keep the original arguments around to do a real retry, and the client no longer cares what the result is anyway, but it would like to make sure that the slot's sequence id has been incremented, and the solo SEQUENCE assures that: if the server never got the original reply, it will increment the sequence id. If it did get the original reply, it won't increment, and nothing else that about the reply really matters much. But we can at least attempt to return valid xdr! Tested-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Donald Buczek Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 20 +++++++++++++++----- fs/nfsd/state.h | 1 + fs/nfsd/xdr4.h | 13 +++++++++++-- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3cef6bfa09d4..177be048c17f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2331,14 +2331,16 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) dprintk("--> %s slot %p\n", __func__, slot); + slot->sl_flags |= NFSD4_SLOT_INITIALIZED; slot->sl_opcnt = resp->opcnt; slot->sl_status = resp->cstate.status; - slot->sl_flags |= NFSD4_SLOT_INITIALIZED; - if (nfsd4_not_cached(resp)) { - slot->sl_datalen = 0; + if (!nfsd4_cache_this(resp)) { + slot->sl_flags &= ~NFSD4_SLOT_CACHED; return; } + slot->sl_flags |= NFSD4_SLOT_CACHED; + base = resp->cstate.data_offset; slot->sl_datalen = buf->len - base; if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) @@ -2365,8 +2367,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, op = &args->ops[resp->opcnt - 1]; nfsd4_encode_operation(resp, op); - /* Return nfserr_retry_uncached_rep in next operation. */ - if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) { + if (slot->sl_flags & NFSD4_SLOT_CACHED) + return op->status; + if (args->opcnt == 1) { + /* + * The original operation wasn't a solo sequence--we + * always cache those--so this retry must not match the + * original: + */ + op->status = nfserr_seq_false_retry; + } else { op = &args->ops[resp->opcnt++]; op->status = nfserr_retry_uncached_rep; nfsd4_encode_operation(resp, op); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 005c911b34ac..2488b7df1b35 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -174,6 +174,7 @@ struct nfsd4_slot { #define NFSD4_SLOT_INUSE (1 << 0) #define NFSD4_SLOT_CACHETHIS (1 << 1) #define NFSD4_SLOT_INITIALIZED (1 << 2) +#define NFSD4_SLOT_CACHED (1 << 3) u8 sl_flags; char sl_data[]; }; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index aa4375eac475..f47c392cbd57 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -651,9 +651,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; } -static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) +/* + * The session reply cache only needs to cache replies that the client + * actually asked us to. But it's almost free for us to cache compounds + * consisting of only a SEQUENCE op, so we may as well cache those too. + * Also, the protocol doesn't give us a convenient response in the case + * of a replay of a solo SEQUENCE op that wasn't cached + * (RETRY_UNCACHED_REP can only be returned in the second op of a + * compound). + */ +static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp) { - return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) + return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) || nfsd4_is_solo_sequence(resp); } From ff371bc83b1cfa868bebdd7a4164a26d47e7c023 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Oct 2017 20:38:49 -0400 Subject: [PATCH 1849/2608] nfsd4: catch some false session retries commit 53da6a53e1d414e05759fa59b7032ee08f4e22d7 upstream. The spec allows us to return NFS4ERR_SEQ_FALSE_RETRY if we notice that the client is making a call that matches a previous (slot, seqid) pair but that *isn't* actually a replay, because some detail of the call doesn't actually match the previous one. Catching every such case is difficult, but we may as well catch a few easy ones. This also handles the case described in the previous patch, in a different way. The spec does however require us to catch the case where the difference is in the rpc credentials. This prevents somebody from snooping another user's replies by fabricating retries. (But the practical value of the attack is limited by the fact that the replies with the most sensitive data are READ replies, which are not normally cached.) Tested-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Donald Buczek Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 37 ++++++++++++++++++++++++++++++++++++- fs/nfsd/state.h | 1 + 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 177be048c17f..94128643ec1a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1472,8 +1472,10 @@ free_session_slots(struct nfsd4_session *ses) { int i; - for (i = 0; i < ses->se_fchannel.maxreqs; i++) + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { + free_svc_cred(&ses->se_slots[i]->sl_cred); kfree(ses->se_slots[i]); + } } /* @@ -2334,6 +2336,8 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) slot->sl_flags |= NFSD4_SLOT_INITIALIZED; slot->sl_opcnt = resp->opcnt; slot->sl_status = resp->cstate.status; + free_svc_cred(&slot->sl_cred); + copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred); if (!nfsd4_cache_this(resp)) { slot->sl_flags &= ~NFSD4_SLOT_CACHED; @@ -3040,6 +3044,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp, return xb->len > session->se_fchannel.maxreq_sz; } +static bool replay_matches_cache(struct svc_rqst *rqstp, + struct nfsd4_sequence *seq, struct nfsd4_slot *slot) +{ + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) != + (bool)seq->cachethis) + return false; + /* + * If there's an error than the reply can have fewer ops than + * the call. But if we cached a reply with *more* ops than the + * call you're sending us now, then this new call is clearly not + * really a replay of the old one: + */ + if (slot->sl_opcnt < argp->opcnt) + return false; + /* This is the only check explicitly called by spec: */ + if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) + return false; + /* + * There may be more comparisons we could actually do, but the + * spec doesn't require us to catch every case where the calls + * don't match (that would require caching the call as well as + * the reply), so we don't bother. + */ + return true; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -3099,6 +3131,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) goto out_put_session; + status = nfserr_seq_false_retry; + if (!replay_matches_cache(rqstp, seq, slot)) + goto out_put_session; cstate->slot = slot; cstate->session = session; cstate->clp = clp; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 2488b7df1b35..86aa92d200e1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -169,6 +169,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) struct nfsd4_slot { u32 sl_seqid; __be32 sl_status; + struct svc_cred sl_cred; u32 sl_datalen; u16 sl_opcnt; #define NFSD4_SLOT_INUSE (1 << 0) From e36c4e23b970ca4c53812b4e9bf733ca17280f65 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 17 Jan 2019 12:42:16 -0800 Subject: [PATCH 1850/2608] IB/hfi1: Add limit test for RC/UC send via loopback commit 09ce351dff8e7636af0beb72cd4a86c3904a0500 upstream. Fix potential memory corruption and panic in loopback for IB_WR_SEND variants. The code blindly assumes the posted length will fit in the fetched rwqe, which is not a valid assumption. Fix by adding a limit test, and triggering the appropriate send completion and putting the QP in an error state. This mimics the handling for non-loopback QPs. Fixes: 15703461533a ("IB/{hfi1, qib, rdmavt}: Move ruc_loopback to rdmavt") Cc: #v4.20+ Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman Signed-off-by: Mike Marciniszyn --- drivers/infiniband/hw/hfi1/ruc.c | 7 ++++++- drivers/infiniband/hw/qib/qib_ruc.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 5866ccc0fc21..e8aaae4bd911 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -440,6 +440,8 @@ send: goto op_err; if (!ret) goto rnr_nak; + if (wqe->length > qp->r_len) + goto inv_err; break; case IB_WR_RDMA_WRITE_WITH_IMM: @@ -607,7 +609,10 @@ op_err: goto err; inv_err: - send_status = IB_WC_REM_INV_REQ_ERR; + send_status = + sqp->ibqp.qp_type == IB_QPT_RC ? + IB_WC_REM_INV_REQ_ERR : + IB_WC_SUCCESS; wc.status = IB_WC_LOC_QP_OP_ERR; goto err; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 53efbb0b40c4..dd812ad0d09f 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -425,6 +425,8 @@ again: goto op_err; if (!ret) goto rnr_nak; + if (wqe->length > qp->r_len) + goto inv_err; break; case IB_WR_RDMA_WRITE_WITH_IMM: @@ -585,7 +587,10 @@ op_err: goto err; inv_err: - send_status = IB_WC_REM_INV_REQ_ERR; + send_status = + sqp->ibqp.qp_type == IB_QPT_RC ? + IB_WC_REM_INV_REQ_ERR : + IB_WC_SUCCESS; wc.status = IB_WC_LOC_QP_OP_ERR; goto err; From db85eb4162da282b1c25b1d62ed01a434bfdd6d0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Dec 2018 17:53:50 +0100 Subject: [PATCH 1851/2608] perf/x86/intel: Delay memory deallocation until x86_pmu_dead_cpu() commit 602cae04c4864bb3487dfe4c2126c8d9e7e1614a upstream. intel_pmu_cpu_prepare() allocated memory for ->shared_regs among other members of struct cpu_hw_events. This memory is released in intel_pmu_cpu_dying() which is wrong. The counterpart of the intel_pmu_cpu_prepare() callback is x86_pmu_dead_cpu(). Otherwise if the CPU fails on the UP path between CPUHP_PERF_X86_PREPARE and CPUHP_AP_PERF_X86_STARTING then it won't release the memory but allocate new memory on the next attempt to online the CPU (leaking the old memory). Also, if the CPU down path fails between CPUHP_AP_PERF_X86_STARTING and CPUHP_PERF_X86_PREPARE then the CPU will go back online but never allocate the memory that was released in x86_pmu_dying_cpu(). Make the memory allocation/free symmetrical in regard to the CPU hotplug notifier by moving the deallocation to intel_pmu_cpu_dead(). This started in commit: a7e3ed1e47011 ("perf: Add support for supplementary event registers"). In principle the bug was introduced in v2.6.39 (!), but it will almost certainly not backport cleanly across the big CPU hotplug rewrite between v4.7-v4.15... [ bigeasy: Added patch description. ] [ mingo: Added backporting guidance. ] Reported-by: He Zhe Signed-off-by: Peter Zijlstra (Intel) # With developer hat on Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) # With maintainer hat on Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: bp@alien8.de Cc: hpa@zytor.com Cc: jolsa@kernel.org Cc: kan.liang@linux.intel.com Cc: namhyung@kernel.org Cc: Fixes: a7e3ed1e47011 ("perf: Add support for supplementary event registers"). Link: https://lkml.kernel.org/r/20181219165350.6s3jvyxbibpvlhtq@linutronix.de Signed-off-by: Ingo Molnar [ He Zhe: Fixes conflict caused by missing disable_counter_freeze which is introduced since v4.20 af3bdb991a5cb. ] Signed-off-by: He Zhe Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7bb80151bfff..1cb5ff3ee728 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3419,6 +3419,11 @@ static void free_excl_cntrs(int cpu) } static void intel_pmu_cpu_dying(int cpu) +{ + fini_debug_store_on_cpu(cpu); +} + +static void intel_pmu_cpu_dead(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct intel_shared_regs *pc; @@ -3431,8 +3436,6 @@ static void intel_pmu_cpu_dying(int cpu) } free_excl_cntrs(cpu); - - fini_debug_store_on_cpu(cpu); } static void intel_pmu_sched_task(struct perf_event_context *ctx, @@ -3521,6 +3524,7 @@ static __initconst const struct x86_pmu core_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, + .cpu_dead = intel_pmu_cpu_dead, }; static struct attribute *intel_pmu_attrs[]; @@ -3560,6 +3564,8 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, + .cpu_dead = intel_pmu_cpu_dead, + .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, }; From d2e843ea4804d2ab530065a033c4b71180463b0c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 2 Nov 2018 21:49:58 +0100 Subject: [PATCH 1852/2608] ath9k: dynack: make ewma estimation faster commit 0c60c490830a1a756c80f8de8d33d9c6359d4a36 upstream. In order to make propagation time estimation faster, use current sample as ewma output value during 'late ack' tracking Tested-by: Koen Vandeputte Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ath9k.h | 2 +- drivers/net/wireless/ath/ath9k/dynack.c | 28 ++++++++++++++++++------- drivers/net/wireless/ath/ath9k/dynack.h | 6 ++++-- drivers/net/wireless/ath/ath9k/xmit.c | 5 +++-- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index cf076719c27e..f9339b5c3624 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -272,7 +272,7 @@ struct ath_node { #endif u8 key_idx[4]; - u32 ackto; + int ackto; struct list_head list; }; diff --git a/drivers/net/wireless/ath/ath9k/dynack.c b/drivers/net/wireless/ath/ath9k/dynack.c index cc0dc966c512..d2a7531ceafe 100644 --- a/drivers/net/wireless/ath/ath9k/dynack.c +++ b/drivers/net/wireless/ath/ath9k/dynack.c @@ -29,9 +29,13 @@ * ath_dynack_ewma - EWMA (Exponentially Weighted Moving Average) calculation * */ -static inline u32 ath_dynack_ewma(u32 old, u32 new) +static inline int ath_dynack_ewma(int old, int new) { - return (new * (EWMA_DIV - EWMA_LEVEL) + old * EWMA_LEVEL) / EWMA_DIV; + if (old > 0) + return (new * (EWMA_DIV - EWMA_LEVEL) + + old * EWMA_LEVEL) / EWMA_DIV; + else + return new; } /** @@ -82,10 +86,10 @@ static inline bool ath_dynack_bssidmask(struct ath_hw *ah, const u8 *mac) */ static void ath_dynack_compute_ackto(struct ath_hw *ah) { - struct ath_node *an; - u32 to = 0; - struct ath_dynack *da = &ah->dynack; struct ath_common *common = ath9k_hw_common(ah); + struct ath_dynack *da = &ah->dynack; + struct ath_node *an; + int to = 0; list_for_each_entry(an, &da->nodes, list) if (an->ackto > to) @@ -144,7 +148,8 @@ static void ath_dynack_compute_to(struct ath_hw *ah) an->ackto = ath_dynack_ewma(an->ackto, ackto); ath_dbg(ath9k_hw_common(ah), DYNACK, - "%pM to %u\n", dst, an->ackto); + "%pM to %d [%u]\n", dst, + an->ackto, ackto); if (time_is_before_jiffies(da->lto)) { ath_dynack_compute_ackto(ah); da->lto = jiffies + COMPUTE_TO; @@ -166,10 +171,12 @@ static void ath_dynack_compute_to(struct ath_hw *ah) * @ah: ath hw * @skb: socket buffer * @ts: tx status info + * @sta: station pointer * */ void ath_dynack_sample_tx_ts(struct ath_hw *ah, struct sk_buff *skb, - struct ath_tx_status *ts) + struct ath_tx_status *ts, + struct ieee80211_sta *sta) { u8 ridx; struct ieee80211_hdr *hdr; @@ -190,9 +197,16 @@ void ath_dynack_sample_tx_ts(struct ath_hw *ah, struct sk_buff *skb, ieee80211_is_assoc_resp(hdr->frame_control) || ieee80211_is_auth(hdr->frame_control)) { ath_dbg(common, DYNACK, "late ack\n"); + ath9k_hw_setslottime(ah, (LATEACK_TO - 3) / 2); ath9k_hw_set_ack_timeout(ah, LATEACK_TO); ath9k_hw_set_cts_timeout(ah, LATEACK_TO); + if (sta) { + struct ath_node *an; + + an = (struct ath_node *)sta->drv_priv; + an->ackto = -1; + } da->lto = jiffies + LATEACK_DELAY; } diff --git a/drivers/net/wireless/ath/ath9k/dynack.h b/drivers/net/wireless/ath/ath9k/dynack.h index 6d7bef976742..cf60224d40df 100644 --- a/drivers/net/wireless/ath/ath9k/dynack.h +++ b/drivers/net/wireless/ath/ath9k/dynack.h @@ -86,7 +86,8 @@ void ath_dynack_node_deinit(struct ath_hw *ah, struct ath_node *an); void ath_dynack_init(struct ath_hw *ah); void ath_dynack_sample_ack_ts(struct ath_hw *ah, struct sk_buff *skb, u32 ts); void ath_dynack_sample_tx_ts(struct ath_hw *ah, struct sk_buff *skb, - struct ath_tx_status *ts); + struct ath_tx_status *ts, + struct ieee80211_sta *sta); #else static inline void ath_dynack_init(struct ath_hw *ah) {} static inline void ath_dynack_node_init(struct ath_hw *ah, @@ -97,7 +98,8 @@ static inline void ath_dynack_sample_ack_ts(struct ath_hw *ah, struct sk_buff *skb, u32 ts) {} static inline void ath_dynack_sample_tx_ts(struct ath_hw *ah, struct sk_buff *skb, - struct ath_tx_status *ts) {} + struct ath_tx_status *ts, + struct ieee80211_sta *sta) {} #endif #endif /* DYNACK_H */ diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index fa64c1cc94ae..458c4f53ba5d 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -621,7 +621,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, if (bf == bf->bf_lastbf) ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, - ts); + ts, sta); } ath_tx_complete_buf(sc, bf, txq, &bf_head, sta, ts, @@ -765,7 +765,8 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, memcpy(info->control.rates, bf->rates, sizeof(info->control.rates)); ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok); - ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts); + ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts, + sta); } ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok); } else From aa23996b28f10e54a74ff1261af044bec5b5c28a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 2 Nov 2018 21:49:57 +0100 Subject: [PATCH 1853/2608] ath9k: dynack: check da->enabled first in sampling routines commit 9d3d65a91f027b8a9af5e63752d9b78cb10eb92d upstream. Check da->enabled flag first in ath_dynack_sample_tx_ts and ath_dynack_sample_ack_ts routines in order to avoid useless processing Tested-by: Koen Vandeputte Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/dynack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/dynack.c b/drivers/net/wireless/ath/ath9k/dynack.c index d2a7531ceafe..6e236a485431 100644 --- a/drivers/net/wireless/ath/ath9k/dynack.c +++ b/drivers/net/wireless/ath/ath9k/dynack.c @@ -184,7 +184,7 @@ void ath_dynack_sample_tx_ts(struct ath_hw *ah, struct sk_buff *skb, struct ath_common *common = ath9k_hw_common(ah); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - if ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !da->enabled) + if (!da->enabled || (info->flags & IEEE80211_TX_CTL_NO_ACK)) return; spin_lock_bh(&da->qlock); @@ -266,7 +266,7 @@ void ath_dynack_sample_ack_ts(struct ath_hw *ah, struct sk_buff *skb, struct ath_common *common = ath9k_hw_common(ah); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - if (!ath_dynack_bssidmask(ah, hdr->addr1) || !da->enabled) + if (!da->enabled || !ath_dynack_bssidmask(ah, hdr->addr1)) return; spin_lock_bh(&da->qlock); From 383e9b61f85cc8e5f1fcb1493d95f826a2fa736e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Feb 2019 19:46:14 +0100 Subject: [PATCH 1854/2608] Linux 4.14.99 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7f561ef954f2..3b10c8b542e2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 98 +SUBLEVEL = 99 EXTRAVERSION = NAME = Petit Gorille From 3ec492edfa17233b04a6525a731c4df82097ed32 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Tue, 5 Feb 2019 16:52:51 +0100 Subject: [PATCH 1855/2608] mtd: rawnand: gpmi: fix MX28 bus master lockup problem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d5d27fd9826b59979b184ec288e4812abac0e988 upstream. Disable BCH soft reset according to MX23 erratum #2847 ("BCH soft reset may cause bus master lock up") for MX28 too. It has the same problem. Observed problem: once per 100,000+ MX28 reboots NAND read failed on DMA timeout errors: [ 1.770823] UBI: attaching mtd3 to ubi0 [ 2.768088] gpmi_nand: DMA timeout, last DMA :1 [ 3.958087] gpmi_nand: BCH timeout, last DMA :1 [ 4.156033] gpmi_nand: Error in ECC-based read: -110 [ 4.161136] UBI warning: ubi_io_read: error -110 while reading 64 bytes from PEB 0:0, read only 0 bytes, retry [ 4.171283] step 1 error [ 4.173846] gpmi_nand: Chip: 0, Error -1 Without BCH soft reset we successfully executed 1,000,000 MX28 reboots. I have a quote from NXP regarding this problem, from July 18th 2016: "As the i.MX23 and i.MX28 are of the same generation, they share many characteristics. Unfortunately, also the erratas may be shared. In case of the documented erratas and the workarounds, you can also apply the workaround solution of one device on the other one. This have been reported, but I’m afraid that there are not an estimated date for updating the Errata documents. Please accept our apologies for any inconveniences this may cause." Fixes: 6f2a6a52560a ("mtd: nand: gpmi: reset BCH earlier, too, to avoid NAND startup problems") Cc: stable@vger.kernel.org Signed-off-by: Manfred Schlaegl Signed-off-by: Martin Kepplinger Reviewed-by: Miquel Raynal Reviewed-by: Fabio Estevam Acked-by: Han Xu Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/gpmi-nand/gpmi-lib.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index 97787246af41..55e369b6b862 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -168,9 +168,10 @@ int gpmi_init(struct gpmi_nand_data *this) /* * Reset BCH here, too. We got failures otherwise :( - * See later BCH reset for explanation of MX23 handling + * See later BCH reset for explanation of MX23 and MX28 handling */ - ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); + ret = gpmi_reset_block(r->bch_regs, + GPMI_IS_MX23(this) || GPMI_IS_MX28(this)); if (ret) goto err_out; @@ -275,13 +276,11 @@ int bch_set_geometry(struct gpmi_nand_data *this) /* * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this - * chip, otherwise it will lock up. So we skip resetting BCH on the MX23. - * On the other hand, the MX28 needs the reset, because one case has been - * seen where the BCH produced ECC errors constantly after 10000 - * consecutive reboots. The latter case has not been seen on the MX23 - * yet, still we don't know if it could happen there as well. + * chip, otherwise it will lock up. So we skip resetting BCH on the MX23 + * and MX28. */ - ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); + ret = gpmi_reset_block(r->bch_regs, + GPMI_IS_MX23(this) || GPMI_IS_MX28(this)); if (ret) goto err_out; From 4337e2073104a051982ab08724c5116c8ce1d325 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 5 Jan 2019 19:36:18 +0100 Subject: [PATCH 1856/2608] iio: adc: axp288: Fix TS-pin handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9bcf15f75cac3c6a00d8f8083a635de9c8537799 upstream. Prior to this commit there were 3 issues with our handling of the TS-pin: 1) There are 2 ways how the firmware can disable monitoring of the TS-pin for designs which do not have a temperature-sensor for the battery: a) Clearing bit 0 of the AXP20X_ADC_EN1 register b) Setting bit 2 of the AXP288_ADC_TS_PIN_CTRL monitoring Prior to this commit we were unconditionally setting both bits to the value used on devices with a TS. This causes the temperature protection to kick in on devices without a TS, such as the Jumper ezbook v2, causing them to not charge under Linux. This commit fixes this by using regmap_update_bits when updating these 2 registers, leaving the 2 mentioned bits alone. The next 2 problems are related to our handling of the current-source for the TS-pin. The current-source used for the battery temp-sensor (TS) is shared with the GPADC. For proper fuel-gauge and charger operation the TS current-source needs to be permanently on. But to read the GPADC we need to temporary switch the TS current-source to ondemand, so that the GPADC can use it, otherwise we will always read an all 0 value. 2) Problem 2 is we were writing hardcoded values to the ADC TS pin-ctrl register, overwriting various other unrelated bits. Specifically we were overwriting the current-source setting for the TS and GPIO0 pins, forcing it to 80ųA independent of its original setting. On a Chuwi Vi10 tablet this was causing us to get a too high adc value (due to a too high current-source) resulting in the following errors being logged: ACPI Error: AE_ERROR, Returned by Handler for [UserDefinedRegion] ACPI Error: Method parse/execution failed \_SB.SXP1._TMP, AE_ERROR This commit fixes this by using regmap_update_bits to change only the relevant bits. 3) After reading the GPADC channel we were unconditionally enabling the TS current-source even on devices where the TS-pin is not used and the current-source thus was off before axp288_adc_read_raw call. This commit fixes this by making axp288_adc_set_ts a nop on devices where the ADC is not enabled for the TS-pin. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1610545 Fixes: 3091141d7803 ("iio: adc: axp288: Fix the GPADC pin ...") Signed-off-by: Hans de Goede Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/axp288_adc.c | 76 ++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 462a99c13e7a..0153df01e7b6 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -27,9 +27,18 @@ #include #include -#define AXP288_ADC_EN_MASK 0xF1 -#define AXP288_ADC_TS_PIN_GPADC 0xF2 -#define AXP288_ADC_TS_PIN_ON 0xF3 +/* + * This mask enables all ADCs except for the battery temp-sensor (TS), that is + * left as-is to avoid breaking charging on devices without a temp-sensor. + */ +#define AXP288_ADC_EN_MASK 0xF0 +#define AXP288_ADC_TS_ENABLE 0x01 + +#define AXP288_ADC_TS_CURRENT_ON_OFF_MASK GENMASK(1, 0) +#define AXP288_ADC_TS_CURRENT_OFF (0 << 0) +#define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING (1 << 0) +#define AXP288_ADC_TS_CURRENT_ON_ONDEMAND (2 << 0) +#define AXP288_ADC_TS_CURRENT_ON (3 << 0) enum axp288_adc_id { AXP288_ADC_TS, @@ -44,6 +53,7 @@ enum axp288_adc_id { struct axp288_adc_info { int irq; struct regmap *regmap; + bool ts_enabled; }; static const struct iio_chan_spec axp288_adc_channels[] = { @@ -123,21 +133,33 @@ static int axp288_adc_read_channel(int *val, unsigned long address, return IIO_VAL_INT; } -static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode, - unsigned long address) +/* + * The current-source used for the battery temp-sensor (TS) is shared + * with the GPADC. For proper fuel-gauge and charger operation the TS + * current-source needs to be permanently on. But to read the GPADC we + * need to temporary switch the TS current-source to ondemand, so that + * the GPADC can use it, otherwise we will always read an all 0 value. + */ +static int axp288_adc_set_ts(struct axp288_adc_info *info, + unsigned int mode, unsigned long address) { int ret; - /* channels other than GPADC do not need to switch TS pin */ + /* No need to switch the current-source if the TS pin is disabled */ + if (!info->ts_enabled) + return 0; + + /* Channels other than GPADC do not need the current source */ if (address != AXP288_GP_ADC_H) return 0; - ret = regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode); + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, mode); if (ret) return ret; /* When switching to the GPADC pin give things some time to settle */ - if (mode == AXP288_ADC_TS_PIN_GPADC) + if (mode == AXP288_ADC_TS_CURRENT_ON_ONDEMAND) usleep_range(6000, 10000); return 0; @@ -153,14 +175,14 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, mutex_lock(&indio_dev->mlock); switch (mask) { case IIO_CHAN_INFO_RAW: - if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_GPADC, + if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON_ONDEMAND, chan->address)) { dev_err(&indio_dev->dev, "GPADC mode\n"); ret = -EINVAL; break; } ret = axp288_adc_read_channel(val, chan->address, info->regmap); - if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_ON, + if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON, chan->address)) dev_err(&indio_dev->dev, "TS pin restore\n"); break; @@ -172,13 +194,35 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, return ret; } -static int axp288_adc_set_state(struct regmap *regmap) +static int axp288_adc_initialize(struct axp288_adc_info *info) { - /* ADC should be always enabled for internal FG to function */ - if (regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON)) - return -EIO; + int ret, adc_enable_val; - return regmap_write(regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK); + /* + * Determine if the TS pin is enabled and set the TS current-source + * accordingly. + */ + ret = regmap_read(info->regmap, AXP20X_ADC_EN1, &adc_enable_val); + if (ret) + return ret; + + if (adc_enable_val & AXP288_ADC_TS_ENABLE) { + info->ts_enabled = true; + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_ON); + } else { + info->ts_enabled = false; + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_OFF); + } + if (ret) + return ret; + + /* Turn on the ADC for all channels except TS, leave TS as is */ + return regmap_update_bits(info->regmap, AXP20X_ADC_EN1, + AXP288_ADC_EN_MASK, AXP288_ADC_EN_MASK); } static const struct iio_info axp288_adc_iio_info = { @@ -209,7 +253,7 @@ static int axp288_adc_probe(struct platform_device *pdev) * Set ADC to enabled state at all time, including system suspend. * otherwise internal fuel gauge functionality may be affected. */ - ret = axp288_adc_set_state(axp20x->regmap); + ret = axp288_adc_initialize(info); if (ret) { dev_err(&pdev->dev, "unable to enable ADC device\n"); return ret; From 4c93500004f23cb198713c6bb35c946334e35821 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sun, 30 Dec 2018 19:07:01 -0800 Subject: [PATCH 1857/2608] iio: chemical: atlas-ph-sensor: correct IIO_TEMP values to millicelsius commit 0808831dc62e90023ad14ff8da4804c7846e904b upstream. IIO_TEMP scale value for temperature was incorrect and not in millicelsius as required by the ABI documentation. Signed-off-by: Matt Ranostay Fixes: 27dec00ecf2d (iio: chemical: add Atlas pH-SM sensor support) Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/chemical/atlas-ph-sensor.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/chemical/atlas-ph-sensor.c b/drivers/iio/chemical/atlas-ph-sensor.c index ef761a508630..dad2a8be6830 100644 --- a/drivers/iio/chemical/atlas-ph-sensor.c +++ b/drivers/iio/chemical/atlas-ph-sensor.c @@ -453,9 +453,8 @@ static int atlas_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_TEMP: - *val = 1; /* 0.01 */ - *val2 = 100; - break; + *val = 10; + return IIO_VAL_INT; case IIO_PH: *val = 1; /* 0.001 */ *val2 = 1000; @@ -486,7 +485,7 @@ static int atlas_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct atlas_data *data = iio_priv(indio_dev); - __be32 reg = cpu_to_be32(val); + __be32 reg = cpu_to_be32(val / 10); if (val2 != 0 || val < 0 || val > 20000) return -EINVAL; From 3edbf7432556ccc1cade629b567972bd96414c63 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 6 Feb 2019 18:39:40 -0600 Subject: [PATCH 1858/2608] signal: Always notice exiting tasks commit 35634ffa1751b6efd8cf75010b509dcb0263e29b upstream. Recently syzkaller was able to create unkillablle processes by creating a timer that is delivered as a thread local signal on SIGHUP, and receiving SIGHUP SA_NODEFERER. Ultimately causing a loop failing to deliver SIGHUP but always trying. Upon examination it turns out part of the problem is actually most of the solution. Since 2.5 signal delivery has found all fatal signals, marked the signal group for death, and queued SIGKILL in every threads thread queue relying on signal->group_exit_code to preserve the information of which was the actual fatal signal. The conversion of all fatal signals to SIGKILL results in the synchronous signal heuristic in next_signal kicking in and preferring SIGHUP to SIGKILL. Which is especially problematic as all fatal signals have already been transformed into SIGKILL. Instead of dequeueing signals and depending upon SIGKILL to be the first signal dequeued, first test if the signal group has already been marked for death. This guarantees that nothing in the signal queue can prevent a process that needs to exit from exiting. Cc: stable@vger.kernel.org Tested-by: Dmitry Vyukov Reported-by: Dmitry Vyukov Ref: ebf5ebe31d2c ("[PATCH] signal-fixes-2.5.59-A4") History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index 164c36ef0825..d39ca9f5835f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2225,6 +2225,11 @@ relock: goto relock; } + /* Has this task already been marked for death? */ + ksig->info.si_signo = signr = SIGKILL; + if (signal_group_exit(signal)) + goto fatal; + for (;;) { struct k_sigaction *ka; @@ -2320,6 +2325,7 @@ relock: continue; } + fatal: spin_unlock_irq(&sighand->siglock); /* From 284f7b1a09d73107ec4492dabd3a2b6db28122ee Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 6 Feb 2019 17:51:47 -0600 Subject: [PATCH 1859/2608] signal: Better detection of synchronous signals commit 7146db3317c67b517258cb5e1b08af387da0618b upstream. Recently syzkaller was able to create unkillablle processes by creating a timer that is delivered as a thread local signal on SIGHUP, and receiving SIGHUP SA_NODEFERER. Ultimately causing a loop failing to deliver SIGHUP but always trying. When the stack overflows delivery of SIGHUP fails and force_sigsegv is called. Unfortunately because SIGSEGV is numerically higher than SIGHUP next_signal tries again to deliver a SIGHUP. From a quality of implementation standpoint attempting to deliver the timer SIGHUP signal is wrong. We should attempt to deliver the synchronous SIGSEGV signal we just forced. We can make that happening in a fairly straight forward manner by instead of just looking at the signal number we also look at the si_code. In particular for exceptions (aka synchronous signals) the si_code is always greater than 0. That still has the potential to pick up a number of asynchronous signals as in a few cases the same si_codes that are used for synchronous signals are also used for asynchronous signals, and SI_KERNEL is also included in the list of possible si_codes. Still the heuristic is much better and timer signals are definitely excluded. Which is enough to prevent all known ways for someone sending a process signals fast enough to cause unexpected and arguably incorrect behavior. Cc: stable@vger.kernel.org Fixes: a27341cd5fcb ("Prioritize synchronous signals over 'normal' signals") Tested-by: Dmitry Vyukov Reported-by: Dmitry Vyukov Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index d39ca9f5835f..04b3a621b3cc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -672,6 +672,48 @@ void signal_wake_up_state(struct task_struct *t, unsigned int state) kick_process(t); } +static int dequeue_synchronous_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + struct sigpending *pending = &tsk->pending; + struct sigqueue *q, *sync = NULL; + + /* + * Might a synchronous signal be in the queue? + */ + if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) + return 0; + + /* + * Return the first synchronous signal in the queue. + */ + list_for_each_entry(q, &pending->list, list) { + /* Synchronous signals have a postive si_code */ + if ((q->info.si_code > SI_USER) && + (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { + sync = q; + goto next; + } + } + return 0; +next: + /* + * Check if there is another siginfo for the same signal. + */ + list_for_each_entry_continue(q, &pending->list, list) { + if (q->info.si_signo == sync->info.si_signo) + goto still_pending; + } + + sigdelset(&pending->signal, sync->info.si_signo); + recalc_sigpending(); +still_pending: + list_del_init(&sync->list); + copy_siginfo(info, &sync->info); + __sigqueue_free(sync); + return info->si_signo; +} + /* * Remove signals in mask from the pending set and queue. * Returns 1 if any signals were found. @@ -2243,7 +2285,15 @@ relock: goto relock; } - signr = dequeue_signal(current, ¤t->blocked, &ksig->info); + /* + * Signals generated by the execution of an instruction + * need to be delivered before any other pending signals + * so that the instruction pointer in the signal stack + * frame points to the faulting instruction. + */ + signr = dequeue_synchronous_signal(&ksig->info); + if (!signr) + signr = dequeue_signal(current, ¤t->blocked, &ksig->info); if (!signr) break; /* will return 0 */ From 7e21768dfff37e71fed34f3f2cfcfadfd55b7d07 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Dec 2018 17:52:19 +0300 Subject: [PATCH 1860/2608] misc: vexpress: Off by one in vexpress_syscfg_exec() commit f8a70d8b889f180e6860cb1f85fed43d37844c5a upstream. The > comparison should be >= to prevent reading beyond the end of the func->template[] array. (The func->template array is allocated in vexpress_syscfg_regmap_init() and it has func->num_templates elements.) Fixes: 974cc7b93441 ("mfd: vexpress: Define the device as MFD cells") Signed-off-by: Dan Carpenter Acked-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vexpress-syscfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c index 2cde80c7bb93..9b4eba41ee5d 100644 --- a/drivers/misc/vexpress-syscfg.c +++ b/drivers/misc/vexpress-syscfg.c @@ -61,7 +61,7 @@ static int vexpress_syscfg_exec(struct vexpress_syscfg_func *func, int tries; long timeout; - if (WARN_ON(index > func->num_templates)) + if (WARN_ON(index >= func->num_templates)) return -EINVAL; command = readl(syscfg->base + SYS_CFGCTRL); From aef80fd68017793d14fddc3b4ef8e1c050b44faf Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 24 Jan 2019 14:45:03 +0200 Subject: [PATCH 1861/2608] samples: mei: use /dev/mei0 instead of /dev/mei commit c4a46acf1db3ce547d290c29e55b3476c78dd76c upstream. The device was moved from misc device to character devices to support multiple mei devices. Cc: #v4.9+ Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- samples/mei/mei-amt-version.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c index 57d0d871dcf7..bb9988914a56 100644 --- a/samples/mei/mei-amt-version.c +++ b/samples/mei/mei-amt-version.c @@ -117,7 +117,7 @@ static bool mei_init(struct mei *me, const uuid_le *guid, me->verbose = verbose; - me->fd = open("/dev/mei", O_RDWR); + me->fd = open("/dev/mei0", O_RDWR); if (me->fd == -1) { mei_err(me, "Cannot establish a handle to the Intel MEI driver\n"); goto err; From c87bdb9e8148c9955192d4ef4acdb889a557cd6d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Jan 2019 11:27:02 +0100 Subject: [PATCH 1862/2608] debugfs: fix debugfs_rename parameter checking commit d88c93f090f708c18195553b352b9f205e65418f upstream. debugfs_rename() needs to check that the dentries passed into it really are valid, as sometimes they are not (i.e. if the return value of another debugfs call is passed into this one.) So fix this up by properly checking if the two parent directories are errors (they are allowed to be NULL), and if the dentry to rename is not NULL or an error. Cc: stable Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c59f015f386e..ccfe1e1cb6bc 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -766,6 +766,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *dentry = NULL, *trap; struct name_snapshot old_name; + if (IS_ERR(old_dir)) + return old_dir; + if (IS_ERR(new_dir)) + return new_dir; + if (IS_ERR_OR_NULL(old_dentry)) + return old_dentry; + trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) From b49be10b2e624c4f50ae39239f112782addbcc4a Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Thu, 17 Jan 2019 14:30:23 +0100 Subject: [PATCH 1863/2608] tracing: uprobes: Fix typo in pr_fmt string commit ea6eb5e7d15e1838de335609994b4546e2abcaaf upstream. The subsystem-specific message prefix for uprobes was also "trace_kprobe: " instead of "trace_uprobe: " as described in the original commit message. Link: http://lkml.kernel.org/r/20190117133023.19292-1-andreas.ziegler@fau.de Cc: Ingo Molnar Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu Fixes: 7257634135c24 ("tracing/probe: Show subsystem name in messages") Signed-off-by: Andreas Ziegler Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index ea0d90a31fc9..86718c85d8d3 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -17,7 +17,7 @@ * Copyright (C) IBM Corporation, 2010-2012 * Author: Srikar Dronamraju */ -#define pr_fmt(fmt) "trace_kprobe: " fmt +#define pr_fmt(fmt) "trace_uprobe: " fmt #include #include From eb5c1f624e6383e7a41f07b42dc6140b31baf495 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Wed, 6 Feb 2019 13:46:17 +0200 Subject: [PATCH 1864/2608] mips: cm: reprime error cause commit 05dc6001af0630e200ad5ea08707187fe5537e6d upstream. Accordingly to the documentation ---cut--- The GCR_ERROR_CAUSE.ERR_TYPE field and the GCR_ERROR_MULT.ERR_TYPE fields can be cleared by either a reset or by writing the current value of GCR_ERROR_CAUSE.ERR_TYPE to the GCR_ERROR_CAUSE.ERR_TYPE register. ---cut--- Do exactly this. Original value of cm_error may be safely written back; it clears error cause and keeps other bits untouched. Fixes: 3885c2b463f6 ("MIPS: CM: Add support for reporting CM cache errors") Signed-off-by: Vladimir Kondratiev Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/mips-cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index 8f5bd04f320a..7f3f136572de 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -457,5 +457,5 @@ void mips_cm_error_report(void) } /* reprime cause register */ - write_gcr_error_cause(0); + write_gcr_error_cause(cm_error); } From c87691444b5f2bbf6bf2fd374000615e5ffc0372 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 27 Jan 2019 23:28:33 +0200 Subject: [PATCH 1865/2608] MIPS: OCTEON: don't set octeon_dma_bar_type if PCI is disabled commit dcf300a69ac307053dfb35c2e33972e754a98bce upstream. Don't set octeon_dma_bar_type if PCI is disabled. This avoids creation of the MSI irqchip later on, and saves a bit of memory. Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Fixes: a214720cbf50 ("Disable MSI also when pcie-octeon.pcie_disable on") Cc: stable@vger.kernel.org # v3.3+ Cc: linux-mips@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/pci/pci-octeon.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 3e92a06fa772..4adb8f1fcbc7 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -572,6 +572,11 @@ static int __init octeon_pci_setup(void) if (octeon_has_feature(OCTEON_FEATURE_PCIE)) return 0; + if (!octeon_is_pci_host()) { + pr_notice("Not in host mode, PCI Controller not initialized\n"); + return 0; + } + /* Point pcibios_map_irq() to the PCI version of it */ octeon_pcibios_map_irq = octeon_pci_pcibios_map_irq; @@ -583,11 +588,6 @@ static int __init octeon_pci_setup(void) else octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_BIG; - if (!octeon_is_pci_host()) { - pr_notice("Not in host mode, PCI Controller not initialized\n"); - return 0; - } - /* PCI I/O and PCI MEM values */ set_io_port_base(OCTEON_PCI_IOSPACE_BASE); ioport_resource.start = 0; From c2e0cb20f8fb5bd2dcc76d00c74c99223de8bd42 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 28 Jan 2019 23:16:22 +0000 Subject: [PATCH 1866/2608] MIPS: VDSO: Include $(ccflags-vdso) in o32,n32 .lds builds commit 67fc5dc8a541e8f458d7f08bf88ff55933bf9f9d upstream. When generating vdso-o32.lds & vdso-n32.lds for use with programs running as compat ABIs under 64b kernels, we previously haven't included the compiler flags that are supposedly common to all ABIs - ie. those in the ccflags-vdso variable. This is problematic in cases where we need to provide the -m%-float flag in order to ensure that we don't attempt to use a floating point ABI that's incompatible with the target CPU & ABI. For example a toolchain using current gcc trunk configured --with-fp-32=xx fails to build a 64r6el_defconfig kernel with the following error: cc1: error: '-march=mips1' requires '-mfp32' make[2]: *** [arch/mips/vdso/Makefile:135: arch/mips/vdso/vdso-o32.lds] Error 1 Include $(ccflags-vdso) for the compat VDSO .lds builds, just as it is included for the native VDSO .lds & when compiling objects for the compat VDSOs. This ensures we consistently provide the -msoft-float flag amongst others, avoiding the problem by ensuring we're agnostic to the toolchain defaults. Signed-off-by: Paul Burton Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Cc: linux-mips@vger.kernel.org Cc: Kevin Hilman Cc: Guenter Roeck Cc: Maciej W . Rozycki Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/vdso/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index ce196046ac3e..d1a60690e690 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -121,7 +121,7 @@ $(obj)/%-o32.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := -mabi=32 +$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=32 $(obj)/vdso-o32.lds: $(src)/vdso.lds.S FORCE $(call if_changed_dep,cpp_lds_S) @@ -161,7 +161,7 @@ $(obj)/%-n32.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := -mabi=n32 +$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=n32 $(obj)/vdso-n32.lds: $(src)/vdso.lds.S FORCE $(call if_changed_dep,cpp_lds_S) From ec61525653a2d6c6914b1070681bdfc23c3c5885 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Jan 2019 20:10:15 +0000 Subject: [PATCH 1867/2608] ARM: iop32x/n2100: fix PCI IRQ mapping commit db4090920ba2d61a5827a23e441447926a02ffee upstream. Booting 4.20 on a TheCUS N2100 results in a kernel oops while probing PCI, due to n2100_pci_map_irq() having been discarded during boot. Signed-off-by: Russell King Cc: stable@vger.kernel.org # 2.6.18+ Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-iop32x/n2100.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c index c1cd80ecc219..a904244264ce 100644 --- a/arch/arm/mach-iop32x/n2100.c +++ b/arch/arm/mach-iop32x/n2100.c @@ -75,8 +75,7 @@ void __init n2100_map_io(void) /* * N2100 PCI. */ -static int __init -n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; From 39551af0e4e6562a859b96aed99a011f756038b7 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Wed, 16 Jan 2019 16:49:58 +0100 Subject: [PATCH 1868/2608] ARM: tango: Improve ARCH_MULTIPLATFORM compatibility commit d0f9f16788e15d9eb40f68b047732d49658c5a3a upstream. Calling platform-specific code unconditionally blows up when running an ARCH_MULTIPLATFORM kernel on a different platform. Don't do it. Reported-by: Paolo Pisati Signed-off-by: Marc Gonzalez Acked-by: Pavel Machek Cc: stable@vger.kernel.org # v4.8+ Fixes: a30eceb7a59d ("ARM: tango: add Suspend-to-RAM support") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-tango/pm.c | 6 ++---- arch/arm/mach-tango/pm.h | 7 +++++++ arch/arm/mach-tango/setup.c | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 arch/arm/mach-tango/pm.h diff --git a/arch/arm/mach-tango/pm.c b/arch/arm/mach-tango/pm.c index 028e50c6383f..a32c3b631484 100644 --- a/arch/arm/mach-tango/pm.c +++ b/arch/arm/mach-tango/pm.c @@ -3,6 +3,7 @@ #include #include #include "smc.h" +#include "pm.h" static int tango_pm_powerdown(unsigned long arg) { @@ -24,10 +25,7 @@ static const struct platform_suspend_ops tango_pm_ops = { .valid = suspend_valid_only_mem, }; -static int __init tango_pm_init(void) +void __init tango_pm_init(void) { suspend_set_ops(&tango_pm_ops); - return 0; } - -late_initcall(tango_pm_init); diff --git a/arch/arm/mach-tango/pm.h b/arch/arm/mach-tango/pm.h new file mode 100644 index 000000000000..35ea705a0ee2 --- /dev/null +++ b/arch/arm/mach-tango/pm.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifdef CONFIG_SUSPEND +void __init tango_pm_init(void); +#else +#define tango_pm_init NULL +#endif diff --git a/arch/arm/mach-tango/setup.c b/arch/arm/mach-tango/setup.c index 677dd7b5efd9..824f90737b04 100644 --- a/arch/arm/mach-tango/setup.c +++ b/arch/arm/mach-tango/setup.c @@ -2,6 +2,7 @@ #include #include #include "smc.h" +#include "pm.h" static void tango_l2c_write(unsigned long val, unsigned int reg) { @@ -15,4 +16,5 @@ DT_MACHINE_START(TANGO_DT, "Sigma Tango DT") .dt_compat = tango_dt_compat, .l2c_aux_mask = ~0, .l2c_write_sec = tango_l2c_write, + .init_late = tango_pm_init, MACHINE_END From 2243296f1144ddefb9f435c1f4f5e6042481dac7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 29 Jan 2019 11:10:57 +0100 Subject: [PATCH 1869/2608] mac80211: ensure that mgmt tx skbs have tailroom for encryption commit 9d0f50b80222dc273e67e4e14410fcfa4130a90c upstream. Some drivers use IEEE80211_KEY_FLAG_SW_MGMT_TX to indicate that management frames need to be software encrypted. Since normal data packets are still encrypted by the hardware, crypto_tx_tailroom_needed_cnt gets decremented after key upload to hw. This can lead to passing skbs to ccmp_encrypt_skb, which don't have the necessary tailroom for software encryption. Change the code to add tailroom for encrypted management packets, even if crypto_tx_tailroom_needed_cnt is 0. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6b9bf9c027a2..305a4655f23e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1856,9 +1856,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0; - if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@ -1866,8 +1873,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); From 90c7dfa37723f974c8a29155cc38e76ab8ea4a2d Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 23 Jan 2019 15:28:59 +0800 Subject: [PATCH 1870/2608] drm/modes: Prevent division by zero htotal commit a2fcd5c84f7a7825e028381b10182439067aa90d upstream. This patch prevents division by zero htotal. In a follow-up mail Tina writes: > > How did you manage to get here with htotal == 0? This needs backtraces (or if > > this is just about static checkers, a mention of that). > > -Daniel > > In GVT-g, we are trying to enable a virtual display w/o setting timings for a pipe > (a.k.a htotal=0), then we met the following kernel panic: > > [ 32.832048] divide error: 0000 [#1] SMP PTI > [ 32.833614] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-rc4-sriov+ #33 > [ 32.834438] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.10.1-0-g8891697-dirty-20180511_165818-tinazhang-linux-1 04/01/2014 > [ 32.835901] RIP: 0010:drm_mode_hsync+0x1e/0x40 > [ 32.836004] Code: 31 c0 c3 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 8b 87 d8 00 00 00 85 c0 75 22 8b 4f 68 85 c9 78 1b 69 47 58 e8 03 00 00 99 f9 b9 d3 4d 62 10 05 f4 01 00 00 f7 e1 89 d0 c1 e8 06 f3 c3 66 > [ 32.836004] RSP: 0000:ffffc900000ebb90 EFLAGS: 00010206 > [ 32.836004] RAX: 0000000000000000 RBX: ffff88001c67c8a0 RCX: 0000000000000000 > [ 32.836004] RDX: 0000000000000000 RSI: ffff88001c67c000 RDI: ffff88001c67c8a0 > [ 32.836004] RBP: ffff88001c7d03a0 R08: ffff88001c67c8a0 R09: ffff88001c7d0330 > [ 32.836004] R10: ffffffff822c3a98 R11: 0000000000000001 R12: ffff88001c67c000 > [ 32.836004] R13: ffff88001c7d0370 R14: ffffffff8207eb78 R15: ffff88001c67c800 > [ 32.836004] FS: 0000000000000000(0000) GS:ffff88001da00000(0000) knlGS:0000000000000000 > [ 32.836004] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 32.836004] CR2: 0000000000000000 CR3: 000000000220a000 CR4: 00000000000006f0 > [ 32.836004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [ 32.836004] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > [ 32.836004] Call Trace: > [ 32.836004] intel_mode_from_pipe_config+0x72/0x90 > [ 32.836004] intel_modeset_setup_hw_state+0x569/0xf90 > [ 32.836004] intel_modeset_init+0x905/0x1db0 > [ 32.836004] i915_driver_load+0xb8c/0x1120 > [ 32.836004] i915_pci_probe+0x4d/0xb0 > [ 32.836004] local_pci_probe+0x44/0xa0 > [ 32.836004] ? pci_assign_irq+0x27/0x130 > [ 32.836004] pci_device_probe+0x102/0x1c0 > [ 32.836004] driver_probe_device+0x2b8/0x480 > [ 32.836004] __driver_attach+0x109/0x110 > [ 32.836004] ? driver_probe_device+0x480/0x480 > [ 32.836004] bus_for_each_dev+0x67/0xc0 > [ 32.836004] ? klist_add_tail+0x3b/0x70 > [ 32.836004] bus_add_driver+0x1e8/0x260 > [ 32.836004] driver_register+0x5b/0xe0 > [ 32.836004] ? mipi_dsi_bus_init+0x11/0x11 > [ 32.836004] do_one_initcall+0x4d/0x1eb > [ 32.836004] kernel_init_freeable+0x197/0x237 > [ 32.836004] ? rest_init+0xd0/0xd0 > [ 32.836004] kernel_init+0xa/0x110 > [ 32.836004] ret_from_fork+0x35/0x40 > [ 32.836004] Modules linked in: > [ 32.859183] ---[ end trace 525608b0ed0e8665 ]--- > [ 32.859722] RIP: 0010:drm_mode_hsync+0x1e/0x40 > [ 32.860287] Code: 31 c0 c3 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 8b 87 d8 00 00 00 85 c0 75 22 8b 4f 68 85 c9 78 1b 69 47 58 e8 03 00 00 99 f9 b9 d3 4d 62 10 05 f4 01 00 00 f7 e1 89 d0 c1 e8 06 f3 c3 66 > [ 32.862680] RSP: 0000:ffffc900000ebb90 EFLAGS: 00010206 > [ 32.863309] RAX: 0000000000000000 RBX: ffff88001c67c8a0 RCX: 0000000000000000 > [ 32.864182] RDX: 0000000000000000 RSI: ffff88001c67c000 RDI: ffff88001c67c8a0 > [ 32.865206] RBP: ffff88001c7d03a0 R08: ffff88001c67c8a0 R09: ffff88001c7d0330 > [ 32.866359] R10: ffffffff822c3a98 R11: 0000000000000001 R12: ffff88001c67c000 > [ 32.867213] R13: ffff88001c7d0370 R14: ffffffff8207eb78 R15: ffff88001c67c800 > [ 32.868075] FS: 0000000000000000(0000) GS:ffff88001da00000(0000) knlGS:0000000000000000 > [ 32.868983] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 32.869659] CR2: 0000000000000000 CR3: 000000000220a000 CR4: 00000000000006f0 > [ 32.870599] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [ 32.871598] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > [ 32.872549] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b > > Since drm_mode_hsync() has the logic to check mode->htotal, I just extend it to cover the case htotal==0. Signed-off-by: Tina Zhang Cc: Adam Jackson Cc: Dave Airlie Cc: Daniel Vetter [danvet: Add additional explanations + cc: stable.] Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1548228539-3061-1-git-send-email-tina.zhang@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_modes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 4a3f68a33844..3e3035c9e96b 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -751,7 +751,7 @@ int drm_mode_hsync(const struct drm_display_mode *mode) if (mode->hsync) return mode->hsync; - if (mode->htotal < 0) + if (mode->htotal <= 0) return 0; calc_val = (mode->clock * 1000) / mode->htotal; /* hsync in Hz */ From d69e310cbfcbbf5c7b409c477005355487aa79f4 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 28 Jan 2019 10:31:33 +0100 Subject: [PATCH 1871/2608] drm/vmwgfx: Fix setting of dma masks commit 4cbfa1e6c09e98450aab3240e5119b0ab2c9795b upstream. Previously we set only the dma mask and not the coherent mask. Fix that. Also, for clarity, make sure both are initially set to 64 bits. Cc: Fixes: 0d00c488f3de: ("drm/vmwgfx: Fix the driver for large dma addresses") Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 86d25f18aa99..3bc7915097ad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -604,13 +604,16 @@ out_fixup: static int vmw_dma_masks(struct vmw_private *dev_priv) { struct drm_device *dev = dev_priv->dev; + int ret = 0; - if (intel_iommu_enabled && + ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)); + if (dev_priv->map_mode != vmw_dma_phys && (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) { DRM_INFO("Restricting DMA addresses to 44 bits.\n"); - return dma_set_mask(dev->dev, DMA_BIT_MASK(44)); + return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); } - return 0; + + return ret; } #else static int vmw_dma_masks(struct vmw_private *dev_priv) From 7fa5536f92fe647c2462c8a64a129706f1a8da63 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 31 Jan 2019 10:55:37 +0100 Subject: [PATCH 1872/2608] drm/vmwgfx: Return error code from vmw_execbuf_copy_fence_user commit 728354c005c36eaf44b6e5552372b67e60d17f56 upstream. The function was unconditionally returning 0, and a caller would have to rely on the returned fence pointer being NULL to detect errors. However, the function vmw_execbuf_copy_fence_user() would expect a non-zero error code in that case and would BUG otherwise. So make sure we return a proper non-zero error code if the fence pointer returned is NULL. Cc: Fixes: ae2a104058e2: ("vmwgfx: Implement fence objects") Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 87e8af5776a3..49c28a48c5ab 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3818,7 +3818,7 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, *p_fence = NULL; } - return 0; + return ret; } /** From e0f784bf571528011a7421021f72dbe4bfe10a7c Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Tue, 29 Jan 2019 11:58:35 +0100 Subject: [PATCH 1873/2608] HID: debug: fix the ring buffer implementation commit 13054abbaa4f1fd4e6f3b4b63439ec033b4c8035 upstream. Ring buffer implementation in hid_debug_event() and hid_debug_events_read() is strange allowing lost or corrupted data. After commit 717adfdaf147 ("HID: debug: check length before copy_to_user()") it is possible to enter an infinite loop in hid_debug_events_read() by providing 0 as count, this locks up a system. Fix this by rewriting the ring buffer implementation with kfifo and simplify the code. This fixes CVE-2019-3819. v2: fix an execution logic and add a comment v3: use __set_current_state() instead of set_current_state() Backport to v4.14: 2 tree-wide patches 6396bb22151 ("treewide: kzalloc() -> kcalloc()") and a9a08845e9ac ("vfs: do bulk POLL* -> EPOLL* replacement") are missing in v4.14 so cherry-pick relevant pieces. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1669187 Cc: stable@vger.kernel.org # v4.18+ Fixes: cd667ce24796 ("HID: use debugfs for events/reports dumping") Fixes: 717adfdaf147 ("HID: debug: check length before copy_to_user()") Signed-off-by: Vladis Dronov Reviewed-by: Oleg Nesterov Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-debug.c | 124 +++++++++++++++----------------------- include/linux/hid-debug.h | 9 ++- 2 files changed, 53 insertions(+), 80 deletions(-) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index ae8c8e66a6c4..a90967cd4987 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -457,7 +458,7 @@ static char *resolv_usage_page(unsigned page, struct seq_file *f) { char *buf = NULL; if (!f) { - buf = kzalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_ATOMIC); + buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_ATOMIC); if (!buf) return ERR_PTR(-ENOMEM); } @@ -661,17 +662,12 @@ EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { - unsigned i; struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); - list_for_each_entry(list, &hdev->debug_list, node) { - for (i = 0; buf[i]; i++) - list->hid_debug_buf[(list->tail + i) % HID_DEBUG_BUFSIZE] = - buf[i]; - list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE; - } + list_for_each_entry(list, &hdev->debug_list, node) + kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); @@ -722,8 +718,7 @@ void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 valu hid_debug_event(hdev, buf); kfree(buf); - wake_up_interruptible(&hdev->debug_wait); - + wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -1088,8 +1083,8 @@ static int hid_debug_events_open(struct inode *inode, struct file *file) goto out; } - if (!(list->hid_debug_buf = kzalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_KERNEL))) { - err = -ENOMEM; + err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); + if (err) { kfree(list); goto out; } @@ -1109,77 +1104,57 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; - int ret = 0, len; + int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); - while (ret == 0) { - if (list->head == list->tail) { - add_wait_queue(&list->hdev->debug_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); + if (kfifo_is_empty(&list->hid_debug_fifo)) { + add_wait_queue(&list->hdev->debug_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); - while (list->head == list->tail) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - - if (!list->hdev || !list->hdev->debug) { - ret = -EIO; - set_current_state(TASK_RUNNING); - goto out; - } - - /* allow O_NONBLOCK from other threads */ - mutex_unlock(&list->read_mutex); - schedule(); - mutex_lock(&list->read_mutex); - set_current_state(TASK_INTERRUPTIBLE); + while (kfifo_is_empty(&list->hid_debug_fifo)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; } - set_current_state(TASK_RUNNING); - remove_wait_queue(&list->hdev->debug_wait, &wait); + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + /* if list->hdev is NULL we cannot remove_wait_queue(). + * if list->hdev->debug is 0 then hid_debug_unregister() + * was already called and list->hdev is being destroyed. + * if we add remove_wait_queue() here we can hit a race. + */ + if (!list->hdev || !list->hdev->debug) { + ret = -EIO; + set_current_state(TASK_RUNNING); + goto out; + } + + /* allow O_NONBLOCK from other threads */ + mutex_unlock(&list->read_mutex); + schedule(); + mutex_lock(&list->read_mutex); + set_current_state(TASK_INTERRUPTIBLE); } + __set_current_state(TASK_RUNNING); + remove_wait_queue(&list->hdev->debug_wait, &wait); + if (ret) goto out; - - /* pass the ringbuffer contents to userspace */ -copy_rest: - if (list->tail == list->head) - goto out; - if (list->tail > list->head) { - len = list->tail - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - ret += len; - list->head += len; - } else { - len = HID_DEBUG_BUFSIZE - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - list->head = 0; - ret += len; - count -= len; - if (count > 0) - goto copy_rest; - } - } + + /* pass the fifo content to userspace, locking is not needed with only + * one concurrent reader and one concurrent writer + */ + ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); + if (ret) + goto out; + ret = copied; out: mutex_unlock(&list->read_mutex); return ret; @@ -1190,7 +1165,7 @@ static unsigned int hid_debug_events_poll(struct file *file, poll_table *wait) struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); - if (list->head != list->tail) + if (!kfifo_is_empty(&list->hid_debug_fifo)) return POLLIN | POLLRDNORM; if (!list->hdev->debug) return POLLERR | POLLHUP; @@ -1205,7 +1180,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file) spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); - kfree(list->hid_debug_buf); + kfifo_free(&list->hid_debug_fifo); kfree(list); return 0; @@ -1256,4 +1231,3 @@ void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); } - diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 8663f216c563..2d6100edf204 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -24,7 +24,10 @@ #ifdef CONFIG_DEBUG_FS +#include + #define HID_DEBUG_BUFSIZE 512 +#define HID_DEBUG_FIFOSIZE 512 void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); void hid_dump_report(struct hid_device *, int , u8 *, int); @@ -37,11 +40,8 @@ void hid_debug_init(void); void hid_debug_exit(void); void hid_debug_event(struct hid_device *, char *); - struct hid_debug_list { - char *hid_debug_buf; - int head; - int tail; + DECLARE_KFIFO_PTR(hid_debug_fifo, char); struct fasync_struct *fasync; struct hid_device *hdev; struct list_head node; @@ -64,4 +64,3 @@ struct hid_debug_list { #endif #endif - From 850d47601371d800a4e8d46ac08a09c5c3aa3891 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 31 Jan 2019 23:41:11 -0500 Subject: [PATCH 1874/2608] Revert "ext4: use ext4_write_inode() when fsyncing w/o a journal" commit 8fdd60f2ae3682caf2a7258626abc21eb4711892 upstream. This reverts commit ad211f3e94b314a910d4af03178a0b52a7d1ee0a. As Jan Kara pointed out, this change was unsafe since it means we lose the call to sync_mapping_buffers() in the nojournal case. The original point of the commit was avoid taking the inode mutex (since it causes a lockdep warning in generic/113); but we need the mutex in order to call sync_mapping_buffers(). The real fix to this problem was discussed here: https://lore.kernel.org/lkml/20181025150540.259281-4-bvanassche@acm.org The proposed patch was to fix a syzbot complaint, but the problem can also demonstrated via "kvm-xfstests -c nojournal generic/113". Multiple solutions were discused in the e-mail thread, but none have landed in the kernel as of this writing. Anyway, commit ad211f3e94b314 is absolutely the wrong way to suppress the lockdep, so revert it. Fixes: ad211f3e94b314a910d4af03178a0b52a7d1ee0a ("ext4: use ext4_write_inode() when fsyncing w/o a journal") Signed-off-by: Theodore Ts'o Reported: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fsync.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 712f00995390..5508baa11bb6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -116,16 +116,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; - if (!journal) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL - }; - - ret = ext4_write_inode(inode, &wbc); + ret = __generic_file_fsync(file, start, end, datasync); if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) @@ -133,6 +125,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } + ret = file_write_and_wait_range(file, start, end); + if (ret) + return ret; /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. From 264c83c2fe7b78b545b52b2bc7cc88855eac6c78 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Jan 2019 21:13:10 +0100 Subject: [PATCH 1875/2608] libceph: avoid KEEPALIVE_PENDING races in ceph_con_keepalive() commit 4aac9228d16458cedcfd90c7fb37211cf3653ac3 upstream. con_fault() can transition the connection into STANDBY right after ceph_con_keepalive() clears STANDBY in clear_standby(): libceph user thread ceph-msgr worker ceph_con_keepalive() mutex_lock(&con->mutex) clear_standby(con) mutex_unlock(&con->mutex) mutex_lock(&con->mutex) con_fault() ... if KEEPALIVE_PENDING isn't set set state to STANDBY ... mutex_unlock(&con->mutex) set KEEPALIVE_PENDING set WRITE_PENDING This triggers warnings in clear_standby() when either ceph_con_send() or ceph_con_keepalive() get to clearing STANDBY next time. I don't see a reason to condition queue_con() call on the previous value of KEEPALIVE_PENDING, so move the setting of KEEPALIVE_PENDING into the critical section -- unlike WRITE_PENDING, KEEPALIVE_PENDING could have been a non-atomic flag. Reported-by: syzbot+acdeb633f6211ccdf886@syzkaller.appspotmail.com Signed-off-by: Ilya Dryomov Tested-by: Myungho Jung Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f864807284d4..5fd222dc64b3 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3210,9 +3210,10 @@ void ceph_con_keepalive(struct ceph_connection *con) dout("con_keepalive %p\n", con); mutex_lock(&con->mutex); clear_standby(con); + con_flag_set(con, CON_FLAG_KEEPALIVE_PENDING); mutex_unlock(&con->mutex); - if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 && - con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) + + if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); From bc09fc5d8acd819a3e06f2894975a328e375e2f7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Jan 2019 14:37:34 +0100 Subject: [PATCH 1876/2608] xfrm: refine validation of template and selector families commit 35e6103861a3a970de6c84688c6e7a1f65b164ca upstream. The check assumes that in transport mode, the first templates family must match the address family of the policy selector. Syzkaller managed to build a template using MODE_ROUTEOPTIMIZATION, with ipv4-in-ipv6 chain, leading to following splat: BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x1db/0x1854 Read of size 4 at addr ffff888063e57aa0 by task a.out/2050 xfrm_state_find+0x1db/0x1854 xfrm_tmpl_resolve+0x100/0x1d0 xfrm_resolve_and_create_bundle+0x108/0x1000 [..] Problem is that addresses point into flowi4 struct, but xfrm_state_find treats them as being ipv6 because it uses templ->encap_family is used (AF_INET6 in case of reproducer) rather than family (AF_INET). This patch inverts the logic: Enforce 'template family must match selector' EXCEPT for tunnel and BEET mode. In BEET and Tunnel mode, xfrm_tmpl_resolve_one will have remote/local address pointers changed to point at the addresses found in the template, rather than the flowi ones, so no oob read will occur. Reported-by: 3ntr0py1337@gmail.com Reported-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4e8319766f2b..9ff9255d2191 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1445,10 +1445,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family; - if ((ut[i].mode == XFRM_MODE_TRANSPORT) && - (ut[i].family != prev_family)) - return -EINVAL; - + switch (ut[i].mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + break; + default: + if (ut[i].family != prev_family) + return -EINVAL; + break; + } if (ut[i].mode >= XFRM_MODE_MAX) return -EINVAL; From d66213863ade314c5f24c07aba74e31fd8b8c3b1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 30 Dec 2018 12:46:01 +0100 Subject: [PATCH 1877/2608] batman-adv: Avoid WARN on net_device without parent in netns commit 955d3411a17f590364238bd0d3329b61f20c1cd2 upstream. It is not allowed to use WARN* helpers on potential incorrect input from the user or transient problems because systems configured as panic_on_warn will reboot due to such a problem. A NULL return value of __dev_get_by_index can be caused by various problems which can either be related to the system configuration or problems (incorrectly returned network namespaces) in other (virtual) net_device drivers. batman-adv should not cause a (harmful) WARN in this situation and instead only report it via a simple message. Fixes: b7eddd0b3950 ("batman-adv: prevent using any virtual device created on batman-adv as hard-interface") Reported-by: syzbot+c764de0fcfadca9a8595@syzkaller.appspotmail.com Reported-by: Dmitry Vyukov Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/hard-interface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e348f76ea8c1..2e1a084b0bd2 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -19,7 +19,6 @@ #include "main.h" #include -#include #include #include #include @@ -172,8 +171,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) parent_dev = __dev_get_by_index((struct net *)parent_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + } if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; From 0ee47efddd0568853d8ad548e00d1681351d12ec Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 31 Dec 2018 22:31:01 +0100 Subject: [PATCH 1878/2608] batman-adv: Force mac header to start of data on xmit commit 9114daa825fc3f335f9bea3313ce667090187280 upstream. The caller of ndo_start_xmit may not already have called skb_reset_mac_header. The returned value of skb_mac_header/eth_hdr therefore can be in the wrong position and even outside the current skbuff. This for example happens when the user binds to the device using a PF_PACKET-SOCK_RAW with enabled qdisc-bypass: int opt = 4; setsockopt(sock, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof(opt)); Since eth_hdr is used all over the codebase, the batadv_interface_tx function must always take care of resetting it. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Reported-by: syzbot+9d7405c7faa390e60b4e@syzkaller.appspotmail.com Reported-by: syzbot+7d20bc3f1ddddc0f9079@syzkaller.appspotmail.com Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/soft-interface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 8cedb5db1ab3..3a80beef247c 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -213,6 +213,8 @@ static int batadv_interface_tx(struct sk_buff *skb, netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { From ffc2faad73247fe57fd64c4f315dbacb5cd34572 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:30 +0200 Subject: [PATCH 1879/2608] perf tests attr: Fix task term values commit 10836d9f9ac63d40ccfa756f871ce4ed51ae3b52 upstream. The perf_event_attr::task is 1 by default for first (tracking) event in the session. Setting task=1 as default and adding task=0 for cases that need it. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas-Mich Richter Link: http://lkml.kernel.org/r/20170703145030.12903-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Cc: "Rantala, Tommi T. (Nokia - FI/Espoo)" Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/attr/base-record | 2 +- tools/perf/tests/attr/test-record-group | 1 + tools/perf/tests/attr/test-record-group-sampling | 2 +- tools/perf/tests/attr/test-record-group1 | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 31e0b1da830b..37940665f736 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -23,7 +23,7 @@ comm=1 freq=1 inherit_stat=0 enable_on_exec=1 -task=0 +task=1 watermark=0 precise_ip=0|1|2|3 mmap_data=0 diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group index 6e7961f6f7a5..618ba1c17474 100644 --- a/tools/perf/tests/attr/test-record-group +++ b/tools/perf/tests/attr/test-record-group @@ -17,5 +17,6 @@ sample_type=327 read_format=4 mmap=0 comm=0 +task=0 enable_on_exec=0 disabled=0 diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index ef59afd6d635..f906b793196f 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -23,7 +23,7 @@ sample_type=343 # PERF_FORMAT_ID | PERF_FORMAT_GROUP read_format=12 - +task=0 mmap=0 comm=0 enable_on_exec=0 diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1 index 87a222d014d8..48e8bd12fe46 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/attr/test-record-group1 @@ -18,5 +18,6 @@ sample_type=327 read_format=4 mmap=0 comm=0 +task=0 enable_on_exec=0 disabled=0 From 3fbfbd393fbe0b064e39df81775538490e0625c4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 28 Sep 2017 18:06:33 +0200 Subject: [PATCH 1880/2608] perf tests attr: Fix group stat tests commit f6a9820d572bd8384d982357cbad214b3a6c04bb upstream. We started to use group read whenever it's possible: 82bf311e15d2 perf stat: Use group read for event groups That breaks some of attr tests, this change adds the new possible read_format value. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Thomas-Mich Richter LPU-Reference: 20170928160633.GA26973@krava Link: http://lkml.kernel.org/n/tip-1ko2zc4nph93d8lfwjyk9ivz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Cc: "Rantala, Tommi T. (Nokia - FI/Espoo)" Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/attr/test-stat-group | 2 ++ tools/perf/tests/attr/test-stat-group1 | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/perf/tests/attr/test-stat-group b/tools/perf/tests/attr/test-stat-group index fdc1596a8862..e15d6946e9b3 100644 --- a/tools/perf/tests/attr/test-stat-group +++ b/tools/perf/tests/attr/test-stat-group @@ -6,6 +6,7 @@ ret = 1 [event-1:base-stat] fd=1 group_fd=-1 +read_format=3|15 [event-2:base-stat] fd=2 @@ -13,3 +14,4 @@ group_fd=1 config=1 disabled=0 enable_on_exec=0 +read_format=3|15 diff --git a/tools/perf/tests/attr/test-stat-group1 b/tools/perf/tests/attr/test-stat-group1 index 2a1f86e4a904..1746751123dc 100644 --- a/tools/perf/tests/attr/test-stat-group1 +++ b/tools/perf/tests/attr/test-stat-group1 @@ -6,6 +6,7 @@ ret = 1 [event-1:base-stat] fd=1 group_fd=-1 +read_format=3|15 [event-2:base-stat] fd=2 @@ -13,3 +14,4 @@ group_fd=1 config=1 disabled=0 enable_on_exec=0 +read_format=3|15 From 943f5f2a7d70f117a730dceb58131957f3c337c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Oct 2017 15:07:12 +0200 Subject: [PATCH 1881/2608] perf tests attr: Make hw events optional commit 692f5a22cd284bb8233a38e3ed86881d2d9c89d4 upstream. Otherwise we fail on virtual machines with no support for specific HW events. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171009130712.14747-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Cc: "Rantala, Tommi T. (Nokia - FI/Espoo)" Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/attr/test-stat-C0 | 1 + tools/perf/tests/attr/test-stat-basic | 1 + tools/perf/tests/attr/test-stat-default | 4 ++++ tools/perf/tests/attr/test-stat-detailed-1 | 8 ++++++++ tools/perf/tests/attr/test-stat-detailed-2 | 13 +++++++++++++ tools/perf/tests/attr/test-stat-detailed-3 | 13 +++++++++++++ tools/perf/tests/attr/test-stat-no-inherit | 1 + 7 files changed, 41 insertions(+) diff --git a/tools/perf/tests/attr/test-stat-C0 b/tools/perf/tests/attr/test-stat-C0 index 67717fe6a65d..a2c76d10b2bb 100644 --- a/tools/perf/tests/attr/test-stat-C0 +++ b/tools/perf/tests/attr/test-stat-C0 @@ -7,3 +7,4 @@ ret = 1 # events are disabled by default when attached to cpu disabled=1 enable_on_exec=0 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-basic b/tools/perf/tests/attr/test-stat-basic index 74e17881f2ba..69867d049fda 100644 --- a/tools/perf/tests/attr/test-stat-basic +++ b/tools/perf/tests/attr/test-stat-basic @@ -4,3 +4,4 @@ args = -e cycles kill >/dev/null 2>&1 ret = 1 [event:base-stat] +optional=1 diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default index e911dbd4eb47..d9e99b3f77e6 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/attr/test-stat-default @@ -32,6 +32,7 @@ config=2 fd=5 type=0 config=0 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND [event6:base-stat] @@ -52,15 +53,18 @@ optional=1 fd=8 type=0 config=1 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS [event9:base-stat] fd=9 type=0 config=4 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES [event10:base-stat] fd=10 type=0 config=5 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1 index b39270a08e74..8b04a055d154 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/attr/test-stat-detailed-1 @@ -33,6 +33,7 @@ config=2 fd=5 type=0 config=0 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND [event6:base-stat] @@ -53,18 +54,21 @@ optional=1 fd=8 type=0 config=1 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS [event9:base-stat] fd=9 type=0 config=4 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES [event10:base-stat] fd=10 type=0 config=5 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -74,6 +78,7 @@ config=5 fd=11 type=3 config=0 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -83,6 +88,7 @@ config=0 fd=12 type=3 config=65536 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_LL << 0 | @@ -92,6 +98,7 @@ config=65536 fd=13 type=3 config=2 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_LL << 0 | @@ -101,3 +108,4 @@ config=2 fd=14 type=3 config=65538 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2 index 45f8e6ea34f8..4fca9f1bfbf8 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/attr/test-stat-detailed-2 @@ -33,6 +33,7 @@ config=2 fd=5 type=0 config=0 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND [event6:base-stat] @@ -53,18 +54,21 @@ optional=1 fd=8 type=0 config=1 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS [event9:base-stat] fd=9 type=0 config=4 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES [event10:base-stat] fd=10 type=0 config=5 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -74,6 +78,7 @@ config=5 fd=11 type=3 config=0 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -83,6 +88,7 @@ config=0 fd=12 type=3 config=65536 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_LL << 0 | @@ -92,6 +98,7 @@ config=65536 fd=13 type=3 config=2 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_LL << 0 | @@ -101,6 +108,7 @@ config=2 fd=14 type=3 config=65538 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1I << 0 | @@ -120,6 +128,7 @@ optional=1 fd=16 type=3 config=65537 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_DTLB << 0 | @@ -129,6 +138,7 @@ config=65537 fd=17 type=3 config=3 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_DTLB << 0 | @@ -138,6 +148,7 @@ config=3 fd=18 type=3 config=65539 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_ITLB << 0 | @@ -147,6 +158,7 @@ config=65539 fd=19 type=3 config=4 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_ITLB << 0 | @@ -156,3 +168,4 @@ config=4 fd=20 type=3 config=65540 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3 index 30ae0fb7a3fd..4bb58e1c82a6 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/attr/test-stat-detailed-3 @@ -33,6 +33,7 @@ config=2 fd=5 type=0 config=0 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND [event6:base-stat] @@ -53,18 +54,21 @@ optional=1 fd=8 type=0 config=1 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS [event9:base-stat] fd=9 type=0 config=4 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES [event10:base-stat] fd=10 type=0 config=5 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -74,6 +78,7 @@ config=5 fd=11 type=3 config=0 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -83,6 +88,7 @@ config=0 fd=12 type=3 config=65536 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_LL << 0 | @@ -92,6 +98,7 @@ config=65536 fd=13 type=3 config=2 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_LL << 0 | @@ -101,6 +108,7 @@ config=2 fd=14 type=3 config=65538 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1I << 0 | @@ -120,6 +128,7 @@ optional=1 fd=16 type=3 config=65537 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_DTLB << 0 | @@ -129,6 +138,7 @@ config=65537 fd=17 type=3 config=3 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_DTLB << 0 | @@ -138,6 +148,7 @@ config=3 fd=18 type=3 config=65539 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_ITLB << 0 | @@ -147,6 +158,7 @@ config=65539 fd=19 type=3 config=4 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_ITLB << 0 | @@ -156,6 +168,7 @@ config=4 fd=20 type=3 config=65540 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1D << 0 | diff --git a/tools/perf/tests/attr/test-stat-no-inherit b/tools/perf/tests/attr/test-stat-no-inherit index d54b2a1e3e28..924fbb9300d1 100644 --- a/tools/perf/tests/attr/test-stat-no-inherit +++ b/tools/perf/tests/attr/test-stat-no-inherit @@ -5,3 +5,4 @@ ret = 1 [event:base-stat] inherit=0 +optional=1 From cd4fe6339ccd9638da1304160abe60b84115dee5 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Wed, 13 Feb 2019 16:29:24 +0000 Subject: [PATCH 1882/2608] uio: Reduce return paths from uio_write() commit 81daa406c2cc97d85eef9409400404efc2a3f756 upstream. Drive all return paths for uio_write() through a single block at the end of the function. Signed-off-by: Hamish Martin Reviewed-by: Chris Packham Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 654579bc1e54..10f249628e79 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -570,20 +570,29 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, ssize_t retval; s32 irq_on; - if (!idev->info->irq) - return -EIO; + if (!idev->info->irq) { + retval = -EIO; + goto out; + } - if (count != sizeof(s32)) - return -EINVAL; + if (count != sizeof(s32)) { + retval = -EINVAL; + goto out; + } - if (!idev->info->irqcontrol) - return -ENOSYS; + if (!idev->info->irqcontrol) { + retval = -ENOSYS; + goto out; + } - if (copy_from_user(&irq_on, buf, count)) - return -EFAULT; + if (copy_from_user(&irq_on, buf, count)) { + retval = -EFAULT; + goto out; + } retval = idev->info->irqcontrol(idev->info, irq_on); +out: return retval ? retval : sizeof(s32); } From 085d735c858934e5d5bfaedb1fc98bd9135e6ff1 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Wed, 13 Feb 2019 16:29:29 +0000 Subject: [PATCH 1883/2608] uio: Prevent device destruction while fds are open commit a93e7b331568227500186a465fee3c2cb5dffd1f upstream. Prevent destruction of a uio_device while user space apps hold open file descriptors to that device. Further, access to the 'info' member of the struct uio_device is protected by spinlock. This is to ensure stale pointers to data not under control of the UIO subsystem are not dereferenced. Signed-off-by: Hamish Martin Reviewed-by: Chris Packham Signed-off-by: Greg Kroah-Hartman [4.14 change __poll_t to unsigned int] Signed-off-by: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 98 ++++++++++++++++++++++++++++---------- include/linux/uio_driver.h | 4 +- 2 files changed, 75 insertions(+), 27 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 10f249628e79..288c4b977184 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -272,7 +272,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) if (!map_found) { map_found = 1; idev->map_dir = kobject_create_and_add("maps", - &idev->dev->kobj); + &idev->dev.kobj); if (!idev->map_dir) { ret = -ENOMEM; goto err_map; @@ -301,7 +301,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) if (!portio_found) { portio_found = 1; idev->portio_dir = kobject_create_and_add("portio", - &idev->dev->kobj); + &idev->dev.kobj); if (!idev->portio_dir) { ret = -ENOMEM; goto err_portio; @@ -344,7 +344,7 @@ err_map_kobj: kobject_put(&map->kobj); } kobject_put(idev->map_dir); - dev_err(idev->dev, "error creating sysfs files (%d)\n", ret); + dev_err(&idev->dev, "error creating sysfs files (%d)\n", ret); return ret; } @@ -381,7 +381,7 @@ static int uio_get_minor(struct uio_device *idev) idev->minor = retval; retval = 0; } else if (retval == -ENOSPC) { - dev_err(idev->dev, "too many uio devices\n"); + dev_err(&idev->dev, "too many uio devices\n"); retval = -EINVAL; } mutex_unlock(&minor_lock); @@ -435,6 +435,7 @@ static int uio_open(struct inode *inode, struct file *filep) struct uio_device *idev; struct uio_listener *listener; int ret = 0; + unsigned long flags; mutex_lock(&minor_lock); idev = idr_find(&uio_idr, iminor(inode)); @@ -444,9 +445,11 @@ static int uio_open(struct inode *inode, struct file *filep) goto out; } + get_device(&idev->dev); + if (!try_module_get(idev->owner)) { ret = -ENODEV; - goto out; + goto err_module_get; } listener = kmalloc(sizeof(*listener), GFP_KERNEL); @@ -459,11 +462,13 @@ static int uio_open(struct inode *inode, struct file *filep) listener->event_count = atomic_read(&idev->event); filep->private_data = listener; - if (idev->info->open) { + spin_lock_irqsave(&idev->info_lock, flags); + if (idev->info && idev->info->open) ret = idev->info->open(idev->info, inode); - if (ret) - goto err_infoopen; - } + spin_unlock_irqrestore(&idev->info_lock, flags); + if (ret) + goto err_infoopen; + return 0; err_infoopen: @@ -472,6 +477,9 @@ err_infoopen: err_alloc_listener: module_put(idev->owner); +err_module_get: + put_device(&idev->dev); + out: return ret; } @@ -489,12 +497,16 @@ static int uio_release(struct inode *inode, struct file *filep) int ret = 0; struct uio_listener *listener = filep->private_data; struct uio_device *idev = listener->dev; + unsigned long flags; - if (idev->info->release) + spin_lock_irqsave(&idev->info_lock, flags); + if (idev->info && idev->info->release) ret = idev->info->release(idev->info, inode); + spin_unlock_irqrestore(&idev->info_lock, flags); module_put(idev->owner); kfree(listener); + put_device(&idev->dev); return ret; } @@ -502,9 +514,16 @@ static unsigned int uio_poll(struct file *filep, poll_table *wait) { struct uio_listener *listener = filep->private_data; struct uio_device *idev = listener->dev; + unsigned int ret = 0; + unsigned long flags; - if (!idev->info->irq) - return -EIO; + spin_lock_irqsave(&idev->info_lock, flags); + if (!idev->info || !idev->info->irq) + ret = -EIO; + spin_unlock_irqrestore(&idev->info_lock, flags); + + if (ret) + return ret; poll_wait(filep, &idev->wait, wait); if (listener->event_count != atomic_read(&idev->event)) @@ -518,11 +537,17 @@ static ssize_t uio_read(struct file *filep, char __user *buf, struct uio_listener *listener = filep->private_data; struct uio_device *idev = listener->dev; DECLARE_WAITQUEUE(wait, current); - ssize_t retval; + ssize_t retval = 0; s32 event_count; + unsigned long flags; - if (!idev->info->irq) - return -EIO; + spin_lock_irqsave(&idev->info_lock, flags); + if (!idev->info || !idev->info->irq) + retval = -EIO; + spin_unlock_irqrestore(&idev->info_lock, flags); + + if (retval) + return retval; if (count != sizeof(s32)) return -EINVAL; @@ -569,8 +594,10 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, struct uio_device *idev = listener->dev; ssize_t retval; s32 irq_on; + unsigned long flags; - if (!idev->info->irq) { + spin_lock_irqsave(&idev->info_lock, flags); + if (!idev->info || !idev->info->irq) { retval = -EIO; goto out; } @@ -593,6 +620,7 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, retval = idev->info->irqcontrol(idev->info, irq_on); out: + spin_unlock_irqrestore(&idev->info_lock, flags); return retval ? retval : sizeof(s32); } @@ -809,6 +837,13 @@ static void release_uio_class(void) uio_major_cleanup(); } +static void uio_device_release(struct device *dev) +{ + struct uio_device *idev = dev_get_drvdata(dev); + + kfree(idev); +} + /** * uio_register_device - register a new userspace IO device * @owner: module that creates the new device @@ -832,13 +867,14 @@ int __uio_register_device(struct module *owner, info->uio_dev = NULL; - idev = devm_kzalloc(parent, sizeof(*idev), GFP_KERNEL); + idev = kzalloc(sizeof(*idev), GFP_KERNEL); if (!idev) { return -ENOMEM; } idev->owner = owner; idev->info = info; + spin_lock_init(&idev->info_lock); init_waitqueue_head(&idev->wait); atomic_set(&idev->event, 0); @@ -846,14 +882,19 @@ int __uio_register_device(struct module *owner, if (ret) return ret; - idev->dev = device_create(&uio_class, parent, - MKDEV(uio_major, idev->minor), idev, - "uio%d", idev->minor); - if (IS_ERR(idev->dev)) { - printk(KERN_ERR "UIO: device register failed\n"); - ret = PTR_ERR(idev->dev); + idev->dev.devt = MKDEV(uio_major, idev->minor); + idev->dev.class = &uio_class; + idev->dev.parent = parent; + idev->dev.release = uio_device_release; + dev_set_drvdata(&idev->dev, idev); + + ret = dev_set_name(&idev->dev, "uio%d", idev->minor); + if (ret) + goto err_device_create; + + ret = device_register(&idev->dev); + if (ret) goto err_device_create; - } ret = uio_dev_add_attributes(idev); if (ret) @@ -883,7 +924,7 @@ int __uio_register_device(struct module *owner, err_request_irq: uio_dev_del_attributes(idev); err_uio_dev_add_attributes: - device_destroy(&uio_class, MKDEV(uio_major, idev->minor)); + device_unregister(&idev->dev); err_device_create: uio_free_minor(idev); return ret; @@ -898,6 +939,7 @@ EXPORT_SYMBOL_GPL(__uio_register_device); void uio_unregister_device(struct uio_info *info) { struct uio_device *idev; + unsigned long flags; if (!info || !info->uio_dev) return; @@ -911,7 +953,11 @@ void uio_unregister_device(struct uio_info *info) if (info->irq && info->irq != UIO_IRQ_CUSTOM) free_irq(info->irq, idev); - device_destroy(&uio_class, MKDEV(uio_major, idev->minor)); + spin_lock_irqsave(&idev->info_lock, flags); + idev->info = NULL; + spin_unlock_irqrestore(&idev->info_lock, flags); + + device_unregister(&idev->dev); return; } diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 3c85c81b0027..6c5f2074e14f 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -14,6 +14,7 @@ #ifndef _UIO_DRIVER_H_ #define _UIO_DRIVER_H_ +#include #include #include @@ -68,12 +69,13 @@ struct uio_port { struct uio_device { struct module *owner; - struct device *dev; + struct device dev; int minor; atomic_t event; struct fasync_struct *async_queue; wait_queue_head_t wait; struct uio_info *info; + spinlock_t info_lock; struct kobject *map_dir; struct kobject *portio_dir; }; From 8a68c55d15af981361ce324d90ee06e8276d330c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 13 Feb 2019 16:29:31 +0000 Subject: [PATCH 1884/2608] uio: use request_threaded_irq instead commit 9421e45f5ff3d558cf8b75a8cc0824530caf3453 upstream. Prepraing for changing to use mutex lock. Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 288c4b977184..c97945a3f572 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -911,8 +911,9 @@ int __uio_register_device(struct module *owner, * FDs at the time of unregister and therefore may not be * freed until they are released. */ - ret = request_irq(info->irq, uio_interrupt, - info->irq_flags, info->name, idev); + ret = request_threaded_irq(info->irq, NULL, uio_interrupt, + info->irq_flags, info->name, idev); + if (ret) { info->uio_dev = NULL; goto err_request_irq; From 3f400c2c2e7043b64c65d9b8709a0b353e1f8384 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 13 Feb 2019 16:29:33 +0000 Subject: [PATCH 1885/2608] uio: change to use the mutex lock instead of the spin lock commit 543af5861f41af0a5d2432f6fb5976af50f9cee5 upstream. We are hitting a regression with the following commit: commit a93e7b331568227500186a465fee3c2cb5dffd1f Author: Hamish Martin Date: Mon May 14 13:32:23 2018 +1200 uio: Prevent device destruction while fds are open The problem is the addition of spin_lock_irqsave in uio_write. This leads to hitting uio_write -> copy_from_user -> _copy_from_user -> might_fault and the logs filling up with sleeping warnings. I also noticed some uio drivers allocate memory, sleep, grab mutexes from callouts like open() and release and uio is now doing spin_lock_irqsave while calling them. Reported-by: Mike Christie CC: Hamish Martin Reviewed-by: Hamish Martin Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 32 +++++++++++++------------------- include/linux/uio_driver.h | 2 +- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index c97945a3f572..4441235a56cc 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -435,7 +435,6 @@ static int uio_open(struct inode *inode, struct file *filep) struct uio_device *idev; struct uio_listener *listener; int ret = 0; - unsigned long flags; mutex_lock(&minor_lock); idev = idr_find(&uio_idr, iminor(inode)); @@ -462,10 +461,10 @@ static int uio_open(struct inode *inode, struct file *filep) listener->event_count = atomic_read(&idev->event); filep->private_data = listener; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (idev->info && idev->info->open) ret = idev->info->open(idev->info, inode); - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); if (ret) goto err_infoopen; @@ -497,12 +496,11 @@ static int uio_release(struct inode *inode, struct file *filep) int ret = 0; struct uio_listener *listener = filep->private_data; struct uio_device *idev = listener->dev; - unsigned long flags; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (idev->info && idev->info->release) ret = idev->info->release(idev->info, inode); - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); module_put(idev->owner); kfree(listener); @@ -515,12 +513,11 @@ static unsigned int uio_poll(struct file *filep, poll_table *wait) struct uio_listener *listener = filep->private_data; struct uio_device *idev = listener->dev; unsigned int ret = 0; - unsigned long flags; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (!idev->info || !idev->info->irq) ret = -EIO; - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); if (ret) return ret; @@ -539,12 +536,11 @@ static ssize_t uio_read(struct file *filep, char __user *buf, DECLARE_WAITQUEUE(wait, current); ssize_t retval = 0; s32 event_count; - unsigned long flags; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (!idev->info || !idev->info->irq) retval = -EIO; - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); if (retval) return retval; @@ -594,9 +590,8 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, struct uio_device *idev = listener->dev; ssize_t retval; s32 irq_on; - unsigned long flags; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (!idev->info || !idev->info->irq) { retval = -EIO; goto out; @@ -620,7 +615,7 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, retval = idev->info->irqcontrol(idev->info, irq_on); out: - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); return retval ? retval : sizeof(s32); } @@ -874,7 +869,7 @@ int __uio_register_device(struct module *owner, idev->owner = owner; idev->info = info; - spin_lock_init(&idev->info_lock); + mutex_init(&idev->info_lock); init_waitqueue_head(&idev->wait); atomic_set(&idev->event, 0); @@ -940,7 +935,6 @@ EXPORT_SYMBOL_GPL(__uio_register_device); void uio_unregister_device(struct uio_info *info) { struct uio_device *idev; - unsigned long flags; if (!info || !info->uio_dev) return; @@ -954,9 +948,9 @@ void uio_unregister_device(struct uio_info *info) if (info->irq && info->irq != UIO_IRQ_CUSTOM) free_irq(info->irq, idev); - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); idev->info = NULL; - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); device_unregister(&idev->dev); diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 6c5f2074e14f..6f8b68cd460f 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -75,7 +75,7 @@ struct uio_device { struct fasync_struct *async_queue; wait_queue_head_t wait; struct uio_info *info; - spinlock_t info_lock; + struct mutex info_lock; struct kobject *map_dir; struct kobject *portio_dir; }; From 13af019c87f2d90e663742cb1a819834048842ae Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 13 Feb 2019 16:29:34 +0000 Subject: [PATCH 1886/2608] uio: fix crash after the device is unregistered commit 57c5f4df0a5a0ee83df799991251e2ee93a5e4e9 upstream. For the target_core_user use case, after the device is unregistered it maybe still opened in user space, then the kernel will crash, like: [ 251.163692] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 251.163820] IP: [] show_name+0x23/0x40 [uio] [ 251.163965] PGD 8000000062694067 PUD 62696067 PMD 0 [ 251.164097] Oops: 0000 [#1] SMP ... [ 251.165605] e1000 mptscsih mptbase drm_panel_orientation_quirks dm_mirror dm_region_hash dm_log dm_mod [ 251.166014] CPU: 0 PID: 13380 Comm: tcmu-runner Kdump: loaded Not tainted 3.10.0-916.el7.test.x86_64 #1 [ 251.166381] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 [ 251.166747] task: ffff971eb91db0c0 ti: ffff971e9e384000 task.ti: ffff971e9e384000 [ 251.167137] RIP: 0010:[] [] show_name+0x23/0x40 [uio] [ 251.167563] RSP: 0018:ffff971e9e387dc8 EFLAGS: 00010282 [ 251.167978] RAX: 0000000000000000 RBX: ffff971e9e3f8000 RCX: ffff971eb8368d98 [ 251.168408] RDX: ffff971e9e3f8000 RSI: ffffffffc0738084 RDI: ffff971e9e3f8000 [ 251.168856] RBP: ffff971e9e387dd0 R08: ffff971eb8bc0018 R09: 0000000000000000 [ 251.169296] R10: 0000000000001000 R11: ffffffffa09d444d R12: ffffffffa1076e80 [ 251.169750] R13: ffff971e9e387f18 R14: 0000000000000001 R15: ffff971e9cfb1c80 [ 251.170213] FS: 00007ff37d175880(0000) GS:ffff971ebb600000(0000) knlGS:0000000000000000 [ 251.170693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 251.171248] CR2: 0000000000000008 CR3: 00000000001f6000 CR4: 00000000003607f0 [ 251.172071] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 251.172640] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 251.173236] Call Trace: [ 251.173789] [] dev_attr_show+0x23/0x60 [ 251.174356] [] ? mutex_lock+0x12/0x2f [ 251.174892] [] sysfs_kf_seq_show+0xcf/0x1f0 [ 251.175433] [] kernfs_seq_show+0x26/0x30 [ 251.175981] [] seq_read+0x110/0x3f0 [ 251.176609] [] kernfs_fop_read+0xf5/0x160 [ 251.177158] [] vfs_read+0x9f/0x170 [ 251.177707] [] SyS_read+0x7f/0xf0 [ 251.178268] [] system_call_fastpath+0x1c/0x21 [ 251.178823] Code: 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 53 48 89 d3 e8 7e 96 56 e0 48 8b 80 d8 02 00 00 48 89 df 48 c7 c6 84 80 73 c0 <48> 8b 50 08 31 c0 e8 e2 67 44 e0 5b 48 98 5d c3 0f 1f 00 66 2e [ 251.180115] RIP [] show_name+0x23/0x40 [uio] [ 251.180820] RSP [ 251.181473] CR2: 0000000000000008 CC: Hamish Martin CC: Mike Christie Reviewed-by: Hamish Martin Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 104 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 4441235a56cc..262610192755 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -215,7 +215,20 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct uio_device *idev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", idev->info->name); + int ret; + + mutex_lock(&idev->info_lock); + if (!idev->info) { + ret = -EINVAL; + dev_err(dev, "the device has been unregistered\n"); + goto out; + } + + ret = sprintf(buf, "%s\n", idev->info->name); + +out: + mutex_unlock(&idev->info_lock); + return ret; } static DEVICE_ATTR_RO(name); @@ -223,7 +236,20 @@ static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct uio_device *idev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", idev->info->version); + int ret; + + mutex_lock(&idev->info_lock); + if (!idev->info) { + ret = -EINVAL; + dev_err(dev, "the device has been unregistered\n"); + goto out; + } + + ret = sprintf(buf, "%s\n", idev->info->version); + +out: + mutex_unlock(&idev->info_lock); + return ret; } static DEVICE_ATTR_RO(version); @@ -417,11 +443,15 @@ EXPORT_SYMBOL_GPL(uio_event_notify); static irqreturn_t uio_interrupt(int irq, void *dev_id) { struct uio_device *idev = (struct uio_device *)dev_id; - irqreturn_t ret = idev->info->handler(irq, idev->info); + irqreturn_t ret; + mutex_lock(&idev->info_lock); + + ret = idev->info->handler(irq, idev->info); if (ret == IRQ_HANDLED) uio_event_notify(idev->info); + mutex_unlock(&idev->info_lock); return ret; } @@ -462,6 +492,12 @@ static int uio_open(struct inode *inode, struct file *filep) filep->private_data = listener; mutex_lock(&idev->info_lock); + if (!idev->info) { + mutex_unlock(&idev->info_lock); + ret = -EINVAL; + goto err_alloc_listener; + } + if (idev->info && idev->info->open) ret = idev->info->open(idev->info, inode); mutex_unlock(&idev->info_lock); @@ -592,6 +628,11 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, s32 irq_on; mutex_lock(&idev->info_lock); + if (!idev->info) { + retval = -EINVAL; + goto out; + } + if (!idev->info || !idev->info->irq) { retval = -EIO; goto out; @@ -637,10 +678,20 @@ static int uio_vma_fault(struct vm_fault *vmf) struct page *page; unsigned long offset; void *addr; + int ret = 0; + int mi; - int mi = uio_find_mem_index(vmf->vma); - if (mi < 0) - return VM_FAULT_SIGBUS; + mutex_lock(&idev->info_lock); + if (!idev->info) { + ret = VM_FAULT_SIGBUS; + goto out; + } + + mi = uio_find_mem_index(vmf->vma); + if (mi < 0) { + ret = VM_FAULT_SIGBUS; + goto out; + } /* * We need to subtract mi because userspace uses offset = N*PAGE_SIZE @@ -655,7 +706,11 @@ static int uio_vma_fault(struct vm_fault *vmf) page = vmalloc_to_page(addr); get_page(page); vmf->page = page; - return 0; + +out: + mutex_unlock(&idev->info_lock); + + return ret; } static const struct vm_operations_struct uio_logical_vm_ops = { @@ -680,6 +735,7 @@ static int uio_mmap_physical(struct vm_area_struct *vma) struct uio_device *idev = vma->vm_private_data; int mi = uio_find_mem_index(vma); struct uio_mem *mem; + if (mi < 0) return -EINVAL; mem = idev->info->mem + mi; @@ -721,30 +777,46 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma) vma->vm_private_data = idev; + mutex_lock(&idev->info_lock); + if (!idev->info) { + ret = -EINVAL; + goto out; + } + mi = uio_find_mem_index(vma); - if (mi < 0) - return -EINVAL; + if (mi < 0) { + ret = -EINVAL; + goto out; + } requested_pages = vma_pages(vma); actual_pages = ((idev->info->mem[mi].addr & ~PAGE_MASK) + idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT; - if (requested_pages > actual_pages) - return -EINVAL; + if (requested_pages > actual_pages) { + ret = -EINVAL; + goto out; + } if (idev->info->mmap) { ret = idev->info->mmap(idev->info, vma); - return ret; + goto out; } switch (idev->info->mem[mi].memtype) { case UIO_MEM_PHYS: - return uio_mmap_physical(vma); + ret = uio_mmap_physical(vma); + break; case UIO_MEM_LOGICAL: case UIO_MEM_VIRTUAL: - return uio_mmap_logical(vma); + ret = uio_mmap_logical(vma); + break; default: - return -EINVAL; + ret = -EINVAL; } + +out: + mutex_unlock(&idev->info_lock); + return 0; } static const struct file_operations uio_fops = { @@ -943,12 +1015,12 @@ void uio_unregister_device(struct uio_info *info) uio_free_minor(idev); + mutex_lock(&idev->info_lock); uio_dev_del_attributes(idev); if (info->irq && info->irq != UIO_IRQ_CUSTOM) free_irq(info->irq, idev); - mutex_lock(&idev->info_lock); idev->info = NULL; mutex_unlock(&idev->info_lock); From 28c618abeee3380a4ead50fe9863b692dd851e07 Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Wed, 13 Feb 2019 16:29:36 +0000 Subject: [PATCH 1887/2608] uio: fix wrong return value from uio_mmap() commit e7de2590f18a272e63732b9d519250d1b522b2c4 upstream. uio_mmap has multiple fail paths to set return value to nonzero then goto out. However, it always returns *0* from the *out* at end, and this will mislead callers who check the return value of this function. Fixes: 57c5f4df0a5a0ee ("uio: fix crash after the device is unregistered") CC: Xiubo Li Signed-off-by: Hailong Liu Cc: stable Signed-off-by: Jiang Biao Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 262610192755..fed2d8fa4d4d 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -816,7 +816,7 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma) out: mutex_unlock(&idev->info_lock); - return 0; + return ret; } static const struct file_operations uio_fops = { From 5d07d245cb418064561c1a4e57313cf0a9132aa6 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 13 Feb 2019 16:29:36 +0000 Subject: [PATCH 1888/2608] uio: fix possible circular locking dependency commit b34e9a15b37b8ddbf06a4da142b0c39c74211eb4 upstream. The call trace: XXX/1910 is trying to acquire lock: (&mm->mmap_sem){++++++}, at: [] might_fault+0x57/0xb0 but task is already holding lock: (&idev->info_lock){+.+...}, at: [] uio_write+0x46/0x130 [uio] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&idev->info_lock){+.+...}: [] lock_acquire+0x99/0x1e0 [] mutex_lock_nested+0x93/0x410 [] uio_mmap+0x2d/0x170 [uio] [] mmap_region+0x428/0x650 [] do_mmap+0x3b8/0x4e0 [] vm_mmap_pgoff+0xd3/0x120 [] SyS_mmap_pgoff+0x1f1/0x270 [] SyS_mmap+0x22/0x30 [] system_call_fastpath+0x1c/0x21 -> #0 (&mm->mmap_sem){++++++}: [] __lock_acquire+0xdac/0x15f0 [] lock_acquire+0x99/0x1e0 [] might_fault+0x84/0xb0 [] uio_write+0xb4/0x130 [uio] [] vfs_write+0xc3/0x1f0 [] SyS_write+0x8a/0x100 [] system_call_fastpath+0x1c/0x21 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&idev->info_lock); lock(&mm->mmap_sem); lock(&idev->info_lock); lock(&mm->mmap_sem); *** DEADLOCK *** 1 lock held by XXX/1910: #0: (&idev->info_lock){+.+...}, at: [] uio_write+0x46/0x130 [uio] stack backtrace: CPU: 0 PID: 1910 Comm: XXX Kdump: loaded Not tainted #1 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 Call Trace: [] dump_stack+0x19/0x1b [] print_circular_bug+0x1f9/0x207 [] check_prevs_add+0x957/0x960 [] __lock_acquire+0xdac/0x15f0 [] ? mark_held_locks+0xb9/0x140 [] lock_acquire+0x99/0x1e0 [] ? might_fault+0x57/0xb0 [] might_fault+0x84/0xb0 [] ? might_fault+0x57/0xb0 [] uio_write+0xb4/0x130 [uio] [] vfs_write+0xc3/0x1f0 [] ? fget_light+0xfc/0x510 [] SyS_write+0x8a/0x100 [] system_call_fastpath+0x1c/0x21 Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index fed2d8fa4d4d..4e0cb7cdf739 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -627,6 +627,12 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, ssize_t retval; s32 irq_on; + if (count != sizeof(s32)) + return -EINVAL; + + if (copy_from_user(&irq_on, buf, count)) + return -EFAULT; + mutex_lock(&idev->info_lock); if (!idev->info) { retval = -EINVAL; @@ -638,21 +644,11 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, goto out; } - if (count != sizeof(s32)) { - retval = -EINVAL; - goto out; - } - if (!idev->info->irqcontrol) { retval = -ENOSYS; goto out; } - if (copy_from_user(&irq_on, buf, count)) { - retval = -EFAULT; - goto out; - } - retval = idev->info->irqcontrol(idev->info, irq_on); out: From f142573d9cb63ccbcfc311ce5c997191b1df55e9 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 13 Feb 2019 16:29:39 +0000 Subject: [PATCH 1889/2608] Revert "uio: use request_threaded_irq instead" commit 3d27c4de8d4fb2d4099ff324671792aa2578c6f9 upstream. Since mutex lock in irq hanler is useless currently, here will remove it together with it. This reverts commit 9421e45f5ff3d558cf8b75a8cc0824530caf3453. Reported-by: james.r.harris@intel.com CC: Ahsan Atta Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 4e0cb7cdf739..fb5c9701b1fb 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -445,13 +445,10 @@ static irqreturn_t uio_interrupt(int irq, void *dev_id) struct uio_device *idev = (struct uio_device *)dev_id; irqreturn_t ret; - mutex_lock(&idev->info_lock); - ret = idev->info->handler(irq, idev->info); if (ret == IRQ_HANDLED) uio_event_notify(idev->info); - mutex_unlock(&idev->info_lock); return ret; } @@ -974,9 +971,8 @@ int __uio_register_device(struct module *owner, * FDs at the time of unregister and therefore may not be * freed until they are released. */ - ret = request_threaded_irq(info->irq, NULL, uio_interrupt, - info->irq_flags, info->name, idev); - + ret = request_irq(info->irq, uio_interrupt, + info->irq_flags, info->name, idev); if (ret) { info->uio_dev = NULL; goto err_request_irq; From 557ac4e2077364ff58c69fc524a8fc79c83870bf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 15 Feb 2019 08:09:14 +0100 Subject: [PATCH 1890/2608] Linux 4.14.100 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3b10c8b542e2..86fa9a371383 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 99 +SUBLEVEL = 100 EXTRAVERSION = NAME = Petit Gorille From 56f88d7584a376a935969e65c8a14181bd6e8c65 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Feb 2019 15:02:18 -0800 Subject: [PATCH 1891/2608] Revert "exec: load_script: don't blindly truncate shebang string" commit cb5b020a8d38f77209d0472a0fea755299a8ec78 upstream. This reverts commit 8099b047ecc431518b9bb6bdbba3549bbecdc343. It turns out that people do actually depend on the shebang string being truncated, and on the fact that an interpreter (like perl) will often just re-interpret it entirely to get the full argument list. Reported-by: Samuel Dionne-Riel Acked-by: Kees Cook Cc: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_script.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index d0078cbb718b..7cde3f46ad26 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -42,14 +42,10 @@ static int load_script(struct linux_binprm *bprm) fput(bprm->file); bprm->file = NULL; - for (cp = bprm->buf+2;; cp++) { - if (cp >= bprm->buf + BINPRM_BUF_SIZE) - return -ENOEXEC; - if (!*cp || (*cp == '\n')) - break; - } + bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; + if ((cp = strchr(bprm->buf, '\n')) == NULL) + cp = bprm->buf+BINPRM_BUF_SIZE-1; *cp = '\0'; - while (cp > bprm->buf) { cp--; if ((*cp == ' ') || (*cp == '\t')) From d6bf9dcebdd11967b2b6c73d7c8f4863624f69c7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 15 Feb 2019 09:08:56 +0100 Subject: [PATCH 1892/2608] Linux 4.14.101 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 86fa9a371383..d5b20b618517 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 100 +SUBLEVEL = 101 EXTRAVERSION = NAME = Petit Gorille From 90019b6dc4153450b8032cfd4e431e0c0c56aaaa Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 17 Feb 2019 13:14:51 +0200 Subject: [PATCH 1893/2608] dt-bindings: eeprom: at24: add "atmel,24c2048" compatible string commit 6c0c5dc33ff42af49243e94842d0ebdb153189ea upstream. Add new compatible to the device tree bindings. Signed-off-by: Adrian Bunk Acked-by: Rob Herring Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/eeprom/eeprom.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/eeprom/eeprom.txt b/Documentation/devicetree/bindings/eeprom/eeprom.txt index afc04589eadf..3c9a822d576c 100644 --- a/Documentation/devicetree/bindings/eeprom/eeprom.txt +++ b/Documentation/devicetree/bindings/eeprom/eeprom.txt @@ -6,7 +6,8 @@ Required properties: "atmel,24c00", "atmel,24c01", "atmel,24c02", "atmel,24c04", "atmel,24c08", "atmel,24c16", "atmel,24c32", "atmel,24c64", - "atmel,24c128", "atmel,24c256", "atmel,24c512", "atmel,24c1024" + "atmel,24c128", "atmel,24c256", "atmel,24c512", "atmel,24c1024", + "atmel,24c2048" "catalyst,24c32" @@ -23,7 +24,7 @@ Required properties: device with and manufacturer "atmel" should be used. Possible types are: "24c00", "24c01", "24c02", "24c04", "24c08", "24c16", "24c32", "24c64", - "24c128", "24c256", "24c512", "24c1024", "spd" + "24c128", "24c256", "24c512", "24c1024", "24c2048", "spd" - reg : the I2C address of the EEPROM From e97773ce1c9e68291332bbb757a433402a72e4a6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 17 Feb 2019 13:14:52 +0200 Subject: [PATCH 1894/2608] eeprom: at24: add support for 24c2048 commit 37cf28d3b5bca1b532a0b6aac722e7f2788a9294 upstream. Works with ST M24M02. Signed-off-by: Adrian Bunk Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/misc/eeprom/Kconfig | 2 +- drivers/misc/eeprom/at24.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig index de58762097c4..3f93e4564cab 100644 --- a/drivers/misc/eeprom/Kconfig +++ b/drivers/misc/eeprom/Kconfig @@ -12,7 +12,7 @@ config EEPROM_AT24 ones like at24c64, 24lc02 or fm24c04: 24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08, - 24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024 + 24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024, 24c2048 Unless you like data loss puzzles, always be sure that any chip you configure as a 24c32 (32 kbit) or larger is NOT really a diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index ded48a0c77ee..59dcd97ee3de 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -170,6 +170,7 @@ static const struct i2c_device_id at24_ids[] = { { "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) }, { "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) }, { "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) }, + { "24c2048", AT24_DEVICE_MAGIC(2097152 / 8, AT24_FLAG_ADDR16) }, { "at24", 0 }, { /* END OF LIST */ } }; From 883d561c49ebf1d540c3657892995692118afa91 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Wed, 30 Jan 2019 17:01:56 +0800 Subject: [PATCH 1895/2608] blk-mq: fix a hung issue when fsync [ Upstream commit 85bd6e61f34dffa8ec2dc75ff3c02ee7b2f1cbce ] Florian reported a io hung issue when fsync(). It should be triggered by following race condition. data + post flush a flush blk_flush_complete_seq case REQ_FSEQ_DATA blk_flush_queue_rq issued to driver blk_mq_dispatch_rq_list try to issue a flush req failed due to NON-NCQ command .queue_rq return BLK_STS_DEV_RESOURCE request completion req->end_io // doesn't check RESTART mq_flush_data_end_io case REQ_FSEQ_POSTFLUSH blk_kick_flush do nothing because previous flush has not been completed blk_mq_run_hw_queue insert rq to hctx->dispatch due to RESTART is still set, do nothing To fix this, replace the blk_mq_run_hw_queue in mq_flush_data_end_io with blk_mq_sched_restart to check and clear the RESTART flag. Fixes: bd166ef1 (blk-mq-sched: add framework for MQ capable IO schedulers) Reported-by: Florian Stecker Tested-by: Florian Stecker Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index 4938bec8cfef..6603352879e7 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -402,7 +402,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags); - blk_mq_run_hw_queue(hctx, true); + blk_mq_sched_restart(hctx); } /** From 03089bb1f4974cb8cf0667d6632684d16916b95b Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 14 Feb 2019 14:18:00 +0100 Subject: [PATCH 1896/2608] uapi/if_ether.h: prevent redefinition of struct ethhdr commit 6926e041a8920c8ec27e4e155efa760aa01551fd upstream. Musl provides its own ethhdr struct definition. Add a guard to prevent its definition of the appropriate musl header has already been included. glibc does not implement this header, but when glibc will implement this they can just define __UAPI_DEF_ETHHDR 0 to make it work with the kernel. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- include/uapi/linux/if_ether.h | 3 +++ include/uapi/linux/libc-compat.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 244e3213ecb0..60ec9114e28f 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,6 +23,7 @@ #define _UAPI_LINUX_IF_ETHER_H #include +#include /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -150,11 +151,13 @@ * This is an Ethernet frame header. */ +#if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ } __attribute__((packed)); +#endif #endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 8254c937c9f4..fc29efaa918c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -264,4 +264,10 @@ #endif /* __GLIBC__ */ +/* Definitions for if_ether.h */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #endif /* _UAPI_LIBC_COMPAT_H */ From 56a0f2f329032f7feef14390bf6ea10476590e67 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:06 -0500 Subject: [PATCH 1897/2608] ARM: 8789/1: signal: copy registers using __copy_to_user() Commit 5ca451cf6ed04443774bbb7ee45332dafa42e99f upstream. When saving the ARM integer registers, use __copy_to_user() to copy them into user signal frame, rather than __put_user_error(). This has the benefit of disabling/enabling PAN once for the whole copy intead of once per write. Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/kernel/signal.c | 47 ++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index cdfe52b15a0a..1dc1f39825b6 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -296,30 +296,35 @@ static int setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { struct aux_sigframe __user *aux; + struct sigcontext context; int err = 0; - __put_user_error(regs->ARM_r0, &sf->uc.uc_mcontext.arm_r0, err); - __put_user_error(regs->ARM_r1, &sf->uc.uc_mcontext.arm_r1, err); - __put_user_error(regs->ARM_r2, &sf->uc.uc_mcontext.arm_r2, err); - __put_user_error(regs->ARM_r3, &sf->uc.uc_mcontext.arm_r3, err); - __put_user_error(regs->ARM_r4, &sf->uc.uc_mcontext.arm_r4, err); - __put_user_error(regs->ARM_r5, &sf->uc.uc_mcontext.arm_r5, err); - __put_user_error(regs->ARM_r6, &sf->uc.uc_mcontext.arm_r6, err); - __put_user_error(regs->ARM_r7, &sf->uc.uc_mcontext.arm_r7, err); - __put_user_error(regs->ARM_r8, &sf->uc.uc_mcontext.arm_r8, err); - __put_user_error(regs->ARM_r9, &sf->uc.uc_mcontext.arm_r9, err); - __put_user_error(regs->ARM_r10, &sf->uc.uc_mcontext.arm_r10, err); - __put_user_error(regs->ARM_fp, &sf->uc.uc_mcontext.arm_fp, err); - __put_user_error(regs->ARM_ip, &sf->uc.uc_mcontext.arm_ip, err); - __put_user_error(regs->ARM_sp, &sf->uc.uc_mcontext.arm_sp, err); - __put_user_error(regs->ARM_lr, &sf->uc.uc_mcontext.arm_lr, err); - __put_user_error(regs->ARM_pc, &sf->uc.uc_mcontext.arm_pc, err); - __put_user_error(regs->ARM_cpsr, &sf->uc.uc_mcontext.arm_cpsr, err); + context = (struct sigcontext) { + .arm_r0 = regs->ARM_r0, + .arm_r1 = regs->ARM_r1, + .arm_r2 = regs->ARM_r2, + .arm_r3 = regs->ARM_r3, + .arm_r4 = regs->ARM_r4, + .arm_r5 = regs->ARM_r5, + .arm_r6 = regs->ARM_r6, + .arm_r7 = regs->ARM_r7, + .arm_r8 = regs->ARM_r8, + .arm_r9 = regs->ARM_r9, + .arm_r10 = regs->ARM_r10, + .arm_fp = regs->ARM_fp, + .arm_ip = regs->ARM_ip, + .arm_sp = regs->ARM_sp, + .arm_lr = regs->ARM_lr, + .arm_pc = regs->ARM_pc, + .arm_cpsr = regs->ARM_cpsr, - __put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no, err); - __put_user_error(current->thread.error_code, &sf->uc.uc_mcontext.error_code, err); - __put_user_error(current->thread.address, &sf->uc.uc_mcontext.fault_address, err); - __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); + .trap_no = current->thread.trap_no, + .error_code = current->thread.error_code, + .fault_address = current->thread.address, + .oldmask = set->sig[0], + }; + + err |= __copy_to_user(&sf->uc.uc_mcontext, &context, sizeof(context)); err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); From a26e4c1f48b7f1f666a78a9805dce6641220d03b Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:07 -0500 Subject: [PATCH 1898/2608] ARM: 8790/1: signal: always use __copy_to_user to save iwmmxt context Commit 73839798af7ebc6c8d0c9271ebbbc148700e521f upstream. When setting a dummy iwmmxt context, create a local instance and use __copy_to_user both cases whether iwmmxt is being used or not. This has the benefit of disabling/enabling PAN once for the whole copy intead of once per write. Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/kernel/signal.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 1dc1f39825b6..b12499cc3582 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -76,8 +76,6 @@ static int preserve_iwmmxt_context(struct iwmmxt_sigframe __user *frame) kframe->magic = IWMMXT_MAGIC; kframe->size = IWMMXT_STORAGE_SIZE; iwmmxt_task_copy(current_thread_info(), &kframe->storage); - - err = __copy_to_user(frame, kframe, sizeof(*frame)); } else { /* * For bug-compatibility with older kernels, some space @@ -85,10 +83,14 @@ static int preserve_iwmmxt_context(struct iwmmxt_sigframe __user *frame) * Set the magic and size appropriately so that properly * written userspace can skip it reliably: */ - __put_user_error(DUMMY_MAGIC, &frame->magic, err); - __put_user_error(IWMMXT_STORAGE_SIZE, &frame->size, err); + *kframe = (struct iwmmxt_sigframe) { + .magic = DUMMY_MAGIC, + .size = IWMMXT_STORAGE_SIZE, + }; } + err = __copy_to_user(frame, kframe, sizeof(*kframe)); + return err; } From 6d3ccf78e2867718c400a0a0e773adac721a470b Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:08 -0500 Subject: [PATCH 1899/2608] ARM: 8791/1: vfp: use __copy_to_user() when saving VFP state Commit 3aa2df6ec2ca6bc143a65351cca4266d03a8bc41 upstream. Use __copy_to_user() rather than __put_user_error() for individual members when saving VFP state. This has the benefit of disabling/enabling PAN once per copied struct intead of once per write. Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/include/asm/thread_info.h | 4 ++-- arch/arm/kernel/signal.c | 13 +++++++------ arch/arm/vfp/vfpmodule.c | 20 ++++++++------------ 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 57d2ad9c75ca..df8420672c7e 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -124,8 +124,8 @@ extern void vfp_flush_hwstate(struct thread_info *); struct user_vfp; struct user_vfp_exc; -extern int vfp_preserve_user_clear_hwstate(struct user_vfp __user *, - struct user_vfp_exc __user *); +extern int vfp_preserve_user_clear_hwstate(struct user_vfp *, + struct user_vfp_exc *); extern int vfp_restore_user_hwstate(struct user_vfp *, struct user_vfp_exc *); #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index b12499cc3582..1eb83ee7e1cc 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -136,17 +136,18 @@ static int restore_iwmmxt_context(char __user **auxp) static int preserve_vfp_context(struct vfp_sigframe __user *frame) { - const unsigned long magic = VFP_MAGIC; - const unsigned long size = VFP_STORAGE_SIZE; + struct vfp_sigframe kframe; int err = 0; - __put_user_error(magic, &frame->magic, err); - __put_user_error(size, &frame->size, err); + memset(&kframe, 0, sizeof(kframe)); + kframe.magic = VFP_MAGIC; + kframe.size = VFP_STORAGE_SIZE; + err = vfp_preserve_user_clear_hwstate(&kframe.ufp, &kframe.ufp_exc); if (err) - return -EFAULT; + return err; - return vfp_preserve_user_clear_hwstate(&frame->ufp, &frame->ufp_exc); + return __copy_to_user(frame, &kframe, sizeof(kframe)); } static int restore_vfp_context(char __user **auxp) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 6abcd4af8274..6bacd69c9e81 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -554,12 +554,11 @@ void vfp_flush_hwstate(struct thread_info *thread) * Save the current VFP state into the provided structures and prepare * for entry into a new function (signal handler). */ -int vfp_preserve_user_clear_hwstate(struct user_vfp __user *ufp, - struct user_vfp_exc __user *ufp_exc) +int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp, + struct user_vfp_exc *ufp_exc) { struct thread_info *thread = current_thread_info(); struct vfp_hard_struct *hwstate = &thread->vfpstate.hard; - int err = 0; /* Ensure that the saved hwstate is up-to-date. */ vfp_sync_hwstate(thread); @@ -568,22 +567,19 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp __user *ufp, * Copy the floating point registers. There can be unused * registers see asm/hwcap.h for details. */ - err |= __copy_to_user(&ufp->fpregs, &hwstate->fpregs, - sizeof(hwstate->fpregs)); + memcpy(&ufp->fpregs, &hwstate->fpregs, sizeof(hwstate->fpregs)); + /* * Copy the status and control register. */ - __put_user_error(hwstate->fpscr, &ufp->fpscr, err); + ufp->fpscr = hwstate->fpscr; /* * Copy the exception registers. */ - __put_user_error(hwstate->fpexc, &ufp_exc->fpexc, err); - __put_user_error(hwstate->fpinst, &ufp_exc->fpinst, err); - __put_user_error(hwstate->fpinst2, &ufp_exc->fpinst2, err); - - if (err) - return -EFAULT; + ufp_exc->fpexc = hwstate->fpexc; + ufp_exc->fpinst = hwstate->fpinst; + ufp_exc->fpinst2 = ufp_exc->fpinst2; /* Ensure that VFP is disabled. */ vfp_flush_hwstate(thread); From 96d4881f04b916118a0f043360e52bdf51a58f15 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:09 -0500 Subject: [PATCH 1900/2608] ARM: 8792/1: oabi-compat: copy oabi events using __copy_to_user() Commit 319508902600c2688e057750148487996396e9ca upstream. Copy events to user using __copy_to_user() rather than copy members of individually with __put_user_error(). This has the benefit of disabling/enabling PAN once per event intead of once per event member. Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/kernel/sys_oabi-compat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 4abe4909417f..a87684532327 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -277,6 +277,7 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, int maxevents, int timeout) { struct epoll_event *kbuf; + struct oabi_epoll_event e; mm_segment_t fs; long ret, err, i; @@ -295,8 +296,11 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, set_fs(fs); err = 0; for (i = 0; i < ret; i++) { - __put_user_error(kbuf[i].events, &events->events, err); - __put_user_error(kbuf[i].data, &events->data, err); + e.events = kbuf[i].events; + e.data = kbuf[i].data; + err = __copy_to_user(events, &e, sizeof(e)); + if (err) + break; events++; } kfree(kbuf); From 2ef94913e3289d5e23c0a10fdfbdf51ca1c52d67 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:10 -0500 Subject: [PATCH 1901/2608] ARM: 8793/1: signal: replace __put_user_error with __put_user Commit 18ea66bd6e7a95bdc598223d72757190916af28b upstream. With Spectre-v1.1 mitigations, __put_user_error is pointless. In an attempt to remove it, replace its references in frame setups with __put_user. Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/kernel/signal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 1eb83ee7e1cc..02e6b6dfffa7 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -344,7 +344,7 @@ setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) if (err == 0) err |= preserve_vfp_context(&aux->vfp); #endif - __put_user_error(0, &aux->end_magic, err); + err |= __put_user(0, &aux->end_magic); return err; } @@ -476,7 +476,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) /* * Set uc.uc_flags to a value which sc.trap_no would never have. */ - __put_user_error(0x5ac3c35a, &frame->uc.uc_flags, err); + err = __put_user(0x5ac3c35a, &frame->uc.uc_flags); err |= setup_sigframe(frame, regs, set); if (err == 0) @@ -496,8 +496,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) err |= copy_siginfo_to_user(&frame->info, &ksig->info); - __put_user_error(0, &frame->sig.uc.uc_flags, err); - __put_user_error(NULL, &frame->sig.uc.uc_link, err); + err |= __put_user(0, &frame->sig.uc.uc_flags); + err |= __put_user(NULL, &frame->sig.uc.uc_link); err |= __save_altstack(&frame->sig.uc.uc_stack, regs->ARM_sp); err |= setup_sigframe(&frame->sig, regs, set); From f2ed1df0161fa04f6914c4a61b3b2e1df3fd8b82 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:11 -0500 Subject: [PATCH 1902/2608] ARM: 8794/1: uaccess: Prevent speculative use of the current addr_limit Commit 621afc677465db231662ed126ae1f355bf8eac47 upstream. A mispredicted conditional call to set_fs could result in the wrong addr_limit being forwarded under speculation to a subsequent access_ok check, potentially forming part of a spectre-v1 attack using uaccess routines. This patch prevents this forwarding from taking place, but putting heavy barriers in set_fs after writing the addr_limit. Porting commit c2f0ad4fc089cff8 ("arm64: uaccess: Prevent speculative use of the current addr_limit"). Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/include/asm/uaccess.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 4140be431087..7d19584f5c39 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -69,6 +69,14 @@ extern int __put_user_bad(void); static inline void set_fs(mm_segment_t fs) { current_thread_info()->addr_limit = fs; + + /* + * Prevent a mispredicted conditional call to set_fs from forwarding + * the wrong address limit to access_ok under speculation. + */ + dsb(nsh); + isb(); + modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER); } From 0d24778382b8c39e3af2d9a7a87e0f38438b7216 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:12 -0500 Subject: [PATCH 1903/2608] ARM: 8795/1: spectre-v1.1: use put_user() for __put_user() Commit e3aa6243434fd9a82e84bb79ab1abd14f2d9a5a7 upstream. When Spectre mitigation is required, __put_user() needs to include check_uaccess. This is already the case for put_user(), so just make __put_user() an alias of put_user(). Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/include/asm/uaccess.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 7d19584f5c39..cf50b030c62c 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -370,6 +370,14 @@ do { \ __pu_err; \ }) +#ifdef CONFIG_CPU_SPECTRE +/* + * When mitigating Spectre variant 1.1, all accessors need to include + * verification of the address space. + */ +#define __put_user(x, ptr) put_user(x, ptr) + +#else #define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ @@ -377,12 +385,6 @@ do { \ __pu_err; \ }) -#define __put_user_error(x, ptr, err) \ -({ \ - __put_user_switch((x), (ptr), (err), __put_user_nocheck); \ - (void) 0; \ -}) - #define __put_user_nocheck(x, __pu_ptr, __err, __size) \ do { \ unsigned long __pu_addr = (unsigned long)__pu_ptr; \ @@ -462,6 +464,7 @@ do { \ : "r" (x), "i" (-EFAULT) \ : "cc") +#endif /* !CONFIG_CPU_SPECTRE */ #ifdef CONFIG_MMU extern unsigned long __must_check From d5e5018179eada4f3d95d11fdfa1c9bd14595f0b Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:13 -0500 Subject: [PATCH 1904/2608] ARM: 8796/1: spectre-v1,v1.1: provide helpers for address sanitization Commit afaf6838f4bc896a711180b702b388b8cfa638fc upstream. Introduce C and asm helpers to sanitize user address, taking the address range they target into account. Use asm helper for existing sanitization in __copy_from_user(). Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/include/asm/assembler.h | 11 +++++++++++ arch/arm/include/asm/uaccess.h | 26 ++++++++++++++++++++++++++ arch/arm/lib/copy_from_user.S | 6 +----- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index b17ee03d280b..88286dd483ff 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -467,6 +467,17 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .macro uaccess_mask_range_ptr, addr:req, size:req, limit:req, tmp:req +#ifdef CONFIG_CPU_SPECTRE + sub \tmp, \limit, #1 + subs \tmp, \tmp, \addr @ tmp = limit - 1 - addr + addhs \tmp, \tmp, #1 @ if (tmp >= 0) { + subhss \tmp, \tmp, \size @ tmp = limit - (addr + size) } + movlo \addr, #0 @ if (tmp < 0) addr = NULL + csdb +#endif + .endm + .macro uaccess_disable, tmp, isb=1 #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index cf50b030c62c..a5807b67ca8a 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -99,6 +99,32 @@ static inline void set_fs(mm_segment_t fs) #define __inttype(x) \ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) +/* + * Sanitise a uaccess pointer such that it becomes NULL if addr+size + * is above the current addr_limit. + */ +#define uaccess_mask_range_ptr(ptr, size) \ + ((__typeof__(ptr))__uaccess_mask_range_ptr(ptr, size)) +static inline void __user *__uaccess_mask_range_ptr(const void __user *ptr, + size_t size) +{ + void __user *safe_ptr = (void __user *)ptr; + unsigned long tmp; + + asm volatile( + " sub %1, %3, #1\n" + " subs %1, %1, %0\n" + " addhs %1, %1, #1\n" + " subhss %1, %1, %2\n" + " movlo %0, #0\n" + : "+r" (safe_ptr), "=&r" (tmp) + : "r" (size), "r" (current_thread_info()->addr_limit) + : "cc"); + + csdb(); + return safe_ptr; +} + /* * Single-value transfer routines. They automatically use the right * size if we just have the right pointer type. Note that the functions diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index a826df3d3814..6709a8d33963 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -93,11 +93,7 @@ ENTRY(arm_copy_from_user) #ifdef CONFIG_CPU_SPECTRE get_thread_info r3 ldr r3, [r3, #TI_ADDR_LIMIT] - adds ip, r1, r2 @ ip=addr+size - sub r3, r3, #1 @ addr_limit - 1 - cmpcc ip, r3 @ if (addr+size > addr_limit - 1) - movcs r1, #0 @ addr = NULL - csdb + uaccess_mask_range_ptr r1, r2, r3, ip #endif #include "copy_template.S" From 27dd16a03e5dd81dc37ddae2b51add3504dff4b8 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:14 -0500 Subject: [PATCH 1905/2608] ARM: 8797/1: spectre-v1.1: harden __copy_to_user Commit a1d09e074250fad24f1b993f327b18cc6812eb7a upstream. Sanitize user pointer given to __copy_to_user, both for standard version and memcopy version of the user accessor. Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/lib/copy_to_user.S | 6 +++++- arch/arm/lib/uaccess_with_memcpy.c | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index caf5019d8161..970abe521197 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -94,6 +94,11 @@ ENTRY(__copy_to_user_std) WEAK(arm_copy_to_user) +#ifdef CONFIG_CPU_SPECTRE + get_thread_info r3 + ldr r3, [r3, #TI_ADDR_LIMIT] + uaccess_mask_range_ptr r0, r2, r3, ip +#endif #include "copy_template.S" @@ -108,4 +113,3 @@ ENDPROC(__copy_to_user_std) rsb r0, r0, r2 copy_abort_end .popsection - diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 9b4ed1728616..73dc7360cbdd 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -152,7 +152,8 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n) n = __copy_to_user_std(to, from, n); uaccess_restore(ua_flags); } else { - n = __copy_to_user_memcpy(to, from, n); + n = __copy_to_user_memcpy(uaccess_mask_range_ptr(to, n), + from, n); } return n; } From 5c08ea767f8e5e72e5f738e58f29f5953534a34e Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 13 Feb 2019 21:10:15 -0500 Subject: [PATCH 1906/2608] ARM: 8810/1: vfp: Fix wrong assignement to ufp_exc Commit 5df7a99bdd0de4a0480320264c44c04543c29d5a upstream. In vfp_preserve_user_clear_hwstate, ufp_exc->fpinst2 gets assigned to itself. It should actually be hwstate->fpinst2 that gets assigned to the ufp_exc field. Fixes commit 3aa2df6ec2ca6bc143a65351cca4266d03a8bc41 ("ARM: 8791/1: vfp: use __copy_to_user() when saving VFP state"). Reported-by: David Binderman Signed-off-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/vfp/vfpmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 6bacd69c9e81..8e11223d32a1 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -579,7 +579,7 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp, */ ufp_exc->fpexc = hwstate->fpexc; ufp_exc->fpinst = hwstate->fpinst; - ufp_exc->fpinst2 = ufp_exc->fpinst2; + ufp_exc->fpinst2 = hwstate->fpinst2; /* Ensure that VFP is disabled. */ vfp_flush_hwstate(thread); From ac120739812fec8cc56fc3697d79c03e94556e5b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 13 Feb 2019 21:10:16 -0500 Subject: [PATCH 1907/2608] ARM: make lookup_processor_type() non-__init Commit 899a42f836678a595f7d2bc36a5a0c2b03d08cbc upstream. Move lookup_processor_type() out of the __init section so it is callable from (eg) the secondary startup code during hotplug. Reviewed-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Tested-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/kernel/head-common.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 8733012d231f..7e662bdd5cb3 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -122,6 +122,9 @@ __mmap_switched_data: .long init_thread_union + THREAD_START_SP @ sp .size __mmap_switched_data, . - __mmap_switched_data + __FINIT + .text + /* * This provides a C-API version of __lookup_processor_type */ @@ -133,9 +136,6 @@ ENTRY(lookup_processor_type) ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type) - __FINIT - .text - /* * Read processor ID register (CP#15, CR0), and look up in the linker-built * supported processor list. Note that we can't use the absolute addresses From 48b874ec26b9e8c92cd0284754fe22b96c4deddd Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 13 Feb 2019 21:10:17 -0500 Subject: [PATCH 1908/2608] ARM: split out processor lookup Commit 65987a8553061515b5851b472081aedb9837a391 upstream. Split out the lookup of the processor type and associated error handling from the rest of setup_processor() - we will need to use this in the secondary CPU bringup path for big.Little Spectre variant 2 mitigation. Reviewed-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Tested-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/include/asm/cputype.h | 1 + arch/arm/kernel/setup.c | 35 ++++++++++++++++++++-------------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 3379c2c684c2..25d523185c6a 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -107,6 +107,7 @@ #define ARM_CPU_PART_SCORPION 0x510002d0 extern unsigned int processor_id; +struct proc_info_list *lookup_processor(u32 midr); #ifdef CONFIG_CPU_CP15 #define read_cpuid(reg) \ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8e9a3e40d949..1a041ad4881e 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -667,21 +667,28 @@ static void __init smp_build_mpidr_hash(void) } #endif +/* + * locate processor in the list of supported processor types. The linker + * builds this table for us from the entries in arch/arm/mm/proc-*.S + */ +struct proc_info_list *lookup_processor(u32 midr) +{ + struct proc_info_list *list = lookup_processor_type(midr); + + if (!list) { + pr_err("CPU%u: configuration botched (ID %08x), CPU halted\n", + smp_processor_id(), midr); + while (1) + /* can't use cpu_relax() here as it may require MMU setup */; + } + + return list; +} + static void __init setup_processor(void) { - struct proc_info_list *list; - - /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc-*.S - */ - list = lookup_processor_type(read_cpuid_id()); - if (!list) { - pr_err("CPU configuration botched (ID %08x), unable to continue.\n", - read_cpuid_id()); - while (1); - } + unsigned int midr = read_cpuid_id(); + struct proc_info_list *list = lookup_processor(midr); cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); @@ -700,7 +707,7 @@ static void __init setup_processor(void) #endif pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15, + list->cpu_name, midr, midr & 15, proc_arch[cpu_architecture()], get_cr()); snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c", From 211fcda99be34de8a605181eb9b2a9d4d23c4241 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 13 Feb 2019 21:10:18 -0500 Subject: [PATCH 1909/2608] ARM: clean up per-processor check_bugs method call Commit 945aceb1db8885d3a35790cf2e810f681db52756 upstream. Call the per-processor type check_bugs() method in the same way as we do other per-processor functions - move the "processor." detail into proc-fns.h. Reviewed-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Tested-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/include/asm/proc-fns.h | 1 + arch/arm/kernel/bugs.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index e25f4392e1b2..30c499146320 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -99,6 +99,7 @@ extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else #define cpu_proc_init processor._proc_init +#define cpu_check_bugs processor.check_bugs #define cpu_proc_fin processor._proc_fin #define cpu_reset processor.reset #define cpu_do_idle processor._do_idle diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 7be511310191..d41d3598e5e5 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -6,8 +6,8 @@ void check_other_bugs(void) { #ifdef MULTI_CPU - if (processor.check_bugs) - processor.check_bugs(); + if (cpu_check_bugs) + cpu_check_bugs(); #endif } From 94944687acf7d398099c3841a99553c65201df98 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 13 Feb 2019 21:10:19 -0500 Subject: [PATCH 1910/2608] ARM: add PROC_VTABLE and PROC_TABLE macros Commit e209950fdd065d2cc46e6338e47e52841b830cba upstream. Allow the way we access members of the processor vtable to be changed at compile time. We will need to move to per-CPU vtables to fix the Spectre variant 2 issues on big.Little systems. However, we have a couple of calls that do not need the vtable treatment, and indeed cause a kernel warning due to the (later) use of smp_processor_id(), so also introduce the PROC_TABLE macro for these which always use CPU 0's function pointers. Reviewed-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Tested-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/include/asm/proc-fns.h | 39 ++++++++++++++++++++++----------- arch/arm/kernel/setup.c | 4 +--- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 30c499146320..c259cc49c641 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -23,7 +23,7 @@ struct mm_struct; /* * Don't change this structure - ASM code relies on it. */ -extern struct processor { +struct processor { /* MISC * get data abort address/flags */ @@ -79,9 +79,13 @@ extern struct processor { unsigned int suspend_size; void (*do_suspend)(void *); void (*do_resume)(void *); -} processor; +}; #ifndef MULTI_CPU +static inline void init_proc_vtable(const struct processor *p) +{ +} + extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); @@ -98,18 +102,27 @@ extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn)); extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else -#define cpu_proc_init processor._proc_init -#define cpu_check_bugs processor.check_bugs -#define cpu_proc_fin processor._proc_fin -#define cpu_reset processor.reset -#define cpu_do_idle processor._do_idle -#define cpu_dcache_clean_area processor.dcache_clean_area -#define cpu_set_pte_ext processor.set_pte_ext -#define cpu_do_switch_mm processor.switch_mm -/* These three are private to arch/arm/kernel/suspend.c */ -#define cpu_do_suspend processor.do_suspend -#define cpu_do_resume processor.do_resume +extern struct processor processor; +#define PROC_VTABLE(f) processor.f +#define PROC_TABLE(f) processor.f +static inline void init_proc_vtable(const struct processor *p) +{ + processor = *p; +} + +#define cpu_proc_init PROC_VTABLE(_proc_init) +#define cpu_check_bugs PROC_VTABLE(check_bugs) +#define cpu_proc_fin PROC_VTABLE(_proc_fin) +#define cpu_reset PROC_VTABLE(reset) +#define cpu_do_idle PROC_VTABLE(_do_idle) +#define cpu_dcache_clean_area PROC_TABLE(dcache_clean_area) +#define cpu_set_pte_ext PROC_TABLE(set_pte_ext) +#define cpu_do_switch_mm PROC_VTABLE(switch_mm) + +/* These two are private to arch/arm/kernel/suspend.c */ +#define cpu_do_suspend PROC_VTABLE(do_suspend) +#define cpu_do_resume PROC_VTABLE(do_resume) #endif extern void cpu_resume(void); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1a041ad4881e..c1588e31aa83 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -693,9 +693,7 @@ static void __init setup_processor(void) cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); -#ifdef MULTI_CPU - processor = *list->proc; -#endif + init_proc_vtable(list->proc); #ifdef MULTI_TLB cpu_tlb = *list->tlb; #endif From 89229bc38d9c98e0cde2c83a7e7e360ed1400911 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 13 Feb 2019 21:10:20 -0500 Subject: [PATCH 1911/2608] ARM: spectre-v2: per-CPU vtables to work around big.Little systems Commit 383fb3ee8024d596f488d2dbaf45e572897acbdb upstream. In big.Little systems, some CPUs require the Spectre workarounds in paths such as the context switch, but other CPUs do not. In order to handle these differences, we need per-CPU vtables. We are unable to use the kernel's per-CPU variables to support this as per-CPU is not initialised at times when we need access to the vtables, so we have to use an array indexed by logical CPU number. We use an array-of-pointers to avoid having function pointers in the kernel's read/write .data section. Reviewed-by: Julien Thierry Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Tested-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/include/asm/proc-fns.h | 23 +++++++++++++++++++++++ arch/arm/kernel/setup.c | 5 +++++ arch/arm/kernel/smp.c | 31 +++++++++++++++++++++++++++++++ arch/arm/mm/proc-v7-bugs.c | 17 ++--------------- 4 files changed, 61 insertions(+), 15 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index c259cc49c641..e1b6f280ab08 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -104,12 +104,35 @@ extern void cpu_do_resume(void *); #else extern struct processor processor; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +#include +/* + * This can't be a per-cpu variable because we need to access it before + * per-cpu has been initialised. We have a couple of functions that are + * called in a pre-emptible context, and so can't use smp_processor_id() + * there, hence PROC_TABLE(). We insist in init_proc_vtable() that the + * function pointers for these are identical across all CPUs. + */ +extern struct processor *cpu_vtable[]; +#define PROC_VTABLE(f) cpu_vtable[smp_processor_id()]->f +#define PROC_TABLE(f) cpu_vtable[0]->f +static inline void init_proc_vtable(const struct processor *p) +{ + unsigned int cpu = smp_processor_id(); + *cpu_vtable[cpu] = *p; + WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area != + cpu_vtable[0]->dcache_clean_area); + WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext != + cpu_vtable[0]->set_pte_ext); +} +#else #define PROC_VTABLE(f) processor.f #define PROC_TABLE(f) processor.f static inline void init_proc_vtable(const struct processor *p) { processor = *p; } +#endif #define cpu_proc_init PROC_VTABLE(_proc_init) #define cpu_check_bugs PROC_VTABLE(check_bugs) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c1588e31aa83..a6d27284105a 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -115,6 +115,11 @@ EXPORT_SYMBOL(elf_hwcap2); #ifdef MULTI_CPU struct processor processor __ro_after_init; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +struct processor *cpu_vtable[NR_CPUS] = { + [0] = &processor, +}; +#endif #endif #ifdef MULTI_TLB struct cpu_tlb_fns cpu_tlb __ro_after_init; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5e31c62127a0..f57333f46242 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd) #endif } +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +static int secondary_biglittle_prepare(unsigned int cpu) +{ + if (!cpu_vtable[cpu]) + cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL); + + return cpu_vtable[cpu] ? 0 : -ENOMEM; +} + +static void secondary_biglittle_init(void) +{ + init_proc_vtable(lookup_processor(read_cpuid_id())->proc); +} +#else +static int secondary_biglittle_prepare(unsigned int cpu) +{ + return 0; +} + +static void secondary_biglittle_init(void) +{ +} +#endif + int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (!smp_ops.smp_boot_secondary) return -ENOSYS; + ret = secondary_biglittle_prepare(cpu); + if (ret) + return ret; + /* * We need to tell the secondary core where to find * its stack and the page tables. @@ -360,6 +389,8 @@ asmlinkage void secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu; + secondary_biglittle_init(); + /* * The identity mapping is uncached (strongly ordered), so * switch away from it before attempting any exclusive accesses. diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 5544b82a2e7a..9a07916af8dd 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - if (processor.switch_mm != cpu_v7_bpiall_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_bpiall; spectre_v2_method = "BPIALL"; @@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - if (processor.switch_mm != cpu_v7_iciallu_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_iciallu; spectre_v2_method = "ICIALLU"; @@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; - processor.switch_mm = cpu_v7_hvc_switch_mm; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; spectre_v2_method = "hypervisor"; break; @@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; - processor.switch_mm = cpu_v7_smc_switch_mm; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; spectre_v2_method = "firmware"; break; @@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void) if (spectre_v2_method) pr_info("CPU%u: Spectre v2: using %s workaround\n", smp_processor_id(), spectre_v2_method); - return; - -bl_error: - pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n", - cpu); } #else static void cpu_v7_spectre_init(void) From a7fb58b2c8a16c7e9435255bd95d4bf218542145 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 13 Feb 2019 21:10:21 -0500 Subject: [PATCH 1912/2608] ARM: ensure that processor vtables is not lost after boot Commit 3a4d0c2172bcf15b7a3d9d498b2b355f9864286b upstream. Marek Szyprowski reported problems with CPU hotplug in current kernels. This was tracked down to the processor vtables being located in an init section, and therefore discarded after kernel boot, despite being required after boot to properly initialise the non-boot CPUs. Arrange for these tables to end up in .rodata when required. Reported-by: Marek Szyprowski Tested-by: Krzysztof Kozlowski Fixes: 383fb3ee8024 ("ARM: spectre-v2: per-CPU vtables to work around big.Little systems") Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Tested-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/mm/proc-macros.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 81d0efb055c6..19516fbc2c55 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -274,6 +274,13 @@ .endm .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0 +/* + * If we are building for big.Little with branch predictor hardening, + * we need the processor function tables to remain available after boot. + */ +#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) + .section ".rodata" +#endif .type \name\()_processor_functions, #object .align 2 ENTRY(\name\()_processor_functions) @@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions) .endif .size \name\()_processor_functions, . - \name\()_processor_functions +#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) + .previous +#endif .endm .macro define_cache_functions name:req From dee92bc8c2761373b88f67bba87393ac50d8ceda Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 13 Feb 2019 21:10:22 -0500 Subject: [PATCH 1913/2608] ARM: fix the cockup in the previous patch Commit d6951f582cc50ba0ad22ef46b599740966599b14 upstream. The intention in the previous patch was to only place the processor tables in the .rodata section if big.Little was being built and we wanted the branch target hardening, but instead (due to the way it was tested) it ended up always placing the tables into the .rodata section. Although harmless, let's correct this anyway. Fixes: 3a4d0c2172bc ("ARM: ensure that processor vtables is not lost after boot") Signed-off-by: Russell King Signed-off-by: David A. Long Reviewed-by: Julien Thierry Tested-by: Julien Thierry Signed-off-by: Sasha Levin --- arch/arm/mm/proc-macros.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 19516fbc2c55..5461d589a1e2 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -278,7 +278,7 @@ * If we are building for big.Little with branch predictor hardening, * we need the processor function tables to remain available after boot. */ -#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) .section ".rodata" #endif .type \name\()_processor_functions, #object @@ -316,7 +316,7 @@ ENTRY(\name\()_processor_functions) .endif .size \name\()_processor_functions, . - \name\()_processor_functions -#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) .previous #endif .endm From 785644d6731914407b87e70db00aca351a44a935 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 14 Feb 2019 11:31:17 +0100 Subject: [PATCH 1914/2608] net: create skb_gso_validate_mac_len() commit 2b16f048729bf35e6c28a40cbfad07239f9dcd90 upstream If you take a GSO skb, and split it into packets, will the MAC length (L2 + L3 + L4 headers + payload) of those packets be small enough to fit within a given length? Move skb_gso_mac_seglen() to skbuff.h with other related functions like skb_gso_network_seglen() so we can use it, and then create skb_gso_validate_mac_len to do the full calculation. Signed-off-by: Daniel Axtens Signed-off-by: David S. Miller [jwang: cherry pick for CVE-2018-1000026] Signed-off-by: Jack Wang Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 16 +++++++++ net/core/skbuff.c | 73 +++++++++++++++++++++++++++++++----------- net/sched/sch_tbf.c | 10 ------ 3 files changed, 71 insertions(+), 28 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 39c2570ddcf6..50a4a5968f3a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3317,6 +3317,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); @@ -4087,6 +4088,21 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) return hdr_len + skb_gso_transport_seglen(skb); } +/** + * skb_gso_mac_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_mac_seglen is used to determine the real size of the + * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 + * headers (TCP/UDP). + */ +static inline unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + return hdr_len + skb_gso_transport_seglen(skb); +} + /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 873032d1a083..6dbd2c54b2c9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4929,6 +4929,45 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); +/** + * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS + * + * There are a couple of instances where we have a GSO skb, and we + * want to determine what size it would be after it is segmented. + * + * We might want to check: + * - L3+L4+payload size (e.g. IP forwarding) + * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) + * + * This is a helper to do that correctly considering GSO_BY_FRAGS. + * + * @seg_len: The segmented length (from skb_gso_*_seglen). In the + * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. + * + * @max_len: The maximum permissible length. + * + * Returns true if the segmented length <= max length. + */ +static inline bool skb_gso_size_check(const struct sk_buff *skb, + unsigned int seg_len, + unsigned int max_len) { + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const struct sk_buff *iter; + + if (shinfo->gso_size != GSO_BY_FRAGS) + return seg_len <= max_len; + + /* Undo this so we can re-use header sizes */ + seg_len -= GSO_BY_FRAGS; + + skb_walk_frags(skb, iter) { + if (seg_len + skb_headlen(iter) > max_len) + return false; + } + + return true; +} + /** * skb_gso_validate_mtu - Return in case such skb fits a given MTU * @@ -4940,27 +4979,25 @@ EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); */ bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); - const struct sk_buff *iter; - unsigned int hlen; - - hlen = skb_gso_network_seglen(skb); - - if (shinfo->gso_size != GSO_BY_FRAGS) - return hlen <= mtu; - - /* Undo this so we can re-use header sizes */ - hlen -= GSO_BY_FRAGS; - - skb_walk_frags(skb, iter) { - if (hlen + skb_headlen(iter) > mtu) - return false; - } - - return true; + return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); } EXPORT_SYMBOL_GPL(skb_gso_validate_mtu); +/** + * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? + * + * @skb: GSO skb + * @len: length to validate against + * + * skb_gso_validate_mac_len validates if a given skb will fit a wanted + * length once split, including L2, L3 and L4 headers and the payload. + */ +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) +{ + return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); +} +EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); + static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { int mac_len; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index b36ecb58aa6e..107cc76b6e24 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -142,16 +142,6 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, return len; } -/* - * Return length of individual segments of a gso packet, - * including all headers (MAC, IP, TCP/UDP) - */ -static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - return hdr_len + skb_gso_transport_seglen(skb); -} - /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ From 60cd31866de4386d940e55073491c3ee17ca593e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 14 Feb 2019 11:31:18 +0100 Subject: [PATCH 1915/2608] bnx2x: disable GSO where gso_size is too big for hardware commit 8914a595110a6eca69a5e275b323f5d09e18f4f9 upstream If a bnx2x card is passed a GSO packet with a gso_size larger than ~9700 bytes, it will cause a firmware error that will bring the card down: bnx2x: [bnx2x_attn_int_deasserted3:4323(enP24p1s0f0)]MC assert! bnx2x: [bnx2x_mc_assert:720(enP24p1s0f0)]XSTORM_ASSERT_LIST_INDEX 0x2 bnx2x: [bnx2x_mc_assert:736(enP24p1s0f0)]XSTORM_ASSERT_INDEX 0x0 = 0x00000000 0x25e43e47 0x00463e01 0x00010052 bnx2x: [bnx2x_mc_assert:750(enP24p1s0f0)]Chip Revision: everest3, FW Version: 7_13_1 ... (dump of values continues) ... Detect when the mac length of a GSO packet is greater than the maximum packet size (9700 bytes) and disable GSO. Signed-off-by: Daniel Axtens Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller [jwang: cherry pick for CVE-2018-1000026] Signed-off-by: Jack Wang Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 022b06e770d1..41ac9a2bc153 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12978,6 +12978,24 @@ static netdev_features_t bnx2x_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + /* + * A skb with gso_size + header length > 9700 will cause a + * firmware panic. Drop GSO support. + * + * Eventually the upper layer should not pass these packets down. + * + * For speed, if the gso_size is <= 9000, assume there will + * not be 700 bytes of headers and pass it through. Only do a + * full (slow) validation if the gso_size is > 9000. + * + * (Due to the way SKB_BY_FRAGS works this will also do a full + * validation in that case.) + */ + if (unlikely(skb_is_gso(skb) && + (skb_shinfo(skb)->gso_size > 9000) && + !skb_gso_validate_mac_len(skb, 9700))) + features &= ~NETIF_F_GSO_MASK; + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } From bf56a48cb3f93f79882b250163878a231fc72fc1 Mon Sep 17 00:00:00 2001 From: Chao Fan Date: Wed, 26 Dec 2018 11:34:50 +0800 Subject: [PATCH 1916/2608] ACPI: NUMA: Use correct type for printing addresses on i386-PAE [ Upstream commit b9ced18acf68dffebe6888c7ec765a2b1db7a039 ] The addresses of NUMA nodes are not printed correctly on i386-PAE which is misleading. Here is a debian9-32bit with PAE in a QEMU guest having more than 4G of memory: qemu-system-i386 \ -hda /var/lib/libvirt/images/debian32.qcow2 \ -m 5G \ -enable-kvm \ -smp 10 \ -numa node,mem=512M,nodeid=0,cpus=0 \ -numa node,mem=512M,nodeid=1,cpus=1 \ -numa node,mem=512M,nodeid=2,cpus=2 \ -numa node,mem=512M,nodeid=3,cpus=3 \ -numa node,mem=512M,nodeid=4,cpus=4 \ -numa node,mem=512M,nodeid=5,cpus=5 \ -numa node,mem=512M,nodeid=6,cpus=6 \ -numa node,mem=512M,nodeid=7,cpus=7 \ -numa node,mem=512M,nodeid=8,cpus=8 \ -numa node,mem=512M,nodeid=9,cpus=9 \ -serial stdio Because of the wrong value type, it prints as below: [ 0.021049] ACPI: SRAT Memory (0x0 length 0xa0000) in proximity domain 0 enabled [ 0.021740] ACPI: SRAT Memory (0x100000 length 0x1ff00000) in proximity domain 0 enabled [ 0.022425] ACPI: SRAT Memory (0x20000000 length 0x20000000) in proximity domain 1 enabled [ 0.023092] ACPI: SRAT Memory (0x40000000 length 0x20000000) in proximity domain 2 enabled [ 0.023764] ACPI: SRAT Memory (0x60000000 length 0x20000000) in proximity domain 3 enabled [ 0.024431] ACPI: SRAT Memory (0x80000000 length 0x20000000) in proximity domain 4 enabled [ 0.025104] ACPI: SRAT Memory (0xa0000000 length 0x20000000) in proximity domain 5 enabled [ 0.025791] ACPI: SRAT Memory (0x0 length 0x20000000) in proximity domain 6 enabled [ 0.026412] ACPI: SRAT Memory (0x20000000 length 0x20000000) in proximity domain 7 enabled [ 0.027118] ACPI: SRAT Memory (0x40000000 length 0x20000000) in proximity domain 8 enabled [ 0.027802] ACPI: SRAT Memory (0x60000000 length 0x20000000) in proximity domain 9 enabled The upper half of the start address of the NUMA domains between 6 and 9 inclusive was cut, so the printed values are incorrect. Fix the value type, to get the correct values in the log as follows: [ 0.023698] ACPI: SRAT Memory (0x0 length 0xa0000) in proximity domain 0 enabled [ 0.024325] ACPI: SRAT Memory (0x100000 length 0x1ff00000) in proximity domain 0 enabled [ 0.024981] ACPI: SRAT Memory (0x20000000 length 0x20000000) in proximity domain 1 enabled [ 0.025659] ACPI: SRAT Memory (0x40000000 length 0x20000000) in proximity domain 2 enabled [ 0.026317] ACPI: SRAT Memory (0x60000000 length 0x20000000) in proximity domain 3 enabled [ 0.026980] ACPI: SRAT Memory (0x80000000 length 0x20000000) in proximity domain 4 enabled [ 0.027635] ACPI: SRAT Memory (0xa0000000 length 0x20000000) in proximity domain 5 enabled [ 0.028311] ACPI: SRAT Memory (0x100000000 length 0x20000000) in proximity domain 6 enabled [ 0.028985] ACPI: SRAT Memory (0x120000000 length 0x20000000) in proximity domain 7 enabled [ 0.029667] ACPI: SRAT Memory (0x140000000 length 0x20000000) in proximity domain 8 enabled [ 0.030334] ACPI: SRAT Memory (0x160000000 length 0x20000000) in proximity domain 9 enabled Signed-off-by: Chao Fan [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/numa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 8fb74d9011da..a7907b58562a 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -147,9 +147,9 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) { struct acpi_srat_mem_affinity *p = (struct acpi_srat_mem_affinity *)header; - pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n", - (unsigned long)p->base_address, - (unsigned long)p->length, + pr_debug("SRAT Memory (0x%llx length 0x%llx) in proximity domain %d %s%s%s\n", + (unsigned long long)p->base_address, + (unsigned long long)p->length, p->proximity_domain, (p->flags & ACPI_SRAT_MEM_ENABLED) ? "enabled" : "disabled", From e5d94a1a1f462fd78964a6de22bb5d34f6d6c1c5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Jan 2019 15:10:00 -0300 Subject: [PATCH 1917/2608] perf test shell: Use a fallback to get the pathname in vfs_getname [ Upstream commit 03fa483821c0b4db7c2b1453d3332f397d82313f ] Some kernels, like 4.19.13-300.fc29.x86_64 in fedora 29, fail with the existing probe definition asking for the contents of result->name, working when we ask for the 'filename' variable instead, so add a fallback to that. Now those tests are back working on fedora 29 systems with that kernel: # perf test vfs_getname 65: Use vfs_getname probe to get syscall args filenames : Ok 66: Add vfs_getname probe to get syscall args filenames : Ok 67: Check open filename arg using perf trace + vfs_getname: Ok # Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-klt3n0i58dfqttveti09q3fi@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/shell/lib/probe_vfs_getname.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 30a950c9d407..068d463e5cbf 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -13,7 +13,8 @@ add_probe_vfs_getname() { local verbose=$1 if [ $had_vfs_getname -eq 1 ] ; then line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') - perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string" + perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \ + perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string" fi } From 085d26511fae506765d004bc5c8a54daaa129c2a Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 7 Jan 2019 18:51:53 +0000 Subject: [PATCH 1918/2608] cpufreq: check if policy is inactive early in __cpufreq_get() [ Upstream commit 2f66196208c98b3d1b4294edffb2c5a8197be899 ] cpuinfo_cur_freq gets current CPU frequency as detected by hardware while scaling_cur_freq last known CPU frequency. Some platforms may not allow checking the CPU frequency of an offline CPU or the associated resources may have been released via cpufreq_exit when the CPU gets offlined, in which case the policy would have been invalidated already. If we attempt to get current frequency from the hardware, it may result in hang or crash. For example on Juno, I see: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000188 [0000000000000188] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: CPU: 5 PID: 4202 Comm: cat Not tainted 4.20.0-08251-ga0f2c0318a15-dirty #87 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform pstate: 40000005 (nZcv daif -PAN -UAO) pc : scmi_cpufreq_get_rate+0x34/0xb0 lr : scmi_cpufreq_get_rate+0x34/0xb0 Call trace: scmi_cpufreq_get_rate+0x34/0xb0 __cpufreq_get+0x34/0xc0 show_cpuinfo_cur_freq+0x24/0x78 show+0x40/0x60 sysfs_kf_seq_show+0xc0/0x148 kernfs_seq_show+0x44/0x50 seq_read+0xd4/0x480 kernfs_fop_read+0x15c/0x208 __vfs_read+0x60/0x188 vfs_read+0x94/0x150 ksys_read+0x6c/0xd8 __arm64_sys_read+0x24/0x30 el0_svc_common+0x78/0x100 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc ---[ end trace 3d1024e58f77f6b2 ]--- So fix the issue by checking if the policy is invalid early in __cpufreq_get before attempting to get the current frequency. Signed-off-by: Sudeep Holla Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 93754300cb57..66c2790dcc5f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1523,17 +1523,16 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) { unsigned int ret_freq = 0; - if (!cpufreq_driver->get) + if (unlikely(policy_is_inactive(policy)) || !cpufreq_driver->get) return ret_freq; ret_freq = cpufreq_driver->get(policy->cpu); /* - * Updating inactive policies is invalid, so avoid doing that. Also - * if fast frequency switching is used with the given policy, the check + * If fast frequency switching is used with the given policy, the check * against policy->cur is pointless, so skip it in that case too. */ - if (unlikely(policy_is_inactive(policy)) || policy->fast_switch_enabled) + if (policy->fast_switch_enabled) return ret_freq; if (ret_freq && policy->cur && @@ -1562,10 +1561,7 @@ unsigned int cpufreq_get(unsigned int cpu) if (policy) { down_read(&policy->rwsem); - - if (!policy_is_inactive(policy)) - ret_freq = __cpufreq_get(policy); - + ret_freq = __cpufreq_get(policy); up_read(&policy->rwsem); cpufreq_cpu_put(policy); From b0b8a4e8d7afa633274835740fe8d488a14ab320 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Jan 2019 13:59:49 +0200 Subject: [PATCH 1919/2608] drm/bridge: tc358767: add defines for DP1_SRCCTRL & PHY_2LANE [ Upstream commit adf4109896bbee27fd2ac3b48d22d6a0062fe517 ] DP1_SRCCTRL register and PHY_2LANE field did not have matching defines. Add these. Signed-off-by: Tomi Valkeinen Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190103115954.12785-3-tomi.valkeinen@ti.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358767.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 8636e7eeb731..e697c7c6ca52 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -140,6 +140,8 @@ #define DP0_LTLOOPCTRL 0x06d8 #define DP0_SNKLTCTRL 0x06e4 +#define DP1_SRCCTRL 0x07a0 + /* PHY */ #define DP_PHY_CTRL 0x0800 #define DP_PHY_RST BIT(28) /* DP PHY Global Soft Reset */ @@ -148,6 +150,7 @@ #define PHY_M1_RST BIT(12) /* Reset PHY1 Main Channel */ #define PHY_RDY BIT(16) /* PHY Main Channels Ready */ #define PHY_M0_RST BIT(8) /* Reset PHY0 Main Channel */ +#define PHY_2LANE BIT(2) /* PHY Enable 2 lanes */ #define PHY_A0_EN BIT(1) /* PHY Aux Channel0 Enable */ #define PHY_M0_EN BIT(0) /* PHY Main Channel0 Enable */ @@ -562,7 +565,7 @@ static int tc_aux_link_setup(struct tc_data *tc) value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2; tc_write(SYS_PLLPARAM, value); - tc_write(DP_PHY_CTRL, BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN); + tc_write(DP_PHY_CTRL, BGREN | PWR_SW_EN | PHY_2LANE | PHY_A0_EN); /* * Initially PLLs are in bypass. Force PLL parameter update, @@ -832,7 +835,7 @@ static int tc_main_link_setup(struct tc_data *tc) DP0_SRCCTRL_LANESKEW | DP0_SRCCTRL_LANES_2 | DP0_SRCCTRL_BW27 | DP0_SRCCTRL_AUTOCORRECT); /* from excel file - DP1_SrcCtrl */ - tc_write(0x07a0, 0x00003083); + tc_write(DP1_SRCCTRL, 0x00003083); rate = clk_get_rate(tc->refclk); switch (rate) { @@ -853,8 +856,9 @@ static int tc_main_link_setup(struct tc_data *tc) } value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2; tc_write(SYS_PLLPARAM, value); + /* Setup Main Link */ - dp_phy_ctrl = BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN | PHY_M0_EN; + dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_2LANE | PHY_A0_EN | PHY_M0_EN; tc_write(DP_PHY_CTRL, dp_phy_ctrl); msleep(100); From 0fa1a5d8720a196d78a9ff14dba5eee9d45f3436 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Jan 2019 13:59:50 +0200 Subject: [PATCH 1920/2608] drm/bridge: tc358767: fix single lane configuration [ Upstream commit 4d9d54a730434cc068dd3515ba6116697196f77b ] PHY_2LANE bit is always set in DP_PHY_CTRL, breaking 1 lane use. Set PHY_2LANE only when 2 lanes are used. Signed-off-by: Tomi Valkeinen Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190103115954.12785-4-tomi.valkeinen@ti.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358767.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e697c7c6ca52..acac2c1769ad 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -541,6 +541,7 @@ static int tc_aux_link_setup(struct tc_data *tc) unsigned long rate; u32 value; int ret; + u32 dp_phy_ctrl; rate = clk_get_rate(tc->refclk); switch (rate) { @@ -565,7 +566,10 @@ static int tc_aux_link_setup(struct tc_data *tc) value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2; tc_write(SYS_PLLPARAM, value); - tc_write(DP_PHY_CTRL, BGREN | PWR_SW_EN | PHY_2LANE | PHY_A0_EN); + dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_A0_EN; + if (tc->link.base.num_lanes == 2) + dp_phy_ctrl |= PHY_2LANE; + tc_write(DP_PHY_CTRL, dp_phy_ctrl); /* * Initially PLLs are in bypass. Force PLL parameter update, @@ -858,7 +862,9 @@ static int tc_main_link_setup(struct tc_data *tc) tc_write(SYS_PLLPARAM, value); /* Setup Main Link */ - dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_2LANE | PHY_A0_EN | PHY_M0_EN; + dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_A0_EN | PHY_M0_EN; + if (tc->link.base.num_lanes == 2) + dp_phy_ctrl |= PHY_2LANE; tc_write(DP_PHY_CTRL, dp_phy_ctrl); msleep(100); From bfac24ce3362511245d1db4ac54f9edd9cfc8419 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Jan 2019 13:59:51 +0200 Subject: [PATCH 1921/2608] drm/bridge: tc358767: fix initial DP0/1_SRCCTRL value [ Upstream commit 9a63bd6fe1b5590ffa42ae2ed22ee21363293e31 ] Initially DP0_SRCCTRL is set to a static value which includes DP0_SRCCTRL_LANES_2 and DP0_SRCCTRL_BW27, even when only 1 lane of 1.62Gbps speed is used. DP1_SRCCTRL is configured to a magic number. This patch changes the configuration as follows: Configure DP0_SRCCTRL by using tc_srcctrl() which provides the correct value. DP1_SRCCTRL needs two bits to be set to the same value as DP0_SRCCTRL: SSCG and BW27. All other bits can be zero. Signed-off-by: Tomi Valkeinen Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190103115954.12785-5-tomi.valkeinen@ti.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358767.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index acac2c1769ad..24bb6bfa36f3 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -834,12 +834,11 @@ static int tc_main_link_setup(struct tc_data *tc) if (!tc->mode) return -EINVAL; - /* from excel file - DP0_SrcCtrl */ - tc_write(DP0_SRCCTRL, DP0_SRCCTRL_SCRMBLDIS | DP0_SRCCTRL_EN810B | - DP0_SRCCTRL_LANESKEW | DP0_SRCCTRL_LANES_2 | - DP0_SRCCTRL_BW27 | DP0_SRCCTRL_AUTOCORRECT); - /* from excel file - DP1_SrcCtrl */ - tc_write(DP1_SRCCTRL, 0x00003083); + tc_write(DP0_SRCCTRL, tc_srcctrl(tc)); + /* SSCG and BW27 on DP1 must be set to the same as on DP0 */ + tc_write(DP1_SRCCTRL, + (tc->link.spread ? DP0_SRCCTRL_SSCG : 0) | + ((tc->link.base.rate != 162000) ? DP0_SRCCTRL_BW27 : 0)); rate = clk_get_rate(tc->refclk); switch (rate) { From 9a5bce69646dd7c2d2c2977d19de3a6c0e45db05 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Jan 2019 13:59:52 +0200 Subject: [PATCH 1922/2608] drm/bridge: tc358767: reject modes which require too much BW [ Upstream commit 51b9e62eb6950c762162ab7eb8390990179be067 ] The current driver accepts any videomode with pclk < 154MHz. This is not correct, as with 1 lane and/or 1.62Mbps speed not all videomodes can be supported. Add code to reject modes that require more bandwidth that is available. Signed-off-by: Tomi Valkeinen Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190103115954.12785-6-tomi.valkeinen@ti.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358767.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 24bb6bfa36f3..792a2548d3bb 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1112,10 +1112,20 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static int tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct tc_data *tc = connector_to_tc(connector); + u32 req, avail; + u32 bits_per_pixel = 24; + /* DPI interface clock limitation: upto 154 MHz */ if (mode->clock > 154000) return MODE_CLOCK_HIGH; + req = mode->clock * bits_per_pixel / 8; + avail = tc->link.base.num_lanes * tc->link.base.rate; + + if (req > avail) + return MODE_BAD; + return MODE_OK; } From a928c675cf0f2aa14bf4761b94089a02179f0375 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Jan 2019 13:59:53 +0200 Subject: [PATCH 1923/2608] drm/bridge: tc358767: fix output H/V syncs [ Upstream commit 7923e09c7a766e2d58de7fc395bb84c18e5bc625 ] The H and V syncs of the DP output are always set to active high. This patch fixes the syncs by configuring them according to the videomode. Signed-off-by: Tomi Valkeinen Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190103115954.12785-7-tomi.valkeinen@ti.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358767.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 792a2548d3bb..6eebd8ad0c52 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -96,6 +96,8 @@ #define DP0_STARTVAL 0x064c #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 +#define SYNCVAL_HS_POL_ACTIVE_LOW (1 << 15) +#define SYNCVAL_VS_POL_ACTIVE_LOW (1 << 31) #define DP0_MISC 0x0658 #define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) @@ -724,7 +726,9 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) tc_write(DP0_ACTIVEVAL, (mode->vdisplay << 16) | (mode->hdisplay)); - tc_write(DP0_SYNCVAL, (vsync_len << 16) | (hsync_len << 0)); + tc_write(DP0_SYNCVAL, (vsync_len << 16) | (hsync_len << 0) | + ((mode->flags & DRM_MODE_FLAG_NHSYNC) ? SYNCVAL_HS_POL_ACTIVE_LOW : 0) | + ((mode->flags & DRM_MODE_FLAG_NVSYNC) ? SYNCVAL_VS_POL_ACTIVE_LOW : 0)); tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); From 90ad1dc9c66c5b70346eef30be1ed3a407578a1b Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Sat, 29 Dec 2018 17:23:43 +0000 Subject: [PATCH 1924/2608] nvme-pci: use the same attributes when freeing host_mem_desc_bufs. [ Upstream commit cc667f6d5de023ee131e96bb88e5cddca23272bd ] When using HMB the PCIe host driver allocates host_mem_desc_bufs using dma_alloc_attrs() but frees them using dma_free_coherent(). Use the correct dma_free_attrs() function to free the buffers. Signed-off-by: Liviu Dudau Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index afb99876fa9e..06355ca832db 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1624,8 +1624,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev) struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i]; size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size; - dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i], - le64_to_cpu(desc->addr)); + dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i], + le64_to_cpu(desc->addr), + DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); } kfree(dev->host_mem_desc_bufs); @@ -1691,8 +1692,9 @@ out_free_bufs: while (--i >= 0) { size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size; - dma_free_coherent(dev->dev, size, bufs[i], - le64_to_cpu(descs[i].addr)); + dma_free_attrs(dev->dev, size, bufs[i], + le64_to_cpu(descs[i].addr), + DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); } kfree(bufs); From b0a3cda76a510f7f53790e3c47d37fc3eb81bbc6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 19 Dec 2018 13:47:24 +0200 Subject: [PATCH 1925/2608] ARM: dts: da850-evm: Correct the sound card name [ Upstream commit 7fca69d4e43fa1ae9cb4f652772c132dc5a659c6 ] To avoid the following error: asoc-simple-card sound: ASoC: Failed to create card debugfs directory Which is because the card name contains '/' character, which can not be used in file or directory names. Signed-off-by: Peter Ujfalusi Signed-off-by: Sekhar Nori Signed-off-by: Sasha Levin --- arch/arm/boot/dts/da850-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index c75507922f7d..f5902bd1a972 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -169,7 +169,7 @@ sound { compatible = "simple-audio-card"; - simple-audio-card,name = "DA850/OMAP-L138 EVM"; + simple-audio-card,name = "DA850-OMAPL138 EVM"; simple-audio-card,widgets = "Line", "Line In", "Line", "Line Out"; From e4e11ef8e039660a63e38da3e8f33eaba728faf5 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 19 Dec 2018 13:47:26 +0200 Subject: [PATCH 1926/2608] ARM: dts: da850-lcdk: Correct the sound card name [ Upstream commit c25748acc5c20786ecb7518bfeae8fcef93472d6 ] To avoid the following error: asoc-simple-card sound: ASoC: Failed to create card debugfs directory Which is because the card name contains '/' character, which can not be used in file or directory names. Signed-off-by: Peter Ujfalusi Signed-off-by: Sekhar Nori Signed-off-by: Sasha Levin --- arch/arm/boot/dts/da850-lcdk.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index a0f0916156e6..c9d4cb212b72 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -28,7 +28,7 @@ sound { compatible = "simple-audio-card"; - simple-audio-card,name = "DA850/OMAP-L138 LCDK"; + simple-audio-card,name = "DA850-OMAPL138 LCDK"; simple-audio-card,widgets = "Line", "Line In", "Line", "Line Out"; From ebfc274e51c507e32a6c20822becccd7d5b45f24 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 8 Jan 2019 00:08:18 +0100 Subject: [PATCH 1927/2608] ARM: dts: kirkwood: Fix polarity of GPIO fan lines [ Upstream commit b5f034845e70916fd33e172fad5ad530a29c10ab ] These two lines are active high, not active low. The bug was found when we changed the kernel to respect the polarity defined in the device tree. Fixes: 1b90e06b1429 ("ARM: kirkwood: Use devicetree to define DNS-32[05] fan") Cc: Jamie Lentin Cc: Guenter Roeck Cc: Jason Cooper Cc: Andrew Lunn Cc: Gregory Clement Cc: Sebastian Hesselbarth Cc: Julien D'Ascenzio Reviewed-by: Andrew Lunn Tested-by: Jamie Lentin Reported-by: Julien D'Ascenzio Tested-by: Julien D'Ascenzio Signed-off-by: Linus Walleij Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm/boot/dts/kirkwood-dnskw.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/kirkwood-dnskw.dtsi b/arch/arm/boot/dts/kirkwood-dnskw.dtsi index cbaf06f2f78e..eb917462b219 100644 --- a/arch/arm/boot/dts/kirkwood-dnskw.dtsi +++ b/arch/arm/boot/dts/kirkwood-dnskw.dtsi @@ -36,8 +36,8 @@ compatible = "gpio-fan"; pinctrl-0 = <&pmx_fan_high_speed &pmx_fan_low_speed>; pinctrl-names = "default"; - gpios = <&gpio1 14 GPIO_ACTIVE_LOW - &gpio1 13 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 14 GPIO_ACTIVE_HIGH + &gpio1 13 GPIO_ACTIVE_HIGH>; gpio-fan,speed-map = <0 0 3000 1 6000 2>; From 10c96be398e95e3a8fe0e6b42eeb85883b86b75e Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 1 Dec 2018 12:57:18 +0100 Subject: [PATCH 1928/2608] gpio: pl061: handle failed allocations [ Upstream commit df209c43a0e8258e096fb722dfbdae4f0dd13fde ] devm_kzalloc(), devm_kstrdup() and devm_kasprintf() all can fail internal allocation and return NULL. Using any of the assigned objects without checking is not safe. As this is early in the boot phase and these allocations really should not fail, any failure here is probably an indication of a more serious issue so it makes little sense to try and rollback the previous allocated resources or try to continue; but rather the probe function is simply exited with -ENOMEM. Signed-off-by: Nicholas Mc Guire Fixes: 684284b64aae ("ARM: integrator: add MMCI device to IM-PD1") Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/mach-integrator/impd1.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index a109f6482413..0f916c245a2e 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -393,7 +393,11 @@ static int __ref impd1_probe(struct lm_device *dev) sizeof(*lookup) + 3 * sizeof(struct gpiod_lookup), GFP_KERNEL); chipname = devm_kstrdup(&dev->dev, devname, GFP_KERNEL); - mmciname = kasprintf(GFP_KERNEL, "lm%x:00700", dev->id); + mmciname = devm_kasprintf(&dev->dev, GFP_KERNEL, + "lm%x:00700", dev->id); + if (!lookup || !chipname || !mmciname) + return -ENOMEM; + lookup->dev_id = mmciname; /* * Offsets on GPIO block 1: From 7ed4f5c85ea28aab8af27f3ed95139ecc0ce4809 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 12 Sep 2018 12:58:43 +0200 Subject: [PATCH 1929/2608] drm/nouveau: Don't disable polling in fallback mode [ Upstream commit 118780066e30c34de3d9349710b51780bfa0ba83 ] When a fan is controlled via linear fallback without cstate, we shouldn't stop polling. Otherwise it won't be adjusted again and keeps running at an initial crazy pace. Fixes: 800efb4c2857 ("drm/nouveau/drm/therm/fan: add a fallback if no fan control is specified in the vbios") Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1103356 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107447 Reported-by: Thomas Blume Signed-off-by: Takashi Iwai Reviewed-by: Martin Peres Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c index 952a7cb0a59a..692d4d96766a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c @@ -131,11 +131,12 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode) duty = nvkm_therm_update_linear(therm); break; case NVBIOS_THERM_FAN_OTHER: - if (therm->cstate) + if (therm->cstate) { duty = therm->cstate; - else + poll = false; + } else { duty = nvkm_therm_update_linear_fallback(therm); - poll = false; + } break; } immd = false; From 56c3bf188da8367ee3a5cddfc2dd89c1f847da1e Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Thu, 13 Dec 2018 22:44:08 -0500 Subject: [PATCH 1930/2608] drm/nouveau/falcon: avoid touching registers if engine is off [ Upstream commit a5176a4cb85bb6213daadf691097cf411da35df2 ] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108980 Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/engine/falcon.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c index 2e7b4e2105ef..62cb376e2c01 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -107,8 +108,10 @@ nvkm_falcon_fini(struct nvkm_engine *engine, bool suspend) } } - nvkm_mask(device, base + 0x048, 0x00000003, 0x00000000); - nvkm_wr32(device, base + 0x014, 0xffffffff); + if (nvkm_mc_enabled(device, engine->subdev.index)) { + nvkm_mask(device, base + 0x048, 0x00000003, 0x00000000); + nvkm_wr32(device, base + 0x014, 0xffffffff); + } return 0; } From 682fb20df442ebbfa2570606b62c9e9308ea1e3a Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 8 Jan 2019 18:30:56 +0000 Subject: [PATCH 1931/2608] cifs: Limit memory used by lock request calls to a page [ Upstream commit 92a8109e4d3a34fb6b115c9098b51767dc933444 ] The code tries to allocate a contiguous buffer with a size supplied by the server (maxBuf). This could fail if memory is fragmented since it results in high order allocations for commonly used server implementations. It is also wasteful since there are probably few locks in the usual case. Limit the buffer to be no larger than a page to avoid memory allocation failures due to fragmentation. Signed-off-by: Ross Lagerwall Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/file.c | 8 ++++++++ fs/cifs/smb2file.c | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1e176e11dbfa..852d7d1dcbbd 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1128,6 +1128,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return -EINVAL; } + BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > + PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), + PAGE_SIZE); max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); @@ -1466,6 +1470,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) return -EINVAL; + BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > + PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), + PAGE_SIZE); max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 79078533f807..1add404618f0 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -130,6 +130,8 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, if (max_buf < sizeof(struct smb2_lock_element)) return -EINVAL; + BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf, PAGE_SIZE); max_num = max_buf / sizeof(struct smb2_lock_element); buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) @@ -266,6 +268,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) return -EINVAL; } + BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf, PAGE_SIZE); max_num = max_buf / sizeof(struct smb2_lock_element); buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) { From 7127088830da7d8fe7f3892ec3ebf20e6731e049 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 11 Feb 2019 14:32:40 -0800 Subject: [PATCH 1932/2608] Revert "Input: elan_i2c - add ACPI ID for touchpad in ASUS Aspire F5-573G" commit f420c54e4b12c1361c6ed313002ee7bd7ac58362 upstream. This reverts commit 7db54c89f0b30a101584e09d3729144e6170059d as it breaks Acer Aspire V-371 and other devices. According to Elan: "Acer Aspire F5-573G is MS Precision touchpad which should use hid multitouch driver. ELAN0501 should not be added in elan_i2c." Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202503 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index f2bf8fa1ab04..368871a398a5 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1251,7 +1251,6 @@ MODULE_DEVICE_TABLE(i2c, elan_id); static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, - { "ELAN0501", 0 }, { "ELAN0600", 0 }, { "ELAN0602", 0 }, { "ELAN0605", 0 }, From 4477381f772685afb33b4b81be23520be48ad0f0 Mon Sep 17 00:00:00 2001 From: Mauro Ciancio Date: Mon, 14 Jan 2019 10:24:53 -0300 Subject: [PATCH 1933/2608] Input: elan_i2c - add ACPI ID for touchpad in Lenovo V330-15ISK commit 7ad222b3aed350adfc27ee7eec4587ffe55dfdce upstream. This adds ELAN0617 to the ACPI table to support Elan touchpad found in Lenovo V330-15ISK. Signed-off-by: Mauro Ciancio Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 368871a398a5..fce70f4ef004 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1261,6 +1261,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN060C", 0 }, { "ELAN0611", 0 }, { "ELAN0612", 0 }, + { "ELAN0617", 0 }, { "ELAN0618", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, From 23822b55552fcdd3e7c476c53275c5b74c33226b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Feb 2019 07:57:02 +0100 Subject: [PATCH 1934/2608] perf/core: Fix impossible ring-buffer sizes warning commit 528871b456026e6127d95b1b2bd8e3a003dc1614 upstream. The following commit: 9dff0aa95a32 ("perf/core: Don't WARN() for impossible ring-buffer sizes") results in perf recording failures with larger mmap areas: root@skl:/tmp# perf record -g -a failed to mmap with 12 (Cannot allocate memory) The root cause is that the following condition is buggy: if (order_base_2(size) >= MAX_ORDER) goto fail; The problem is that @size is in bytes and MAX_ORDER is in pages, so the right test is: if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER) goto fail; Fix it. Reported-by: "Jin, Yao" Bisected-by: Borislav Petkov Analyzed-by: Peter Zijlstra Cc: Julien Thierry Cc: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Fixes: 9dff0aa95a32 ("perf/core: Don't WARN() for impossible ring-buffer sizes") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 8b311340b241..489dc6b60053 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -719,7 +719,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) size = sizeof(struct ring_buffer); size += nr_pages * sizeof(void *); - if (order_base_2(size) >= MAX_ORDER) + if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER) goto fail; rb = kzalloc(size, GFP_KERNEL); From b1b0ca701bd69e834492bf3e74e40f4c28432f03 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Feb 2019 13:35:32 +0100 Subject: [PATCH 1935/2608] perf/x86: Add check_period PMU callback commit 81ec3f3c4c4d78f2d3b6689c9816bfbdf7417dbb upstream. Vince (and later on Ravi) reported crashes in the BTS code during fuzzing with the following backtrace: general protection fault: 0000 [#1] SMP PTI ... RIP: 0010:perf_prepare_sample+0x8f/0x510 ... Call Trace: ? intel_pmu_drain_bts_buffer+0x194/0x230 intel_pmu_drain_bts_buffer+0x160/0x230 ? tick_nohz_irq_exit+0x31/0x40 ? smp_call_function_single_interrupt+0x48/0xe0 ? call_function_single_interrupt+0xf/0x20 ? call_function_single_interrupt+0xa/0x20 ? x86_schedule_events+0x1a0/0x2f0 ? x86_pmu_commit_txn+0xb4/0x100 ? find_busiest_group+0x47/0x5d0 ? perf_event_set_state.part.42+0x12/0x50 ? perf_mux_hrtimer_restart+0x40/0xb0 intel_pmu_disable_event+0xae/0x100 ? intel_pmu_disable_event+0xae/0x100 x86_pmu_stop+0x7a/0xb0 x86_pmu_del+0x57/0x120 event_sched_out.isra.101+0x83/0x180 group_sched_out.part.103+0x57/0xe0 ctx_sched_out+0x188/0x240 ctx_resched+0xa8/0xd0 __perf_event_enable+0x193/0x1e0 event_function+0x8e/0xc0 remote_function+0x41/0x50 flush_smp_call_function_queue+0x68/0x100 generic_smp_call_function_single_interrupt+0x13/0x30 smp_call_function_single_interrupt+0x3e/0xe0 call_function_single_interrupt+0xf/0x20 The reason is that while event init code does several checks for BTS events and prevents several unwanted config bits for BTS event (like precise_ip), the PERF_EVENT_IOC_PERIOD allows to create BTS event without those checks being done. Following sequence will cause the crash: If we create an 'almost' BTS event with precise_ip and callchains, and it into a BTS event it will crash the perf_prepare_sample() function because precise_ip events are expected to come in with callchain data initialized, but that's not the case for intel_pmu_drain_bts_buffer() caller. Adding a check_period callback to be called before the period is changed via PERF_EVENT_IOC_PERIOD. It will deny the change if the event would become BTS. Plus adding also the limit_period check as well. Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Naveen N. Rao Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190204123532.GA4794@krava Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 14 ++++++++++++++ arch/x86/events/intel/core.c | 9 +++++++++ arch/x86/events/perf_event.h | 16 ++++++++++++++-- include/linux/perf_event.h | 5 +++++ kernel/events/core.c | 16 ++++++++++++++++ 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 7d12b0d1f359..e14a39598e8a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2250,6 +2250,19 @@ void perf_check_microcode(void) x86_pmu.check_microcode(); } +static int x86_pmu_check_period(struct perf_event *event, u64 value) +{ + if (x86_pmu.check_period && x86_pmu.check_period(event, value)) + return -EINVAL; + + if (value && x86_pmu.limit_period) { + if (x86_pmu.limit_period(event, value) > value) + return -EINVAL; + } + + return 0; +} + static struct pmu pmu = { .pmu_enable = x86_pmu_enable, .pmu_disable = x86_pmu_disable, @@ -2274,6 +2287,7 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .sched_task = x86_pmu_sched_task, .task_ctx_size = sizeof(struct x86_perf_task_context), + .check_period = x86_pmu_check_period, }; void arch_perf_update_userpage(struct perf_event *event, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 1cb5ff3ee728..9f556c94a0b8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3445,6 +3445,11 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx, intel_pmu_lbr_sched_task(ctx, sched_in); } +static int intel_pmu_check_period(struct perf_event *event, u64 value) +{ + return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0; +} + PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); @@ -3525,6 +3530,8 @@ static __initconst const struct x86_pmu core_pmu = { .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .cpu_dead = intel_pmu_cpu_dead, + + .check_period = intel_pmu_check_period, }; static struct attribute *intel_pmu_attrs[]; @@ -3568,6 +3575,8 @@ static __initconst const struct x86_pmu intel_pmu = { .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, + + .check_period = intel_pmu_check_period, }; static __init void intel_clovertown_quirk(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 3c51fcaf1e34..fbbc10338987 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -639,6 +639,11 @@ struct x86_pmu { * Intel host/guest support (KVM) */ struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); + + /* + * Check period value for PERF_EVENT_IOC_PERIOD ioctl. + */ + int (*check_period) (struct perf_event *event, u64 period); }; struct x86_perf_task_context { @@ -848,7 +853,7 @@ static inline int amd_pmu_init(void) #ifdef CONFIG_CPU_SUP_INTEL -static inline bool intel_pmu_has_bts(struct perf_event *event) +static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) { struct hw_perf_event *hwc = &event->hw; unsigned int hw_event, bts_event; @@ -859,7 +864,14 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return hw_event == bts_event && hwc->sample_period == 1; + return hw_event == bts_event && period == 1; +} + +static inline bool intel_pmu_has_bts(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + return intel_pmu_has_bts_period(event, hwc->sample_period); } int intel_pmu_save_and_restart(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..956d76744c91 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -446,6 +446,11 @@ struct pmu { * Filter events for PMU-specific reasons. */ int (*filter_match) (struct perf_event *event); /* optional */ + + /* + * Check period value for PERF_EVENT_IOC_PERIOD ioctl. + */ + int (*check_period) (struct perf_event *event, u64 value); /* optional */ }; /** diff --git a/kernel/events/core.c b/kernel/events/core.c index 991af683ef9e..17d5d41464c6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4738,6 +4738,11 @@ static void __perf_event_period(struct perf_event *event, } } +static int perf_event_check_period(struct perf_event *event, u64 value) +{ + return event->pmu->check_period(event, value); +} + static int perf_event_period(struct perf_event *event, u64 __user *arg) { u64 value; @@ -4754,6 +4759,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (event->attr.freq && value > sysctl_perf_event_sample_rate) return -EINVAL; + if (perf_event_check_period(event, value)) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; @@ -8951,6 +8959,11 @@ static int perf_pmu_nop_int(struct pmu *pmu) return 0; } +static int perf_event_nop_int(struct perf_event *event, u64 value) +{ + return 0; +} + static DEFINE_PER_CPU(unsigned int, nop_txn_flags); static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) @@ -9251,6 +9264,9 @@ got_cpu_context: pmu->pmu_disable = perf_pmu_nop_void; } + if (!pmu->check_period) + pmu->check_period = perf_event_nop_int; + if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; From 3ca538b799b31b9e51878c8375ba25017d7a8faa Mon Sep 17 00:00:00 2001 From: Jurica Vukadin Date: Thu, 7 Feb 2019 16:29:37 +0100 Subject: [PATCH 1936/2608] ALSA: hda - Add quirk for HP EliteBook 840 G5 commit 4cd3016ce996494f78fdfd87ea35c8ca5d0b413e upstream. This enables mute LED support and fixes switching jacks when the laptop is docked. Signed-off-by: Jurica Vukadin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index fb1cec46380d..d14516f31679 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -962,6 +962,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), + SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83b3, "HP EliteBook 830 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83d3, "HP ProBook 640 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), From 240f1d1a1c8183c07a12ab322e56f16678efc679 Mon Sep 17 00:00:00 2001 From: Manuel Reinhardt Date: Thu, 31 Jan 2019 15:32:35 +0100 Subject: [PATCH 1937/2608] ALSA: usb-audio: Fix implicit fb endpoint setup by quirk commit 2bc16b9f3223d049b57202ee702fcb5b9b507019 upstream. The commit a60945fd08e4 ("ALSA: usb-audio: move implicit fb quirks to separate function") introduced an error in the handling of quirks for implicit feedback endpoints. This commit fixes this. If a quirk successfully sets up an implicit feedback endpoint, usb-audio no longer tries to find the implicit fb endpoint itself. Fixes: a60945fd08e4 ("ALSA: usb-audio: move implicit fb quirks to separate function") Signed-off-by: Manuel Reinhardt Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index d8a46d46bcd2..b1a1eb1f65aa 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -313,6 +313,9 @@ static int search_roland_implicit_fb(struct usb_device *dev, int ifnum, return 0; } +/* Setup an implicit feedback endpoint from a quirk. Returns 0 if no quirk + * applies. Returns 1 if a quirk was found. + */ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, struct usb_device *dev, struct usb_interface_descriptor *altsd, @@ -391,7 +394,7 @@ add_sync_ep: subs->data_endpoint->sync_master = subs->sync_endpoint; - return 0; + return 1; } static int set_sync_endpoint(struct snd_usb_substream *subs, @@ -430,6 +433,10 @@ static int set_sync_endpoint(struct snd_usb_substream *subs, if (err < 0) return err; + /* endpoint set by quirk */ + if (err > 0) + return 0; + if (altsd->bNumEndpoints < 2) return 0; From 45d51c117ff36cea60b1ab827f77b6af1afe525d Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 14 Feb 2019 12:08:58 +0800 Subject: [PATCH 1938/2608] kvm: vmx: Fix entry number check for add_atomic_switch_msr() commit 98ae70cc476e833332a2c6bb72f941a25f0de226 upstream. Commit ca83b4a7f2d068da79a0 ("x86/KVM/VMX: Add find_msr() helper function") introduces the helper function find_msr(), which returns -ENOENT when not find the msr in vmx->msr_autoload.guest/host. Correct checking contion of no more available entry in vmx->msr_autoload. Fixes: ca83b4a7f2d0 ("x86/KVM/VMX: Add find_msr() helper function") Cc: stable@vger.kernel.org Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1f5de4314291..8e5a977bf50e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2230,7 +2230,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, if (!entry_only) j = find_msr(&m->host, msr); - if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) { + if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) || + (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) { printk_once(KERN_WARNING "Not enough msr switch entries. " "Can't add msr %x\n", msr); return; From bde50d13e4b1a4327da633d8765e4cc1f60513a2 Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Wed, 6 Feb 2019 10:45:37 -0800 Subject: [PATCH 1939/2608] Input: bma150 - register input device after setting private data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 90cc55f067f6ca0e64e5e52883ece47d8af7b67b upstream. Otherwise we introduce a race condition where userspace can request input before we're ready leading to null pointer dereference such as input: bma150 as /devices/platform/i2c-gpio-2/i2c-5/5-0038/input/input3 Unable to handle kernel NULL pointer dereference at virtual address 00000018 pgd = (ptrval) [00000018] *pgd=55dac831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] PREEMPT ARM Modules linked in: bma150 input_polldev [last unloaded: bma150] CPU: 0 PID: 2870 Comm: accelerometer Not tainted 5.0.0-rc3-dirty #46 Hardware name: Samsung S5PC110/S5PV210-based board PC is at input_event+0x8/0x60 LR is at bma150_report_xyz+0x9c/0xe0 [bma150] pc : [<80450f70>] lr : [<7f0a614c>] psr: 800d0013 sp : a4c1fd78 ip : 00000081 fp : 00020000 r10: 00000000 r9 : a5e2944c r8 : a7455000 r7 : 00000016 r6 : 00000101 r5 : a7617940 r4 : 80909048 r3 : fffffff2 r2 : 00000000 r1 : 00000003 r0 : 00000000 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 54e34019 DAC: 00000051 Process accelerometer (pid: 2870, stack limit = 0x(ptrval)) Stackck: (0xa4c1fd78 to 0xa4c20000) fd60: fffffff3 fc813f6c fd80: 40410581 d7530ce3 a5e2817c a7617f00 a5e29404 a5e2817c 00000000 7f008324 fda0: a5e28000 8044f59c a5fdd9d0 a5e2945c a46a4a00 a5e29668 a7455000 80454f10 fdc0: 80909048 a5e29668 a5fdd9d0 a46a4a00 806316d0 00000000 a46a4a00 801df5f0 fde0: 00000000 d7530ce3 a4c1fec0 a46a4a00 00000000 a5fdd9d0 a46a4a08 801df53c fe00: 00000000 801d74bc a4c1fec0 00000000 a4c1ff70 00000000 a7038da8 00000000 fe20: a46a4a00 801e91fc a411bbe0 801f2e88 00000004 00000000 80909048 00000041 fe40: 00000000 00020000 00000000 dead4ead a6a88da0 00000000 ffffe000 806fcae8 fe60: a4c1fec8 00000000 80909048 00000002 a5fdd9d0 a7660110 a411bab0 00000001 fe80: dead4ead ffffffff ffffffff a4c1fe8c a4c1fe8c d7530ce3 20000013 80909048 fea0: 80909048 a4c1ff70 00000001 fffff000 a4c1e000 00000005 00026038 801eabd8 fec0: a7660110 a411bab0 b9394901 00000006 a696201b 76fb3000 00000000 a7039720 fee0: a5fdd9d0 00000101 00000002 00000096 00000000 00000000 00000000 a4c1ff00 ff00: a6b310f4 805cb174 a6b310f4 00000010 00000fe0 00000010 a4c1e000 d7530ce3 ff20: 00000003 a5f41400 a5f41424 00000000 a6962000 00000000 00000003 00000002 ff40: ffffff9c 000a0000 80909048 d7530ce3 a6962000 00000003 80909048 ffffff9c ff60: a6962000 801d890c 00000000 00000000 00020000 a7590000 00000004 00000100 ff80: 00000001 d7530ce3 000288b8 00026320 000288b8 00000005 80101204 a4c1e000 ffa0: 00000005 80101000 000288b8 00026320 000288b8 000a0000 00000000 00000000 ffc0: 000288b8 00026320 000288b8 00000005 7eef3bac 000264e8 00028ad8 00026038 ffe0: 00000005 7eef3300 76f76e91 76f78546 800d0030 000288b8 00000000 00000000 [<80450f70>] (input_event) from [] (0xa5e2817c) Code: e1a08148 eaffffa8 e351001f 812fff1e (e590c018) ---[ end trace 1c691ee85f2ff243 ]--- Signed-off-by: Jonathan Bakker Signed-off-by: Paweł Chmiel Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/bma150.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/input/misc/bma150.c b/drivers/input/misc/bma150.c index 1efcfdf9f8a8..dd9dd4e40827 100644 --- a/drivers/input/misc/bma150.c +++ b/drivers/input/misc/bma150.c @@ -481,13 +481,14 @@ static int bma150_register_input_device(struct bma150_data *bma150) idev->close = bma150_irq_close; input_set_drvdata(idev, bma150); + bma150->input = idev; + error = input_register_device(idev); if (error) { input_free_device(idev); return error; } - bma150->input = idev; return 0; } @@ -510,15 +511,15 @@ static int bma150_register_polled_device(struct bma150_data *bma150) bma150_init_input_device(bma150, ipoll_dev->input); + bma150->input_polled = ipoll_dev; + bma150->input = ipoll_dev->input; + error = input_register_polled_device(ipoll_dev); if (error) { input_free_polled_device(ipoll_dev); return error; } - bma150->input_polled = ipoll_dev; - bma150->input = ipoll_dev->input; - return 0; } From e0b6920afa874e73937e7d1accbdd1b5d4a9bcf9 Mon Sep 17 00:00:00 2001 From: Matti Kurkela Date: Thu, 7 Feb 2019 23:49:23 -0800 Subject: [PATCH 1940/2608] Input: elantech - enable 3rd button support on Fujitsu CELSIUS H780 commit e8b22d0a329f0fb5c7ef95406872d268f01ee3b1 upstream. Like Fujitsu CELSIUS H760, the H780 also has a three-button Elantech touchpad, but the driver needs to be told so to enable the middle touchpad button. The elantech_dmi_force_crc_enabled quirk was not necessary with the H780. Also document the fw_version and caps values detected for both H760 and H780 models. Signed-off-by: Matti Kurkela Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elantech.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 84c69e962230..fda33fc3ffcc 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1121,6 +1121,8 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse, * Asus UX31 0x361f00 20, 15, 0e clickpad * Asus UX32VD 0x361f02 00, 15, 0e clickpad * Avatar AVIU-145A2 0x361f00 ? clickpad + * Fujitsu CELSIUS H760 0x570f02 40, 14, 0c 3 hw buttons (**) + * Fujitsu CELSIUS H780 0x5d0f02 41, 16, 0d 3 hw buttons (**) * Fujitsu LIFEBOOK E544 0x470f00 d0, 12, 09 2 hw buttons * Fujitsu LIFEBOOK E546 0x470f00 50, 12, 09 2 hw buttons * Fujitsu LIFEBOOK E547 0x470f00 50, 12, 09 2 hw buttons @@ -1173,6 +1175,13 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = { DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H760"), }, }, + { + /* Fujitsu H780 also has a middle button */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H780"), + }, + }, #endif { } }; From 716926f4096552ddf849b2c71815e0fbf88289f7 Mon Sep 17 00:00:00 2001 From: Sandeep Patil Date: Tue, 12 Feb 2019 15:36:11 -0800 Subject: [PATCH 1941/2608] mm: proc: smaps_rollup: fix pss_locked calculation commit 27dd768ed8db48beefc4d9e006c58e7a00342bde upstream. The 'pss_locked' field of smaps_rollup was being calculated incorrectly. It accumulated the current pss everytime a locked VMA was found. Fix that by adding to 'pss_locked' the same time as that of 'pss' if the vma being walked is locked. Link: http://lkml.kernel.org/r/20190203065425.14650-1-sspatil@android.com Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") Signed-off-by: Sandeep Patil Acked-by: Vlastimil Babka Reviewed-by: Joel Fernandes (Google) Cc: Alexey Dobriyan Cc: Daniel Colascione Cc: [4.14.x, 4.19.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2b47757c9c68..5e63c459dc61 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -459,7 +459,7 @@ struct mem_size_stats { }; static void smaps_account(struct mem_size_stats *mss, struct page *page, - bool compound, bool young, bool dirty) + bool compound, bool young, bool dirty, bool locked) { int i, nr = compound ? 1 << compound_order(page) : 1; unsigned long size = nr * PAGE_SIZE; @@ -486,24 +486,31 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, else mss->private_clean += size; mss->pss += (u64)size << PSS_SHIFT; + if (locked) + mss->pss_locked += (u64)size << PSS_SHIFT; return; } for (i = 0; i < nr; i++, page++) { int mapcount = page_mapcount(page); + unsigned long pss = (PAGE_SIZE << PSS_SHIFT); if (mapcount >= 2) { if (dirty || PageDirty(page)) mss->shared_dirty += PAGE_SIZE; else mss->shared_clean += PAGE_SIZE; - mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; + mss->pss += pss / mapcount; + if (locked) + mss->pss_locked += pss / mapcount; } else { if (dirty || PageDirty(page)) mss->private_dirty += PAGE_SIZE; else mss->private_clean += PAGE_SIZE; - mss->pss += PAGE_SIZE << PSS_SHIFT; + mss->pss += pss; + if (locked) + mss->pss_locked += pss; } } } @@ -526,6 +533,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; if (pte_present(*pte)) { @@ -568,7 +576,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte)); + smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -577,6 +585,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page; /* FOLL_DUMP will return -EFAULT on huge zero page */ @@ -591,7 +600,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, /* pass */; else VM_BUG_ON_PAGE(1, page); - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd)); + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -792,11 +801,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) } } #endif - /* mmap_sem is held in m_start */ walk_page_vma(vma, &smaps_walk); - if (vma->vm_flags & VM_LOCKED) - mss->pss_locked += mss->pss; if (!rollup_mode) { show_map_vma(m, vma, is_pid); From 169cede8169d99d2a7f602629a39fe079ea84e6f Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Mon, 31 Dec 2018 11:53:55 +0000 Subject: [PATCH 1942/2608] alpha: fix page fault handling for r16-r18 targets commit 491af60ffb848b59e82f7c9145833222e0bf27a5 upstream. Fix page fault handling code to fixup r16-r18 registers. Before the patch code had off-by-two registers bug. This bug caused overwriting of ps,pc,gp registers instead of fixing intended r16,r17,r18 (see `struct pt_regs`). More details: Initially Dmitry noticed a kernel bug as a failure on strace test suite. Test passes unmapped userspace pointer to io_submit: ```c #include #include #include #include int main(void) { unsigned long ctx = 0; if (syscall(__NR_io_setup, 1, &ctx)) err(1, "io_setup"); const size_t page_size = sysconf(_SC_PAGESIZE); const size_t size = page_size * 2; void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == ptr) err(1, "mmap(%zu)", size); if (munmap(ptr, size)) err(1, "munmap"); syscall(__NR_io_submit, ctx, 1, ptr + page_size); syscall(__NR_io_destroy, ctx); return 0; } ``` Running this test causes kernel to crash when handling page fault: ``` Unable to handle kernel paging request at virtual address ffffffffffff9468 CPU 3 aio(26027): Oops 0 pc = [] ra = [] ps = 0000 Not tainted pc is at sys_io_submit+0x108/0x200 ra is at sys_io_submit+0x6c/0x200 v0 = fffffc00c58e6300 t0 = fffffffffffffff2 t1 = 000002000025e000 t2 = fffffc01f159fef8 t3 = fffffc0001009640 t4 = fffffc0000e0f6e0 t5 = 0000020001002e9e t6 = 4c41564e49452031 t7 = fffffc01f159c000 s0 = 0000000000000002 s1 = 000002000025e000 s2 = 0000000000000000 s3 = 0000000000000000 s4 = 0000000000000000 s5 = fffffffffffffff2 s6 = fffffc00c58e6300 a0 = fffffc00c58e6300 a1 = 0000000000000000 a2 = 000002000025e000 a3 = 00000200001ac260 a4 = 00000200001ac1e8 a5 = 0000000000000001 t8 = 0000000000000008 t9 = 000000011f8bce30 t10= 00000200001ac440 t11= 0000000000000000 pv = fffffc00006fd320 at = 0000000000000000 gp = 0000000000000000 sp = 00000000265fd174 Disabling lock debugging due to kernel taint Trace: [] entSys+0xa4/0xc0 ``` Here `gp` has invalid value. `gp is s overwritten by a fixup for the following page fault handler in `io_submit` syscall handler: ``` __se_sys_io_submit ... ldq a1,0(t1) bne t0,4280 <__se_sys_io_submit+0x180> ``` After a page fault `t0` should contain -EFALUT and `a1` is 0. Instead `gp` was overwritten in place of `a1`. This happens due to a off-by-two bug in `dpf_reg()` for `r16-r18` (aka `a0-a2`). I think the bug went unnoticed for a long time as `gp` is one of scratch registers. Any kernel function call would re-calculate `gp`. Dmitry tracked down the bug origin back to 2.1.32 kernel version where trap_a{0,1,2} fields were inserted into struct pt_regs. And even before that `dpf_reg()` contained off-by-one error. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: linux-alpha@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reported-and-reviewed-by: "Dmitry V. Levin" Cc: stable@vger.kernel.org # v2.1.32+ Bug: https://bugs.gentoo.org/672040 Signed-off-by: Sergei Trofimovich Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index cd3c572ee912..e9392302c5da 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -78,7 +78,7 @@ __load_new_mm_context(struct mm_struct *next_mm) /* Macro for exception fixup code to access integer registers. */ #define dpf_reg(r) \ (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+8 : (r)-10]) + (r) <= 18 ? (r)+10 : (r)-10]) asmlinkage void do_page_fault(unsigned long address, unsigned long mmcsr, From 94f42cc1ed4ae1456c5150b318dd0f78c203fb40 Mon Sep 17 00:00:00 2001 From: Meelis Roos Date: Fri, 12 Oct 2018 12:27:51 +0300 Subject: [PATCH 1943/2608] alpha: Fix Eiger NR_IRQS to 128 commit bfc913682464f45bc4d6044084e370f9048de9d5 upstream. Eiger machine vector definition has nr_irqs 128, and working 2.6.26 boot shows SCSI getting IRQ-s 64 and 65. Current kernel boot fails because Symbios SCSI fails to request IRQ-s and does not find the disks. It has been broken at least since 3.18 - the earliest I could test with my gcc-5. The headers have moved around and possibly another order of defines has worked in the past - but since 128 seems to be correct and used, fix arch/alpha/include/asm/irq.h to have NR_IRQS=128 for Eiger. This fixes 4.19-rc7 boot on my Force Flexor A264 (Eiger subarch). Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Meelis Roos Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/irq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/alpha/include/asm/irq.h b/arch/alpha/include/asm/irq.h index 4d17cacd1462..432402c8e47f 100644 --- a/arch/alpha/include/asm/irq.h +++ b/arch/alpha/include/asm/irq.h @@ -56,15 +56,15 @@ #elif defined(CONFIG_ALPHA_DP264) || \ defined(CONFIG_ALPHA_LYNX) || \ - defined(CONFIG_ALPHA_SHARK) || \ - defined(CONFIG_ALPHA_EIGER) + defined(CONFIG_ALPHA_SHARK) # define NR_IRQS 64 #elif defined(CONFIG_ALPHA_TITAN) #define NR_IRQS 80 #elif defined(CONFIG_ALPHA_RAWHIDE) || \ - defined(CONFIG_ALPHA_TAKARA) + defined(CONFIG_ALPHA_TAKARA) || \ + defined(CONFIG_ALPHA_EIGER) # define NR_IRQS 128 #elif defined(CONFIG_ALPHA_WILDFIRE) From 092fc7558dee1c818c8429093c09b02a7029c65b Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Wed, 16 Jan 2019 15:16:29 +0100 Subject: [PATCH 1944/2608] tracing/uprobes: Fix output for multiple string arguments commit 0722069a5374b904ec1a67f91249f90e1cfae259 upstream. When printing multiple uprobe arguments as strings the output for the earlier arguments would also include all later string arguments. This is best explained in an example: Consider adding a uprobe to a function receiving two strings as parameters which is at offset 0xa0 in strlib.so and we want to print both parameters when the uprobe is hit (on x86_64): $ echo 'p:func /lib/strlib.so:0xa0 +0(%di):string +0(%si):string' > \ /sys/kernel/debug/tracing/uprobe_events When the function is called as func("foo", "bar") and we hit the probe, the trace file shows a line like the following: [...] func: (0x7f7e683706a0) arg1="foobar" arg2="bar" Note the extra "bar" printed as part of arg1. This behaviour stacks up for additional string arguments. The strings are stored in a dynamically growing part of the uprobe buffer by fetch_store_string() after copying them from userspace via strncpy_from_user(). The return value of strncpy_from_user() is then directly used as the required size for the string. However, this does not take the terminating null byte into account as the documentation for strncpy_from_user() cleary states that it "[...] returns the length of the string (not including the trailing NUL)" even though the null byte will be copied to the destination. Therefore, subsequent calls to fetch_store_string() will overwrite the terminating null byte of the most recently fetched string with the first character of the current string, leading to the "accumulation" of strings in earlier arguments in the output. Fix this by incrementing the return value of strncpy_from_user() by one if we did not hit the maximum buffer size. Link: http://lkml.kernel.org/r/20190116141629.5752-1-andreas.ziegler@fau.de Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 5baaa59ef09e ("tracing/probes: Implement 'memory' fetch method for uprobes") Acked-by: Masami Hiramatsu Signed-off-by: Andreas Ziegler Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 86718c85d8d3..fdf2ea4d64ec 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -153,7 +153,14 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, ret = strncpy_from_user(dst, src, maxlen); if (ret == maxlen) - dst[--ret] = '\0'; + dst[ret - 1] = '\0'; + else if (ret >= 0) + /* + * Include the terminating null byte. In this case it + * was copied by strncpy_from_user but not accounted + * for in ret. + */ + ret++; if (ret < 0) { /* Failed to fetch string */ ((u8 *)get_rloc_data(dest))[0] = '\0'; From c87d8ef13b04e97483f2698a339552f6a692bf84 Mon Sep 17 00:00:00 2001 From: Hedi Berriche Date: Wed, 13 Feb 2019 19:34:13 +0000 Subject: [PATCH 1945/2608] x86/platform/UV: Use efi_runtime_lock to serialise BIOS calls commit f331e766c4be33f4338574f3c9f7f77e98ab4571 upstream. Calls into UV firmware must be protected against concurrency, expose the efi_runtime_lock to the UV platform, and use it to serialise UV BIOS calls. Signed-off-by: Hedi Berriche Signed-off-by: Borislav Petkov Reviewed-by: Ard Biesheuvel Reviewed-by: Russ Anderson Reviewed-by: Dimitri Sivanich Reviewed-by: Mike Travis Cc: Andy Shevchenko Cc: Bhupesh Sharma Cc: Darren Hart Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-efi Cc: platform-driver-x86@vger.kernel.org Cc: stable@vger.kernel.org # v4.9+ Cc: Steve Wahl Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190213193413.25560-5-hedi.berriche@hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uv/bios.h | 8 +++++++- arch/x86/platform/uv/bios_uv.c | 23 +++++++++++++++++++++-- drivers/firmware/efi/runtime-wrappers.c | 7 +++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index e652a7cc6186..3f697a9e3f59 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -48,7 +48,8 @@ enum { BIOS_STATUS_SUCCESS = 0, BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, BIOS_STATUS_EINVAL = -EINVAL, - BIOS_STATUS_UNAVAIL = -EBUSY + BIOS_STATUS_UNAVAIL = -EBUSY, + BIOS_STATUS_ABORT = -EINTR, }; /* Address map parameters */ @@ -167,4 +168,9 @@ extern long system_serial_number; extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ +/* + * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details + */ +extern struct semaphore __efi_uv_runtime_lock; + #endif /* _ASM_X86_UV_BIOS_H */ diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 4a6a5a26c582..eb33432f2f24 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -29,7 +29,8 @@ struct uv_systab *uv_systab; -s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) +static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) { struct uv_systab *tab = uv_systab; s64 ret; @@ -51,6 +52,19 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) return ret; } + +s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) +{ + s64 ret; + + if (down_interruptible(&__efi_uv_runtime_lock)) + return BIOS_STATUS_ABORT; + + ret = __uv_bios_call(which, a1, a2, a3, a4, a5); + up(&__efi_uv_runtime_lock); + + return ret; +} EXPORT_SYMBOL_GPL(uv_bios_call); s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, @@ -59,10 +73,15 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, unsigned long bios_flags; s64 ret; + if (down_interruptible(&__efi_uv_runtime_lock)) + return BIOS_STATUS_ABORT; + local_irq_save(bios_flags); - ret = uv_bios_call(which, a1, a2, a3, a4, a5); + ret = __uv_bios_call(which, a1, a2, a3, a4, a5); local_irq_restore(bios_flags); + up(&__efi_uv_runtime_lock); + return ret; } diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index ae54870b2788..dd7f63354ca0 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -49,6 +49,13 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call) local_irq_restore(flags); } +/* + * Expose the EFI runtime lock to the UV platform + */ +#ifdef CONFIG_X86_UV +extern struct semaphore __efi_uv_runtime_lock __alias(efi_runtime_lock); +#endif + /* * According to section 7.1 of the UEFI spec, Runtime Services are not fully * reentrant, and there are particular combinations of calls that need to be From 910e3b31548298c8762e0c635f192e3d76fef9ee Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 11 Feb 2019 23:27:42 -0600 Subject: [PATCH 1946/2608] signal: Restore the stop PTRACE_EVENT_EXIT commit cf43a757fd49442bc38f76088b70c2299eed2c2f upstream. In the middle of do_exit() there is there is a call "ptrace_event(PTRACE_EVENT_EXIT, code);" That call places the process in TACKED_TRACED aka "(TASK_WAKEKILL | __TASK_TRACED)" and waits for for the debugger to release the task or SIGKILL to be delivered. Skipping past dequeue_signal when we know a fatal signal has already been delivered resulted in SIGKILL remaining pending and TIF_SIGPENDING remaining set. This in turn caused the scheduler to not sleep in PTACE_EVENT_EXIT as it figured a fatal signal was pending. This also caused ptrace_freeze_traced in ptrace_check_attach to fail because it left a per thread SIGKILL pending which is what fatal_signal_pending tests for. This difference in signal state caused strace to report strace: Exit of unknown pid NNNNN ignored Therefore update the signal handling state like dequeue_signal would when removing a per thread SIGKILL, by removing SIGKILL from the per thread signal mask and clearing TIF_SIGPENDING. Acked-by: Oleg Nesterov Reported-by: Oleg Nesterov Reported-by: Ivan Delalande Cc: stable@vger.kernel.org Fixes: 35634ffa1751 ("signal: Always notice exiting tasks") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 04b3a621b3cc..619c6160f64f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2268,9 +2268,12 @@ relock: } /* Has this task already been marked for death? */ - ksig->info.si_signo = signr = SIGKILL; - if (signal_group_exit(signal)) + if (signal_group_exit(signal)) { + ksig->info.si_signo = signr = SIGKILL; + sigdelset(¤t->pending.signal, SIGKILL); + recalc_sigpending(); goto fatal; + } for (;;) { struct k_sigaction *ka; From f4dc9d3867aeb2f8adaa1457fd32cfbf46607a8e Mon Sep 17 00:00:00 2001 From: Nate Dailey Date: Thu, 7 Feb 2019 14:19:01 -0500 Subject: [PATCH 1947/2608] md/raid1: don't clear bitmap bits on interrupted recovery. commit dfcc34c99f3ebc16b787b118763bf9cb6b1efc7a upstream. sync_request_write no longer submits writes to a Faulty device. This has the unfortunate side effect that bitmap bits can be incorrectly cleared if a recovery is interrupted (previously, end_sync_write would have prevented this). This means the next recovery may not copy everything it should, potentially corrupting data. Add a function for doing the proper md_bitmap_end_sync, called from end_sync_write and the Faulty case in sync_request_write. backport note to 4.14: s/md_bitmap_end_sync/bitmap_end_sync Cc: stable@vger.kernel.org 4.14+ Fixes: 0c9d5b127f69 ("md/raid1: avoid reusing a resync bio after error handling.") Reviewed-by: Jack Wang Tested-by: Jack Wang Signed-off-by: Nate Dailey Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 205f86f1a6cb..31c4391f6a62 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1854,6 +1854,20 @@ static void end_sync_read(struct bio *bio) reschedule_retry(r1_bio); } +static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio) +{ + sector_t sync_blocks = 0; + sector_t s = r1_bio->sector; + long sectors_to_go = r1_bio->sectors; + + /* make sure these bits don't get cleared. */ + do { + bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); + s += sync_blocks; + sectors_to_go -= sync_blocks; + } while (sectors_to_go > 0); +} + static void end_sync_write(struct bio *bio) { int uptodate = !bio->bi_status; @@ -1865,16 +1879,7 @@ static void end_sync_write(struct bio *bio) struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev; if (!uptodate) { - sector_t sync_blocks = 0; - sector_t s = r1_bio->sector; - long sectors_to_go = r1_bio->sectors; - /* make sure these bits doesn't get cleared. */ - do { - bitmap_end_sync(mddev->bitmap, s, - &sync_blocks, 1); - s += sync_blocks; - sectors_to_go -= sync_blocks; - } while (sectors_to_go > 0); + abort_sync_write(mddev, r1_bio); set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, & @@ -2164,8 +2169,10 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) (i == r1_bio->read_disk || !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) continue; - if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) + if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) { + abort_sync_write(mddev, r1_bio); continue; + } bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (test_bit(FailFast, &conf->mirrors[i].rdev->flags)) From 24975d23bd7b383b42b6201e0c7cf675a2133f5b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 12 Feb 2019 14:28:03 +0100 Subject: [PATCH 1948/2608] x86/a.out: Clear the dump structure initially commit 10970e1b4be9c74fce8ab6e3c34a7d718f063f2c upstream. dump_thread32() in aout_core_dump() does not clear the user32 structure allocated on the stack as the first thing on function entry. As a result, the dump.u_comm, dump.u_ar0 and dump.signal which get assigned before the clearing, get overwritten. Rename that function to fill_dump() to make it clear what it does and call it first thing. This was caught while staring at a patch by Derek Robson . Signed-off-by: Borislav Petkov Cc: Derek Robson Cc: Linus Torvalds Cc: Michael Matz Cc: x86@kernel.org Cc: Link: https://lkml.kernel.org/r/20190202005512.3144-1-robsonde@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/ia32/ia32_aout.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 8e02b30cf08e..3ebd77770f98 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -51,7 +51,7 @@ static unsigned long get_dr(int n) /* * fill in the user structure for a core dump.. */ -static void dump_thread32(struct pt_regs *regs, struct user32 *dump) +static void fill_dump(struct pt_regs *regs, struct user32 *dump) { u32 fs, gs; memset(dump, 0, sizeof(*dump)); @@ -157,10 +157,12 @@ static int aout_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; + + fill_dump(cprm->regs, &dump); + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); dump.u_ar0 = offsetof(struct user32, regs); dump.signal = cprm->siginfo->si_signo; - dump_thread32(cprm->regs, &dump); /* * If the size of the dump file exceeds the rlimit, then see From 73fb3894976e34b8d6b667163b37027c59b750ea Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 8 Feb 2019 10:52:07 -0500 Subject: [PATCH 1949/2608] dm crypt: don't overallocate the integrity tag space commit ff0c129d3b5ecb3df7c8f5e2236582bf745b6c5f upstream. bio_sectors() returns the value in the units of 512-byte sectors (no matter what the real sector size of the device). dm-crypt multiplies bio_sectors() by on_disk_tag_size to calculate the space allocated for integrity tags. If dm-crypt is running with sector size larger than 512b, it allocates more data than is needed. Device Mapper trims the extra space when passing the bio to dm-integrity, so this bug didn't result in any visible misbehavior. But it must be fixed to avoid wasteful memory allocation for the block integrity payload. Fixes: ef43aa38063a6 ("dm crypt: add cryptographic data integrity protection (authenticated encryption)") Cc: stable@vger.kernel.org # 4.12+ Reported-by: Milan Broz Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1f6d8b6be5c7..0d2005e5b24c 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -935,7 +935,7 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio) if (IS_ERR(bip)) return PTR_ERR(bip); - tag_len = io->cc->on_disk_tag_size * bio_sectors(bio); + tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift); bip->bip_iter.bi_size = tag_len; bip->bip_iter.bi_sector = io->cc->start + io->sector; From 9998b44f053e936b627afa20bb10ebd5b0119c51 Mon Sep 17 00:00:00 2001 From: Nikos Tsironis Date: Thu, 14 Feb 2019 20:38:47 +0200 Subject: [PATCH 1950/2608] dm thin: fix bug where bio that overwrites thin block ignores FUA commit 4ae280b4ee3463fa57bbe6eede26b97daff8a0f1 upstream. When provisioning a new data block for a virtual block, either because the block was previously unallocated or because we are breaking sharing, if the whole block of data is being overwritten the bio that triggered the provisioning is issued immediately, skipping copying or zeroing of the data block. When this bio completes the new mapping is inserted in to the pool's metadata by process_prepared_mapping(), where the bio completion is signaled to the upper layers. This completion is signaled without first committing the metadata. If the bio in question has the REQ_FUA flag set and the system crashes right after its completion and before the next metadata commit, then the write is lost despite the REQ_FUA flag requiring that I/O completion for this request must only be signaled after the data has been committed to non-volatile storage. Fix this by deferring the completion of overwrite bios, with the REQ_FUA flag set, until after the metadata has been committed. Cc: stable@vger.kernel.org Signed-off-by: Nikos Tsironis Acked-by: Joe Thornber Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 55 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 40b624d8255d..18d6a8a10d5d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -257,6 +257,7 @@ struct pool { spinlock_t lock; struct bio_list deferred_flush_bios; + struct bio_list deferred_flush_completions; struct list_head prepared_mappings; struct list_head prepared_discards; struct list_head prepared_discards_pt2; @@ -950,6 +951,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) mempool_free(m, m->tc->pool->mapping_pool); } +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + /* + * If the bio has the REQ_FUA flag set we must commit the metadata + * before signaling its completion. + */ + if (!bio_triggers_commit(tc, bio)) { + bio_endio(bio); + return; + } + + /* + * Complete bio with an error if earlier I/O caused changes to the + * metadata that can't be committed, e.g, due to I/O errors on the + * metadata device. + */ + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in process_deferred_bios(). + */ + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_completions, bio); + spin_unlock_irqrestore(&pool->lock, flags); +} + static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; @@ -982,7 +1016,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ if (bio) { inc_remap_and_issue_cell(tc, m->cell, m->data_block); - bio_endio(bio); + complete_overwrite_bio(tc, bio); } else { inc_all_io_entry(tc->pool, m->cell->holder); remap_and_issue(tc, m->cell->holder, m->data_block); @@ -2328,7 +2362,7 @@ static void process_deferred_bios(struct pool *pool) { unsigned long flags; struct bio *bio; - struct bio_list bios; + struct bio_list bios, bio_completions; struct thin_c *tc; tc = get_first_thin(pool); @@ -2339,26 +2373,36 @@ static void process_deferred_bios(struct pool *pool) } /* - * If there are any deferred flush bios, we must commit - * the metadata before issuing them. + * If there are any deferred flush bios, we must commit the metadata + * before issuing them or signaling their completion. */ bio_list_init(&bios); + bio_list_init(&bio_completions); + spin_lock_irqsave(&pool->lock, flags); bio_list_merge(&bios, &pool->deferred_flush_bios); bio_list_init(&pool->deferred_flush_bios); + + bio_list_merge(&bio_completions, &pool->deferred_flush_completions); + bio_list_init(&pool->deferred_flush_completions); spin_unlock_irqrestore(&pool->lock, flags); - if (bio_list_empty(&bios) && + if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) && !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool))) return; if (commit(pool)) { + bio_list_merge(&bios, &bio_completions); + while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; } pool->last_commit_jiffies = jiffies; + while ((bio = bio_list_pop(&bio_completions))) + bio_endio(bio); + while ((bio = bio_list_pop(&bios))) generic_make_request(bio); } @@ -2965,6 +3009,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); spin_lock_init(&pool->lock); bio_list_init(&pool->deferred_flush_bios); + bio_list_init(&pool->deferred_flush_completions); INIT_LIST_HEAD(&pool->prepared_mappings); INIT_LIST_HEAD(&pool->prepared_discards); INIT_LIST_HEAD(&pool->prepared_discards_pt2); From 36c495e92aaab36a4cec639f855c467e7f10dc73 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 7 Feb 2019 10:54:53 +0200 Subject: [PATCH 1951/2608] drm/i915: Prevent a race during I915_GEM_MMAP ioctl with WC set commit 2e7bd10e05afb866b5fb13eda25095c35d7a27cc upstream. Make sure the underlying VMA in the process address space is the same as it was during vm_mmap to avoid applying WC to wrong VMA. A more long-term solution would be to have vm_mmap_locked variant in linux/mmap.h for when caller wants to hold mmap_sem for an extended duration. v2: - Refactor the compare function Fixes: 1816f9236303 ("drm/i915: Support creation of unbound wc user mappings for objects") Reported-by: Adam Zabrocki Suggested-by: Linus Torvalds Signed-off-by: Joonas Lahtinen Cc: # v4.0+ Cc: Akash Goel Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Adam Zabrocki Reviewed-by: Chris Wilson Reviewed-by: Tvrtko Ursulin #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190207085454.10598-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 5c4604e757ba9b193b09768d75a7d2105a5b883f) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f19e6d9a717..5d8a67c65141 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1633,6 +1633,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, return 0; } +static inline bool +__vma_matches(struct vm_area_struct *vma, struct file *filp, + unsigned long addr, unsigned long size) +{ + if (vma->vm_file != filp) + return false; + + return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size; +} + /** * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address * it is mapped to. @@ -1691,7 +1701,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return -EINTR; } vma = find_vma(mm, addr); - if (vma) + if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); else From e1e5fa73e466eb3ecaffb5b6bdc47809fc21ab86 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Tue, 30 Oct 2018 12:24:33 +0530 Subject: [PATCH 1952/2608] sched, trace: Fix prev_state output in sched_switch tracepoint commit 3054426dc68e5d63aa6a6e9b91ac4ec78e3f3805 upstream. commit 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout") tried to fix the problem introduced by a previous commit efb40f588b43 ("sched/tracing: Fix trace_sched_switch task-state printing"). However the prev_state output in sched_switch is still broken. task_state_index() uses fls() which considers the LSB as 1. Left shifting 1 by this value gives an incorrect mapping to the task state. Fix this by decrementing the value returned by __get_task_state() before shifting. Link: http://lkml.kernel.org/r/1540882473-1103-1-git-send-email-pkondeti@codeaurora.org Cc: stable@vger.kernel.org Fixes: 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout") Signed-off-by: Pavankumar Kondeti Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- include/trace/events/sched.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 0812cd5408c9..6e692a52936c 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -107,6 +107,8 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, #ifdef CREATE_TRACE_POINTS static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) { + unsigned int state; + #ifdef CONFIG_SCHED_DEBUG BUG_ON(p != current); #endif /* CONFIG_SCHED_DEBUG */ @@ -118,7 +120,15 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * if (preempt) return TASK_REPORT_MAX; - return 1 << __get_task_state(p); + /* + * task_state_index() uses fls() and returns a value from 0-8 range. + * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using + * it for left shift operation to get the correct task->state + * mapping. + */ + state = __get_task_state(p); + + return state ? (1 << (state - 1)) : state; } #endif /* CREATE_TRACE_POINTS */ From 16de7dede23b2cf0c6a17b23e3317ef9b0c9ac9c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Dec 2018 14:35:14 +0100 Subject: [PATCH 1953/2608] futex: Cure exit race commit da791a667536bf8322042e38ca85d55a78d3c273 upstream. Stefan reported, that the glibc tst-robustpi4 test case fails occasionally. That case creates the following race between sys_exit() and sys_futex_lock_pi(): CPU0 CPU1 sys_exit() sys_futex() do_exit() futex_lock_pi() exit_signals(tsk) No waiters: tsk->flags |= PF_EXITING; *uaddr == 0x00000PID mm_release(tsk) Set waiter bit exit_robust_list(tsk) { *uaddr = 0x80000PID; Set owner died attach_to_pi_owner() { *uaddr = 0xC0000000; tsk = get_task(PID); } if (!tsk->flags & PF_EXITING) { ... attach(); tsk->flags |= PF_EXITPIDONE; } else { if (!(tsk->flags & PF_EXITPIDONE)) return -EAGAIN; return -ESRCH; <--- FAIL } ESRCH is returned all the way to user space, which triggers the glibc test case assert. Returning ESRCH unconditionally is wrong here because the user space value has been changed by the exiting task to 0xC0000000, i.e. the FUTEX_OWNER_DIED bit is set and the futex PID value has been cleared. This is a valid state and the kernel has to handle it, i.e. taking the futex. Cure it by rereading the user space value when PF_EXITING and PF_EXITPIDONE is set in the task which 'owns' the futex. If the value has changed, let the kernel retry the operation, which includes all regular sanity checks and correctly handles the FUTEX_OWNER_DIED case. If it hasn't changed, then return ESRCH as there is no way to distinguish this case from malfunctioning user space. This happens when the exiting task did not have a robust list, the robust list was corrupted or the user space value in the futex was simply bogus. Reported-by: Stefan Liebler Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Heiko Carstens Cc: Darren Hart Cc: Ingo Molnar Cc: Sasha Levin Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=200467 Link: https://lkml.kernel.org/r/20181210152311.986181245@linutronix.de Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 69 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index abe04a2bb5b9..29d708d0b3d1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1166,11 +1166,65 @@ out_error: return ret; } +static int handle_exit_race(u32 __user *uaddr, u32 uval, + struct task_struct *tsk) +{ + u32 uval2; + + /* + * If PF_EXITPIDONE is not yet set, then try again. + */ + if (tsk && !(tsk->flags & PF_EXITPIDONE)) + return -EAGAIN; + + /* + * Reread the user space value to handle the following situation: + * + * CPU0 CPU1 + * + * sys_exit() sys_futex() + * do_exit() futex_lock_pi() + * futex_lock_pi_atomic() + * exit_signals(tsk) No waiters: + * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID + * mm_release(tsk) Set waiter bit + * exit_robust_list(tsk) { *uaddr = 0x80000PID; + * Set owner died attach_to_pi_owner() { + * *uaddr = 0xC0000000; tsk = get_task(PID); + * } if (!tsk->flags & PF_EXITING) { + * ... attach(); + * tsk->flags |= PF_EXITPIDONE; } else { + * if (!(tsk->flags & PF_EXITPIDONE)) + * return -EAGAIN; + * return -ESRCH; <--- FAIL + * } + * + * Returning ESRCH unconditionally is wrong here because the + * user space value has been changed by the exiting task. + * + * The same logic applies to the case where the exiting task is + * already gone. + */ + if (get_futex_value_locked(&uval2, uaddr)) + return -EFAULT; + + /* If the user space value has changed, try again. */ + if (uval2 != uval) + return -EAGAIN; + + /* + * The exiting task did not have a robust list, the robust list was + * corrupted or the user space value in *uaddr is simply bogus. + * Give up and tell user space. + */ + return -ESRCH; +} + /* * Lookup the task for the TID provided from user space and attach to * it after doing proper sanity checks. */ -static int attach_to_pi_owner(u32 uval, union futex_key *key, +static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, struct futex_pi_state **ps) { pid_t pid = uval & FUTEX_TID_MASK; @@ -1180,12 +1234,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, /* * We are the first waiter - try to look up the real owner and attach * the new pi_state to it, but bail out when TID = 0 [1] + * + * The !pid check is paranoid. None of the call sites should end up + * with pid == 0, but better safe than sorry. Let the caller retry */ if (!pid) - return -ESRCH; + return -EAGAIN; p = futex_find_get_task(pid); if (!p) - return -ESRCH; + return handle_exit_race(uaddr, uval, NULL); if (unlikely(p->flags & PF_KTHREAD)) { put_task_struct(p); @@ -1205,7 +1262,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, * set, we know that the task has finished the * cleanup: */ - int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; + int ret = handle_exit_race(uaddr, uval, p); raw_spin_unlock_irq(&p->pi_lock); put_task_struct(p); @@ -1262,7 +1319,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, * We are the first waiter - try to look up the owner based on * @uval and attach to it. */ - return attach_to_pi_owner(uval, key, ps); + return attach_to_pi_owner(uaddr, uval, key, ps); } static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) @@ -1370,7 +1427,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, * attach to the owner. If that fails, no harm done, we only * set the FUTEX_WAITERS bit in the user space variable. */ - return attach_to_pi_owner(uval, key, ps); + return attach_to_pi_owner(uaddr, newval, key, ps); } /** From 38ffd6c9cbde2e6df502605ead9214e314c9eb48 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 21 May 2018 22:57:37 +0200 Subject: [PATCH 1954/2608] pinctrl: msm: fix gpio-hog related boot issues commit a86caa9ba5d70696ceb35d1d39caa20d8b641387 upstream. Sven Eckelmann reported an issue with the current IPQ4019 pinctrl. Setting up any gpio-hog in the device-tree for his device would "kill the bootup completely": | [ 0.477838] msm_serial 78af000.serial: could not find pctldev for node /soc/pinctrl@1000000/serial_pinmux, deferring probe | [ 0.499828] spi_qup 78b5000.spi: could not find pctldev for node /soc/pinctrl@1000000/spi_0_pinmux, deferring probe | [ 1.298883] requesting hog GPIO enable USB2 power (chip 1000000.pinctrl, offset 58) failed, -517 | [ 1.299609] gpiochip_add_data: GPIOs 0..99 (1000000.pinctrl) failed to register | [ 1.308589] ipq4019-pinctrl 1000000.pinctrl: Failed register gpiochip | [ 1.316586] msm_serial 78af000.serial: could not find pctldev for node /soc/pinctrl@1000000/serial_pinmux, deferring probe | [ 1.322415] spi_qup 78b5000.spi: could not find pctldev for node /soc/pinctrl@1000000/spi_0_pinmux, deferri This was also verified on a RT-AC58U (IPQ4018) which would no longer boot, if a gpio-hog was specified. (Tried forcing the USB LED PIN (GPIO0) to high.). The problem is that Pinctrl+GPIO registration is currently peformed in the following order in pinctrl-msm.c: 1. pinctrl_register() 2. gpiochip_add() 3. gpiochip_add_pin_range() The actual error code -517 == -EPROBE_DEFER is coming from pinctrl_get_device_gpio_range(), which is called through: gpiochip_add of_gpiochip_add of_gpiochip_scan_gpios gpiod_hog gpiochip_request_own_desc __gpiod_request chip->request gpiochip_generic_request pinctrl_gpio_request pinctrl_get_device_gpio_range pinctrl_get_device_gpio_range() is unable to find any valid pin ranges, since nothing has been added to the pinctrldev_list yet. so the range can't be found, and the operation fails with -EPROBE_DEFER. This patch fixes the issue by adding the "gpio-ranges" property to the pinctrl device node of all upstream Qcom SoC. The pin ranges are then added by the gpio core. In order to remain compatible with older, existing DTs (and ACPI) a check for the "gpio-ranges" property has been added to msm_gpio_init(). This prevents the driver of adding the same entry to the pinctrldev_list twice. Reported-by: Sven Eckelmann Tested-by: Sven Eckelmann [ipq4019] Reviewed-by: Bjorn Andersson Signed-off-by: Christian Lamparter Signed-off-by: Linus Walleij Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-msm.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 31632c087504..8f0368330a04 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -839,11 +839,24 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) return ret; } - ret = gpiochip_add_pin_range(&pctrl->chip, dev_name(pctrl->dev), 0, 0, chip->ngpio); - if (ret) { - dev_err(pctrl->dev, "Failed to add pin range\n"); - gpiochip_remove(&pctrl->chip); - return ret; + /* + * For DeviceTree-supported systems, the gpio core checks the + * pinctrl's device node for the "gpio-ranges" property. + * If it is present, it takes care of adding the pin ranges + * for the driver. In this case the driver can skip ahead. + * + * In order to remain compatible with older, existing DeviceTree + * files which don't set the "gpio-ranges" property or systems that + * utilize ACPI the driver has to call gpiochip_add_pin_range(). + */ + if (!of_property_read_bool(pctrl->dev->of_node, "gpio-ranges")) { + ret = gpiochip_add_pin_range(&pctrl->chip, + dev_name(pctrl->dev), 0, 0, chip->ngpio); + if (ret) { + dev_err(pctrl->dev, "Failed to add pin range\n"); + gpiochip_remove(&pctrl->chip); + return ret; + } } ret = gpiochip_irqchip_add(chip, From ae4199db18af1c4a5beee9288b0ba6c15c420922 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 12 Feb 2018 23:59:51 +0100 Subject: [PATCH 1955/2608] uapi/if_ether.h: move __UAPI_DEF_ETHHDR libc define commit da360299b6734135a5f66d7db458dcc7801c826a upstream. This fixes a compile problem of some user space applications by not including linux/libc-compat.h in uapi/if_ether.h. linux/libc-compat.h checks which "features" the header files, included from the libc, provide to make the Linux kernel uapi header files only provide no conflicting structures and enums. If a user application mixes kernel headers and libc headers it could happen that linux/libc-compat.h gets included too early where not all other libc headers are included yet. Then the linux/libc-compat.h would not prevent all the redefinitions and we run into compile problems. This patch removes the include of linux/libc-compat.h from uapi/if_ether.h to fix the recently introduced case, but not all as this is more or less impossible. It is no problem to do the check directly in the if_ether.h file and not in libc-compat.h as this does not need any fancy glibc header detection as glibc never provided struct ethhdr and should define __UAPI_DEF_ETHHDR by them self when they will provide this. The following test program did not compile correctly any more: #include #include #include int main(void) { return 0; } Fixes: 6926e041a892 ("uapi/if_ether.h: prevent redefinition of struct ethhdr") Reported-by: Guillaume Nault Cc: # 4.15 Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller Cc: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/if_ether.h | 6 +++++- include/uapi/linux/libc-compat.h | 6 ------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 60ec9114e28f..1d1157edcf40 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,7 +23,6 @@ #define _UAPI_LINUX_IF_ETHER_H #include -#include /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -151,6 +150,11 @@ * This is an Ethernet frame header. */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index fc29efaa918c..8254c937c9f4 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -264,10 +264,4 @@ #endif /* __GLIBC__ */ -/* Definitions for if_ether.h */ -/* allow libcs like musl to deactivate this, glibc does not implement this. */ -#ifndef __UAPI_DEF_ETHHDR -#define __UAPI_DEF_ETHHDR 1 -#endif - #endif /* _UAPI_LIBC_COMPAT_H */ From 5461ace2e2f7b0ebdeca502c743ea819106ecc52 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 20 Feb 2019 10:20:56 +0100 Subject: [PATCH 1956/2608] Linux 4.14.102 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d5b20b618517..837059a07bb3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 101 +SUBLEVEL = 102 EXTRAVERSION = NAME = Petit Gorille From 1b18aad11bf87f722efeb491fd8acf0e4a57fb5f Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 11 Feb 2019 13:40:21 -0500 Subject: [PATCH 1957/2608] dsa: mv88e6xxx: Ensure all pending interrupts are handled prior to exit [ Upstream commit 7c0db24cc431e2196d98a5d5ddaa9088e2fcbfe5 ] The GPIO interrupt controller on the espressobin board only supports edge interrupts. If one enables the use of hardware interrupts in the device tree for the 88E6341, it is possible to miss an edge. When this happens, the INTn pin on the Marvell switch is stuck low and no further interrupts occur. I found after adding debug statements to mv88e6xxx_g1_irq_thread_work() that there is a race in handling device interrupts (e.g. PHY link interrupts). Some interrupts are directly cleared by reading the Global 1 status register. However, the device interrupt flag, for example, is not cleared until all the unmasked SERDES and PHY ports are serviced. This is done by reading the relevant SERDES and PHY status register. The code only services interrupts whose status bit is set at the time of reading its status register. If an interrupt event occurs after its status is read and before all interrupts are serviced, then this event will not be serviced and the INTn output pin will remain low. This is not a problem with polling or level interrupts since the handler will be called again to process the event. However, it's a big problem when using level interrupts. The fix presented here is to add a loop around the code servicing switch interrupts. If any pending interrupts remain after the current set has been handled, we loop and process the new set. If there are no pending interrupts after servicing, we are sure that INTn has gone high and we will get an edge when a new event occurs. Tested on espressobin board. Fixes: dc30c35be720 ("net: dsa: mv88e6xxx: Implement interrupt support.") Signed-off-by: John David Anglin Tested-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 34998ecd9cc9..a3543d637736 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -258,6 +258,7 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id) unsigned int sub_irq; unsigned int n; u16 reg; + u16 ctl1; int err; mutex_lock(&chip->reg_lock); @@ -267,13 +268,28 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id) if (err) goto out; - for (n = 0; n < chip->g1_irq.nirqs; ++n) { - if (reg & (1 << n)) { - sub_irq = irq_find_mapping(chip->g1_irq.domain, n); - handle_nested_irq(sub_irq); - ++nhandled; + do { + for (n = 0; n < chip->g1_irq.nirqs; ++n) { + if (reg & (1 << n)) { + sub_irq = irq_find_mapping(chip->g1_irq.domain, + n); + handle_nested_irq(sub_irq); + ++nhandled; + } } - } + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &ctl1); + if (err) + goto unlock; + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, ®); +unlock: + mutex_unlock(&chip->reg_lock); + if (err) + goto out; + ctl1 &= GENMASK(chip->g1_irq.nirqs, 0); + } while (reg & ctl1); + out: return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); } From 3aafc4459c4144ef8ab243cfb5e6947f5ba46a35 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Mon, 11 Feb 2019 10:57:46 +0800 Subject: [PATCH 1958/2608] net: fix IPv6 prefix route residue [ Upstream commit e75913c93f7cd5f338ab373c34c93a655bd309cb ] Follow those steps: # ip addr add 2001:123::1/32 dev eth0 # ip addr add 2001:123:456::2/64 dev eth0 # ip addr del 2001:123::1/32 dev eth0 # ip addr del 2001:123:456::2/64 dev eth0 and then prefix route of 2001:123::1/32 will still exist. This is because ipv6_prefix_equal in check_cleanup_prefix_route func does not check whether two IPv6 addresses have the same prefix length. If the prefix of one address starts with another shorter address prefix, even though their prefix lengths are different, the return value of ipv6_prefix_equal is true. Here I add a check of whether two addresses have the same prefix to decide whether their prefixes are equal. Fixes: 5b84efecb7d9 ("ipv6 addrconf: don't cleanup prefix route for IFA_F_NOPREFIXROUTE") Signed-off-by: Zhiqiang Liu Reported-by: Wenhao Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ac6f6232294..c47161e92407 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1124,7 +1124,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; - if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + if (ifa->prefix_len != ifp->prefix_len || + !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) From 4e4d02b992fb8e5d95c78c373b5d6b3211c86598 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 6 Feb 2019 19:18:04 +0100 Subject: [PATCH 1959/2608] net: ipv4: use a dedicated counter for icmp_v4 redirect packets [ Upstream commit c09551c6ff7fe16a79a42133bcecba5fc2fc3291 ] According to the algorithm described in the comment block at the beginning of ip_rt_send_redirect, the host should try to send 'ip_rt_redirect_number' ICMP redirect packets with an exponential backoff and then stop sending them at all assuming that the destination ignores redirects. If the device has previously sent some ICMP error packets that are rate-limited (e.g TTL expired) and continues to receive traffic, the redirect packets will never be transmitted. This happens since peer->rate_tokens will be typically greater than 'ip_rt_redirect_number' and so it will never be reset even if the redirect silence timeout (ip_rt_redirect_silence) has elapsed without receiving any packet requiring redirects. Fix it by using a dedicated counter for the number of ICMP redirect packets that has been sent by the host I have not been able to identify a given commit that introduced the issue since ip_rt_send_redirect implements the same rate-limiting algorithm from commit 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/inetpeer.h | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/route.c | 7 +++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 00b5e7825508..74ff688568a0 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -39,6 +39,7 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ + u32 n_redirects; unsigned long rate_last; /* * Once inet_peer is queued for deletion (refcnt == 0), following field diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 64007ce87273..f9cef27907ed 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -215,6 +215,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; + p->n_redirects = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first * calculation of tokens is at its maximum. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7afa8d2463d8..cb30f4e4e553 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -904,13 +904,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; + peer->n_redirects = 0; + } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ - if (peer->rate_tokens >= ip_rt_redirect_number) { + if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } @@ -927,6 +929,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; + ++peer->n_redirects; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && peer->rate_tokens == ip_rt_redirect_number) From 28beec65abccff5f39f33ea9d9f0a3a3d2b97226 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 7 Feb 2019 14:13:18 +0100 Subject: [PATCH 1960/2608] vsock: cope with memory allocation failure at socket creation time [ Upstream commit 225d9464268599a5b4d094d02ec17808e44c7553 ] In the unlikely event that the kmalloc call in vmci_transport_socket_init() fails, we end-up calling vmci_transport_destruct() with a NULL vmci_trans() and oopsing. This change addresses the above explicitly checking for zero vmci_trans() at destruction time. Reported-by: Xiumei Mu Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Signed-off-by: Paolo Abeni Reviewed-by: Stefano Garzarella Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/vmw_vsock/vmci_transport.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index bf7c51644446..ad3f47a714f3 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1648,6 +1648,10 @@ static void vmci_transport_cleanup(struct work_struct *work) static void vmci_transport_destruct(struct vsock_sock *vsk) { + /* transport can be NULL if we hit a failure at init() time */ + if (!vmci_trans(vsk)) + return; + /* Ensure that the detach callback doesn't use the sk/vsk * we are about to destruct. */ From 58cdc5947af63eb33fd631803101d37e5d4b4301 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Feb 2019 12:27:38 -0800 Subject: [PATCH 1961/2608] vxlan: test dev->flags & IFF_UP before calling netif_rx() [ Upstream commit 4179cb5a4c924cd233eaadd081882425bc98f44e ] netif_rx() must be called under a strict contract. At device dismantle phase, core networking clears IFF_UP and flush_all_backlogs() is called after rcu grace period to make sure no incoming packet might be in a cpu backlog and still referencing the device. Most drivers call netif_rx() from their interrupt handler, and since the interrupts are disabled at device dismantle, netif_rx() does not have to check dev->flags & IFF_UP Virtual drivers do not have this guarantee, and must therefore make the check themselves. Otherwise we risk use-after-free and/or crashes. Note this patch also fixes a small issue that came with commit ce6502a8f957 ("vxlan: fix a use after free in vxlan_encap_bypass"), since the dev->stats.rx_dropped change was done on the wrong device. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Fixes: ce6502a8f957 ("vxlan: fix a use after free in vxlan_encap_bypass") Signed-off-by: Eric Dumazet Cc: Petr Machata Cc: Ido Schimmel Cc: Roopa Prabhu Cc: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vxlan.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 13d39a72fe0d..a1b40b9c4906 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2002,7 +2002,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, struct pcpu_sw_netstats *tx_stats, *rx_stats; union vxlan_addr loopback; union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; - struct net_device *dev = skb->dev; + struct net_device *dev; int len = skb->len; tx_stats = this_cpu_ptr(src_vxlan->dev->tstats); @@ -2022,9 +2022,15 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, #endif } + rcu_read_lock(); + dev = skb->dev; + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + goto drop; + } + if (dst_vxlan->cfg.flags & VXLAN_F_LEARN) - vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, 0, - vni); + vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni); u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; @@ -2037,8 +2043,10 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, rx_stats->rx_bytes += len; u64_stats_update_end(&rx_stats->syncp); } else { +drop: dev->stats.rx_dropped++; } + rcu_read_unlock(); } static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, From 48dc41afef9fe5b63a5fbc82852bb6a913f5a99c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 26 Dec 2018 11:28:24 +0000 Subject: [PATCH 1962/2608] hwmon: (lm80) Fix missing unlock on error in set_fan_div() [ Upstream commit 07bd14ccc3049f9c0147a91a4227a571f981601a ] Add the missing unlock before return from function set_fan_div() in the error handling case. Fixes: c9c63915519b ("hwmon: (lm80) fix a missing check of the status of SMBus read") Signed-off-by: Wei Yongjun Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/lm80.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index 0e30fa00204c..f9b8e3e23a8e 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -393,8 +393,10 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, } rv = lm80_read_value(client, LM80_REG_FANDIV); - if (rv < 0) + if (rv < 0) { + mutex_unlock(&data->update_lock); return rv; + } reg = (rv & ~(3 << (2 * (nr + 1)))) | (data->fan_div[nr] << (2 * (nr + 1))); lm80_write_value(client, LM80_REG_FANDIV, reg); From 052d09258f0d33e96a1d2ec63a6763046d3d9160 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 17 Feb 2019 07:18:41 +0000 Subject: [PATCH 1963/2608] mlxsw: __mlxsw_sp_port_headroom_set(): Fix a use of local variable [ Upstream commit 289460404f6947ef1c38e67d680be9a84161250b ] The function-local variable "delay" enters the loop interpreted as delay in bits. However, inside the loop it gets overwritten by the result of mlxsw_sp_pg_buf_delay_get(), and thus leaves the loop as quantity in cells. Thus on second and further loop iterations, the headroom for a given priority is configured with a wrong size. Fix by introducing a loop-local variable, delay_cells. Rename thres to thres_cells for consistency. Fixes: f417f04da589 ("mlxsw: spectrum: Refactor port buffer configuration") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7892e6b8d2e8..8ff92649f9ce 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1161,8 +1161,9 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { bool configure = false; bool pfc = false; + u16 thres_cells; + u16 delay_cells; bool lossy; - u16 thres; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { if (prio_tc[j] == i) { @@ -1176,10 +1177,11 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, continue; lossy = !(pfc || pause_en); - thres = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); - delay = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc, - pause_en); - mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres + delay, thres, lossy); + thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); + delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, + pfc, pause_en); + mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres_cells + delay_cells, + thres_cells, lossy); } return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); From 6e4c01ee785c2192fcc4be234cedde3706309a7e Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Mon, 18 Feb 2019 10:44:44 +0800 Subject: [PATCH 1964/2608] net: crypto set sk to NULL when af_alg_release. [ Upstream commit 9060cb719e61b685ec0102574e10337fa5f445ea ] KASAN has found use-after-free in sockfs_setattr. The existed commit 6d8c50dcb029 ("socket: close race condition between sock_close() and sockfs_setattr()") is to fix this simillar issue, but it seems to ignore that crypto module forgets to set the sk to NULL after af_alg_release. KASAN report details as below: BUG: KASAN: use-after-free in sockfs_setattr+0x120/0x150 Write of size 4 at addr ffff88837b956128 by task syz-executor0/4186 CPU: 2 PID: 4186 Comm: syz-executor0 Not tainted xxx + #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: dump_stack+0xca/0x13e print_address_description+0x79/0x330 ? vprintk_func+0x5e/0xf0 kasan_report+0x18a/0x2e0 ? sockfs_setattr+0x120/0x150 sockfs_setattr+0x120/0x150 ? sock_register+0x2d0/0x2d0 notify_change+0x90c/0xd40 ? chown_common+0x2ef/0x510 chown_common+0x2ef/0x510 ? chmod_common+0x3b0/0x3b0 ? __lock_is_held+0xbc/0x160 ? __sb_start_write+0x13d/0x2b0 ? __mnt_want_write+0x19a/0x250 do_fchownat+0x15c/0x190 ? __ia32_sys_chmod+0x80/0x80 ? trace_hardirqs_on_thunk+0x1a/0x1c __x64_sys_fchownat+0xbf/0x160 ? lockdep_hardirqs_on+0x39a/0x5e0 do_syscall_64+0xc8/0x580 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462589 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fb4b2c83c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000104 RAX: ffffffffffffffda RBX: 000000000072bfa0 RCX: 0000000000462589 RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000007 RBP: 0000000000000005 R08: 0000000000001000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb4b2c846bc R13: 00000000004bc733 R14: 00000000006f5138 R15: 00000000ffffffff Allocated by task 4185: kasan_kmalloc+0xa0/0xd0 __kmalloc+0x14a/0x350 sk_prot_alloc+0xf6/0x290 sk_alloc+0x3d/0xc00 af_alg_accept+0x9e/0x670 hash_accept+0x4a3/0x650 __sys_accept4+0x306/0x5c0 __x64_sys_accept4+0x98/0x100 do_syscall_64+0xc8/0x580 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 4184: __kasan_slab_free+0x12e/0x180 kfree+0xeb/0x2f0 __sk_destruct+0x4e6/0x6a0 sk_destruct+0x48/0x70 __sk_free+0xa9/0x270 sk_free+0x2a/0x30 af_alg_release+0x5c/0x70 __sock_release+0xd3/0x280 sock_close+0x1a/0x20 __fput+0x27f/0x7f0 task_work_run+0x136/0x1b0 exit_to_usermode_loop+0x1a7/0x1d0 do_syscall_64+0x461/0x580 entry_SYSCALL_64_after_hwframe+0x49/0xbe Syzkaller reproducer: r0 = perf_event_open(&(0x7f0000000000)={0x0, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @perf_config_ext}, 0x0, 0x0, 0xffffffffffffffff, 0x0) r1 = socket$alg(0x26, 0x5, 0x0) getrusage(0x0, 0x0) bind(r1, &(0x7f00000001c0)=@alg={0x26, 'hash\x00', 0x0, 0x0, 'sha256-ssse3\x00'}, 0x80) r2 = accept(r1, 0x0, 0x0) r3 = accept4$unix(r2, 0x0, 0x0, 0x0) r4 = dup3(r3, r0, 0x0) fchownat(r4, &(0x7f00000000c0)='\x00', 0x0, 0x0, 0x1000) Fixes: 6d8c50dcb029 ("socket: close race condition between sock_close() and sockfs_setattr()") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- crypto/af_alg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 42dfdd1fd6d8..f816a7289104 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -122,8 +122,10 @@ static void alg_do_release(const struct af_alg_type *type, void *private) int af_alg_release(struct socket *sock) { - if (sock->sk) + if (sock->sk) { sock_put(sock->sk); + sock->sk = NULL; + } return 0; } EXPORT_SYMBOL_GPL(af_alg_release); From 5d3d720df4a922e42d0590ddb3d685f1e0365c76 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Fri, 15 Feb 2019 17:58:54 +0100 Subject: [PATCH 1965/2608] net: Fix for_each_netdev_feature on Big endian [ Upstream commit 3b89ea9c5902acccdbbdec307c85edd1bf52515e ] The features attribute is of type u64 and stored in the native endianes on the system. The for_each_set_bit() macro takes a pointer to a 32 bit array and goes over the bits in this area. On little Endian systems this also works with an u64 as the most significant bit is on the highest address, but on big endian the words are swapped. When we expect bit 15 here we get bit 47 (15 + 32). This patch converts it more or less to its own for_each_set_bit() implementation which works on 64 bit integers directly. This is then completely in host endianness and should work like expected. Fixes: fd867d51f ("net/core: generic support for disabling netdev features down stack") Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdev_features.h | 23 +++++++++++++++++++++-- net/core/dev.c | 4 ++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index b1b0ca7ccb2b..79f1c2f649c6 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -11,6 +11,7 @@ #define _LINUX_NETDEV_FEATURES_H #include +#include typedef u64 netdev_features_t; @@ -143,8 +144,26 @@ enum { #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) #define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT) -#define for_each_netdev_feature(mask_addr, bit) \ - for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) +/* Finds the next feature with the highest number of the range of start till 0. + */ +static inline int find_next_netdev_feature(u64 feature, unsigned long start) +{ + /* like BITMAP_LAST_WORD_MASK() for u64 + * this sets the most significant 64 - start to 0. + */ + feature &= ~0ULL >> (-start & ((sizeof(feature) * 8) - 1)); + + return fls64(feature) - 1; +} + +/* This goes for the MSB to the LSB through the set feature bits, + * mask_addr should be a u64 and bit an int + */ +#define for_each_netdev_feature(mask_addr, bit) \ + for ((bit) = find_next_netdev_feature((mask_addr), \ + NETDEV_FEATURE_COUNT); \ + (bit) >= 0; \ + (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/net/core/dev.c b/net/core/dev.c index 54ba5b5bc55c..93a1b07990b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7260,7 +7260,7 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(upper->wanted_features & feature) && (features & feature)) { @@ -7280,7 +7280,7 @@ static void netdev_sync_lower_features(struct net_device *upper, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", From 7193b3d0c850db575607e562695c950b95fc82ee Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Fri, 15 Feb 2019 17:17:08 +0100 Subject: [PATCH 1966/2608] net: phy: xgmiitorgmii: Support generic PHY status read [ Upstream commit 197f9ab7f08ce4b9ece662f747c3991b2f0fbb57 ] Some PHY drivers like the generic one do not provide a read_status callback on their own but rely on genphy_read_status being called directly. With the current code, this results in a NULL function pointer call. Call genphy_read_status instead when there is no specific callback. Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support") Signed-off-by: Paul Kocialkowski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/xilinx_gmii2rgmii.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 7a14e8170e82..aef525467af0 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -42,7 +42,10 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev) u16 val = 0; int err; - err = priv->phy_drv->read_status(phydev); + if (priv->phy_drv->read_status) + err = priv->phy_drv->read_status(phydev); + else + err = genphy_read_status(phydev); if (err < 0) return err; From c15b7addede2b75303428c956faac3e78eafac46 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 18 Feb 2019 14:35:03 +0100 Subject: [PATCH 1967/2608] net: stmmac: Fix a race in EEE enable callback [ Upstream commit 8a7493e58ad688eb23b81e45461c5d314f4402f1 ] We are saving the status of EEE even before we try to enable it. This leads to a race with XMIT function that tries to arm EEE timer before we set it up. Fix this by only saving the EEE parameters after all operations are performed with success. Signed-off-by: Jose Abreu Fixes: d765955d2ae0 ("stmmac: add the Energy Efficient Ethernet support") Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index af30b4857c3b..c3c6335cbe9a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -675,25 +675,27 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, struct ethtool_eee *edata) { struct stmmac_priv *priv = netdev_priv(dev); + int ret; - priv->eee_enabled = edata->eee_enabled; - - if (!priv->eee_enabled) + if (!edata->eee_enabled) { stmmac_disable_eee_mode(priv); - else { + } else { /* We are asking for enabling the EEE but it is safe * to verify all by invoking the eee_init function. * In case of failure it will return an error. */ - priv->eee_enabled = stmmac_eee_init(priv); - if (!priv->eee_enabled) + edata->eee_enabled = stmmac_eee_init(priv); + if (!edata->eee_enabled) return -EOPNOTSUPP; - - /* Do not change tx_lpi_timer in case of failure */ - priv->tx_lpi_timer = edata->tx_lpi_timer; } - return phy_ethtool_set_eee(dev->phydev, edata); + ret = phy_ethtool_set_eee(dev->phydev, edata); + if (ret) + return ret; + + priv->eee_enabled = edata->eee_enabled; + priv->tx_lpi_timer = edata->tx_lpi_timer; + return 0; } static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) From b690bf8fdb338ba1a09d2a436514d48c81f916a8 Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Fri, 15 Feb 2019 10:49:09 +0100 Subject: [PATCH 1968/2608] net: stmmac: handle endianness in dwmac4_get_timestamp [ Upstream commit 224babd62d6f19581757a6d8bae3bf9501fc10de ] GMAC IP is little-endian and used on several kind of CPU (big or little endian). Main callbacks functions of the stmmac drivers take care about it. It was not the case for dwmac4_get_timestamp function. Fixes: ba1ffd74df74 ("stmmac: fix PTP support for GMAC4") Signed-off-by: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 7e089bf906b4..37b77e7da132 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -238,15 +238,18 @@ static inline u64 dwmac4_get_timestamp(void *desc, u32 ats) static int dwmac4_rx_check_timestamp(void *desc) { struct dma_desc *p = (struct dma_desc *)desc; + unsigned int rdes0 = le32_to_cpu(p->des0); + unsigned int rdes1 = le32_to_cpu(p->des1); + unsigned int rdes3 = le32_to_cpu(p->des3); u32 own, ctxt; int ret = 1; - own = p->des3 & RDES3_OWN; - ctxt = ((p->des3 & RDES3_CONTEXT_DESCRIPTOR) + own = rdes3 & RDES3_OWN; + ctxt = ((rdes3 & RDES3_CONTEXT_DESCRIPTOR) >> RDES3_CONTEXT_DESCRIPTOR_SHIFT); if (likely(!own && ctxt)) { - if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff)) + if ((rdes0 == 0xffffffff) && (rdes1 == 0xffffffff)) /* Corrupted value */ ret = -EINVAL; else From 546b8f668e91baa55cd47ddeaeef607e75369043 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 19 Feb 2019 23:45:29 +0800 Subject: [PATCH 1969/2608] sky2: Increase D3 delay again [ Upstream commit 1765f5dcd00963e33f1b8a4e0f34061fbc0e2f7f ] Another platform requires even longer delay to make the device work correctly after S3. So increase the delay to 300ms. BugLink: https://bugs.launchpad.net/bugs/1798921 Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/sky2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index b12e3a4f9439..3954bc1d2333 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5087,7 +5087,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&hw->restart_work, sky2_restart); pci_set_drvdata(pdev, hw); - pdev->d3_delay = 200; + pdev->d3_delay = 300; return 0; From d5a74d2b466b7d6595b8e28db64fb66f7fd5e724 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Feb 2019 14:53:44 +0800 Subject: [PATCH 1970/2608] vhost: correctly check the return value of translate_desc() in log_used() [ Upstream commit 816db7663565cd23f74ed3d5c9240522e3fb0dda ] When fail, translate_desc() returns negative value, otherwise the number of iovs. So we should fail when the return value is negative instead of a blindly check against zero. Detected by CoverityScan, CID# 1442593: Control flow issues (DEADCODE) Fixes: cc5e71075947 ("vhost: log dirty page correctly") Acked-by: Michael S. Tsirkin Reported-by: Stephen Hemminger Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 37fcb3ca89f1..d7c22ae5c368 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1776,7 +1776,7 @@ static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, len, iov, 64, VHOST_ACCESS_WO); - if (ret) + if (ret < 0) return ret; for (i = 0; i < ret; i++) { From 8565b7fb8dfa0bc97818c2417e813de6147f5d76 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 16 Feb 2019 13:44:39 -0800 Subject: [PATCH 1971/2608] net: Add header for usage of fls64() [ Upstream commit 8681ef1f3d295bd3600315325f3b3396d76d02f6 ] Fixes: 3b89ea9c5902 ("net: Fix for_each_netdev_feature on Big endian") Suggested-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdev_features.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 79f1c2f649c6..de123f436f1a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -11,6 +11,7 @@ #define _LINUX_NETDEV_FEATURES_H #include +#include #include typedef u64 netdev_features_t; From 55103aa317afce598ee93ba3a3c5eb64ce4e6238 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Feb 2019 13:36:21 -0800 Subject: [PATCH 1972/2608] tcp: tcp_v4_err() should be more careful [ Upstream commit 2c4cc9712364c051b1de2d175d5fbea6be948ebf ] ICMP handlers are not very often stressed, we should make them more resilient to bugs that might surface in the future. If there is no packet in retransmit queue, we should avoid a NULL deref. Signed-off-by: Eric Dumazet Reported-by: soukjin bae Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 31b34c0c2d5f..e593301f442f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -475,14 +475,15 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sock_owned_by_user(sk)) break; + skb = tcp_write_queue_head(sk); + if (WARN_ON_ONCE(!skb)) + break; + icsk->icsk_backoff--; icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT; icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); - skb = tcp_write_queue_head(sk); - BUG_ON(!skb); - tcp_mstamp_refresh(tp); delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp); remaining = icsk->icsk_rto - From 859838ff68a119a34a562b7a8384204c2c2782b2 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 15 Feb 2019 14:44:18 -0800 Subject: [PATCH 1973/2608] net: Do not allocate page fragments that are not skb aligned [ Upstream commit 3bed3cc4156eedf652b4df72bdb35d4f1a2a739d ] This patch addresses the fact that there are drivers, specifically tun, that will call into the network page fragment allocators with buffer sizes that are not cache aligned. Doing this could result in data alignment and DMA performance issues as these fragment pools are also shared with the skb allocator and any other devices that will use napi_alloc_frags or netdev_alloc_frags. Fixes: ffde7328a36d ("net: Split netdev_alloc_frag into __alloc_page_frag and add __napi_alloc_frag") Reported-by: Jann Horn Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6dbd2c54b2c9..1b39aef5cf82 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -353,6 +353,8 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) */ void *netdev_alloc_frag(unsigned int fragsz) { + fragsz = SKB_DATA_ALIGN(fragsz); + return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); } EXPORT_SYMBOL(netdev_alloc_frag); @@ -366,6 +368,8 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) void *napi_alloc_frag(unsigned int fragsz) { + fragsz = SKB_DATA_ALIGN(fragsz); + return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); } EXPORT_SYMBOL(napi_alloc_frag); From 3a493b762f31fc0c571051cdfb0d80f49498e5fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Feb 2019 13:36:20 -0800 Subject: [PATCH 1974/2608] tcp: clear icsk_backoff in tcp_write_queue_purge() [ Upstream commit 04c03114be82194d4a4858d41dba8e286ad1787c ] soukjin bae reported a crash in tcp_v4_err() handling ICMP_DEST_UNREACH after tcp_write_queue_head(sk) returned a NULL pointer. Current logic should have prevented this : if (seq != tp->snd_una || !icsk->icsk_retransmits || !icsk->icsk_backoff || fastopen) break; Problem is the write queue might have been purged and icsk_backoff has not been cleared. Signed-off-by: Eric Dumazet Reported-by: soukjin bae Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 1 + net/ipv4/tcp.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c828aac7e04..d0c2dbe94e21 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1622,6 +1622,7 @@ static inline void tcp_write_queue_purge(struct sock *sk) tcp_clear_all_retrans_hints(tcp_sk(sk)); tcp_init_send_head(sk); tcp_sk(sk)->packets_out = 0; + inet_csk(sk)->icsk_backoff = 0; } static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fd14501ac3af..00ae9a1d44ed 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2347,7 +2347,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->write_seq += tp->max_window + 2; if (tp->write_seq == 0) tp->write_seq = 1; - icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; From aa1e52cbb1e45dec65c88f370d32362252380f49 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 15 Feb 2019 13:42:02 -0500 Subject: [PATCH 1975/2608] sunrpc: fix 4 more call sites that were using stack memory with a scatterlist commit e7afe6c1d486b516ed586dcc10b3e7e3e85a9c2b upstream. While trying to reproduce a reported kernel panic on arm64, I discovered that AUTH_GSS basically doesn't work at all with older enctypes on arm64 systems with CONFIG_VMAP_STACK enabled. It turns out there still a few places using stack memory with scatterlists, causing krb5_encrypt() and krb5_decrypt() to produce incorrect results (or a BUG if CONFIG_DEBUG_SG is enabled). Tested with cthon on v4.0/v4.1/v4.2 with krb5/krb5i/krb5p using des3-cbc-sha1 and arcfour-hmac-md5. Signed-off-by: Scott Mayhew Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 49 +++++++++++++++++++++------ 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index c8b9082f4a9d..2d2ed6772fe4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -44,7 +44,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) { struct crypto_skcipher *cipher; - unsigned char plain[8]; + unsigned char *plain; s32 code; dprintk("RPC: %s:\n", __func__); @@ -53,6 +53,10 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, if (IS_ERR(cipher)) return PTR_ERR(cipher); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; + plain[0] = (unsigned char) ((seqnum >> 24) & 0xff); plain[1] = (unsigned char) ((seqnum >> 16) & 0xff); plain[2] = (unsigned char) ((seqnum >> 8) & 0xff); @@ -69,6 +73,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, code = krb5_encrypt(cipher, cksum, plain, buf, 8); out: crypto_free_skcipher(cipher); + kfree(plain); return code; } s32 @@ -78,12 +83,17 @@ krb5_make_seq_num(struct krb5_ctx *kctx, u32 seqnum, unsigned char *cksum, unsigned char *buf) { - unsigned char plain[8]; + unsigned char *plain; + s32 code; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) return krb5_make_rc4_seq_num(kctx, direction, seqnum, cksum, buf); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; + plain[0] = (unsigned char) (seqnum & 0xff); plain[1] = (unsigned char) ((seqnum >> 8) & 0xff); plain[2] = (unsigned char) ((seqnum >> 16) & 0xff); @@ -94,7 +104,9 @@ krb5_make_seq_num(struct krb5_ctx *kctx, plain[6] = direction; plain[7] = direction; - return krb5_encrypt(key, cksum, plain, buf, 8); + code = krb5_encrypt(key, cksum, plain, buf, 8); + kfree(plain); + return code; } static s32 @@ -102,7 +114,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, unsigned char *buf, int *direction, s32 *seqnum) { struct crypto_skcipher *cipher; - unsigned char plain[8]; + unsigned char *plain; s32 code; dprintk("RPC: %s:\n", __func__); @@ -115,20 +127,28 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, if (code) goto out; + plain = kmalloc(8, GFP_NOFS); + if (!plain) { + code = -ENOMEM; + goto out; + } + code = krb5_decrypt(cipher, cksum, buf, plain, 8); if (code) - goto out; + goto out_plain; if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || (plain[4] != plain[7])) { code = (s32)KG_BAD_SEQ; - goto out; + goto out_plain; } *direction = plain[4]; *seqnum = ((plain[0] << 24) | (plain[1] << 16) | (plain[2] << 8) | (plain[3])); +out_plain: + kfree(plain); out: crypto_free_skcipher(cipher); return code; @@ -141,26 +161,33 @@ krb5_get_seq_num(struct krb5_ctx *kctx, int *direction, u32 *seqnum) { s32 code; - unsigned char plain[8]; struct crypto_skcipher *key = kctx->seq; + unsigned char *plain; dprintk("RPC: krb5_get_seq_num:\n"); if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) return krb5_get_rc4_seq_num(kctx, cksum, buf, direction, seqnum); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) - return code; + goto out; if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || - (plain[4] != plain[7])) - return (s32)KG_BAD_SEQ; + (plain[4] != plain[7])) { + code = (s32)KG_BAD_SEQ; + goto out; + } *direction = plain[4]; *seqnum = ((plain[0]) | (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); - return 0; +out: + kfree(plain); + return code; } From c01e01adf9df0e9b32f0f5f7058b66b68155002c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Feb 2019 12:41:05 -0800 Subject: [PATCH 1976/2608] net/x25: do not hold the cpu too long in x25_new_lci() commit cf657d22ee1f0e887326a92169f2e28dc932fd10 upstream. Due to quadratic behavior of x25_new_lci(), syzbot was able to trigger an rcu stall. Fix this by not blocking BH for the whole duration of the function, and inserting a reschedule point when possible. If we care enough, using a bitmap could get rid of the quadratic behavior. syzbot report : rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-...!: (10500 ticks this GP) idle=4fa/1/0x4000000000000002 softirq=283376/283376 fqs=0 rcu: (t=10501 jiffies g=383105 q=136) rcu: rcu_preempt kthread starved for 10502 jiffies! g383105 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0 rcu: RCU grace-period kthread stack dump: rcu_preempt I28928 10 2 0x80000000 Call Trace: context_switch kernel/sched/core.c:2844 [inline] __schedule+0x817/0x1cc0 kernel/sched/core.c:3485 schedule+0x92/0x180 kernel/sched/core.c:3529 schedule_timeout+0x4db/0xfd0 kernel/time/timer.c:1803 rcu_gp_fqs_loop kernel/rcu/tree.c:1948 [inline] rcu_gp_kthread+0x956/0x17a0 kernel/rcu/tree.c:2105 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 NMI backtrace for cpu 0 CPU: 0 PID: 8759 Comm: syz-executor2 Not tainted 5.0.0-rc4+ #51 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 nmi_cpu_backtrace.cold+0x63/0xa4 lib/nmi_backtrace.c:101 nmi_trigger_cpumask_backtrace+0x1be/0x236 lib/nmi_backtrace.c:62 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline] rcu_dump_cpu_stacks+0x183/0x1cf kernel/rcu/tree.c:1211 print_cpu_stall kernel/rcu/tree.c:1348 [inline] check_cpu_stall kernel/rcu/tree.c:1422 [inline] rcu_pending kernel/rcu/tree.c:3018 [inline] rcu_check_callbacks.cold+0x500/0xa4a kernel/rcu/tree.c:2521 update_process_times+0x32/0x80 kernel/time/timer.c:1635 tick_sched_handle+0xa2/0x190 kernel/time/tick-sched.c:161 tick_sched_timer+0x47/0x130 kernel/time/tick-sched.c:1271 __run_hrtimer kernel/time/hrtimer.c:1389 [inline] __hrtimer_run_queues+0x33e/0xde0 kernel/time/hrtimer.c:1451 hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1035 [inline] smp_apic_timer_interrupt+0x120/0x570 arch/x86/kernel/apic/apic.c:1060 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 RIP: 0010:__read_once_size include/linux/compiler.h:193 [inline] RIP: 0010:queued_write_lock_slowpath+0x13e/0x290 kernel/locking/qrwlock.c:86 Code: 00 00 fc ff df 4c 8d 2c 01 41 83 c7 03 41 0f b6 45 00 41 38 c7 7c 08 84 c0 0f 85 0c 01 00 00 8b 03 3d 00 01 00 00 74 1a f3 90 <41> 0f b6 55 00 41 38 d7 7c eb 84 d2 74 e7 48 89 df e8 6c 0f 4f 00 RSP: 0018:ffff88805f117bd8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13 RAX: 0000000000000300 RBX: ffffffff89413ba0 RCX: 1ffffffff1282774 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff89413ba0 RBP: ffff88805f117c70 R08: 1ffffffff1282774 R09: fffffbfff1282775 R10: fffffbfff1282774 R11: ffffffff89413ba3 R12: 00000000000000ff R13: fffffbfff1282774 R14: 1ffff1100be22f7d R15: 0000000000000003 queued_write_lock include/asm-generic/qrwlock.h:104 [inline] do_raw_write_lock+0x1d6/0x290 kernel/locking/spinlock_debug.c:203 __raw_write_lock_bh include/linux/rwlock_api_smp.h:204 [inline] _raw_write_lock_bh+0x3b/0x50 kernel/locking/spinlock.c:312 x25_insert_socket+0x21/0xe0 net/x25/af_x25.c:267 x25_bind+0x273/0x340 net/x25/af_x25.c:705 __sys_bind+0x23f/0x290 net/socket.c:1505 __do_sys_bind net/socket.c:1516 [inline] __se_sys_bind net/socket.c:1514 [inline] __x64_sys_bind+0x73/0xb0 net/socket.c:1514 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457e39 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fafccd0dc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000031 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457e39 RDX: 0000000000000012 RSI: 0000000020000240 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fafccd0e6d4 R13: 00000000004bdf8b R14: 00000000004ce4b8 R15: 00000000ffffffff Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 8752 Comm: syz-executor4 Not tainted 5.0.0-rc4+ #51 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__x25_find_socket+0x78/0x120 net/x25/af_x25.c:328 Code: 89 f8 48 c1 e8 03 80 3c 18 00 0f 85 a6 00 00 00 4d 8b 64 24 68 4d 85 e4 74 7f e8 03 97 3d fb 49 83 ec 68 74 74 e8 f8 96 3d fb <49> 8d bc 24 88 04 00 00 48 89 f8 48 c1 e8 03 0f b6 04 18 84 c0 74 RSP: 0018:ffff8880639efc58 EFLAGS: 00000246 RAX: 0000000000040000 RBX: dffffc0000000000 RCX: ffffc9000e677000 RDX: 0000000000040000 RSI: ffffffff863244b8 RDI: ffff88806a764628 RBP: ffff8880639efc80 R08: ffff8880a80d05c0 R09: fffffbfff1282775 R10: fffffbfff1282774 R11: ffffffff89413ba3 R12: ffff88806a7645c0 R13: 0000000000000001 R14: ffff88809f29ac00 R15: 0000000000000000 FS: 00007fe8d0c58700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32823000 CR3: 00000000672eb000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: x25_new_lci net/x25/af_x25.c:357 [inline] x25_connect+0x374/0xdf0 net/x25/af_x25.c:786 __sys_connect+0x266/0x330 net/socket.c:1686 __do_sys_connect net/socket.c:1697 [inline] __se_sys_connect net/socket.c:1694 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1694 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457e39 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fe8d0c57c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457e39 RDX: 0000000000000012 RSI: 0000000020000200 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe8d0c586d4 R13: 00000000004be378 R14: 00000000004ceb00 R15: 00000000ffffffff Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Andrew Hendry Cc: linux-x25@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ac095936552d..47f600564f24 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) unsigned int lci = 1; struct sock *sk; - read_lock_bh(&x25_list_lock); - - while ((sk = __x25_find_socket(lci, nb)) != NULL) { + while ((sk = x25_find_socket(lci, nb)) != NULL) { sock_put(sk); if (++lci == 4096) { lci = 0; break; } + cond_resched(); } - read_unlock_bh(&x25_list_lock); return lci; } From d8eba3df4dd9c57f233e9f3ca022f34dfcd9b5ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Feb 2019 15:38:44 -0800 Subject: [PATCH 1977/2608] mISDN: fix a race in dev_expire_timer() commit bdcc5bc25548ef6b08e2e43937148f907c212292 upstream. Since mISDN_close() uses dev->pending to iterate over active timers, there is a chance that one timer got removed from the ->pending list in dev_expire_timer() but that the thread has not called yet wake_up_interruptible() So mISDN_close() could miss this and free dev before completion of at least one dev_expire_timer() syzbot was able to catch this race : BUG: KASAN: use-after-free in register_lock_class+0x140c/0x1bf0 kernel/locking/lockdep.c:827 Write of size 8 at addr ffff88809fc18948 by task syz-executor1/24769 CPU: 1 PID: 24769 Comm: syz-executor1 Not tainted 5.0.0-rc5 #60 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_store8_noabort+0x17/0x20 mm/kasan/generic_report.c:140 register_lock_class+0x140c/0x1bf0 kernel/locking/lockdep.c:827 __lock_acquire+0x11f/0x4700 kernel/locking/lockdep.c:3224 lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:152 __wake_up_common_lock+0xc7/0x190 kernel/sched/wait.c:120 __wake_up+0xe/0x10 kernel/sched/wait.c:145 dev_expire_timer+0xe4/0x3b0 drivers/isdn/mISDN/timerdev.c:174 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 protocol 88fb is buggy, dev hsr_slave_0 protocol 88fb is buggy, dev hsr_slave_1 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x180/0x1d0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 RIP: 0010:__sanitizer_cov_trace_pc+0x26/0x50 kernel/kcov.c:101 Code: 90 90 90 90 55 48 89 e5 48 8b 75 08 65 48 8b 04 25 40 ee 01 00 65 8b 15 98 12 92 7e 81 e2 00 01 1f 00 75 2b 8b 90 d8 12 00 00 <83> fa 02 75 20 48 8b 88 e0 12 00 00 8b 80 dc 12 00 00 48 8b 11 48 RSP: 0018:ffff8880589b7a60 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffff888087ce25c0 RBX: 0000000000000001 RCX: ffffffff818f8ca3 RDX: 0000000000000000 RSI: ffffffff818f8b48 RDI: 0000000000000001 RBP: ffff8880589b7a60 R08: ffff888087ce25c0 R09: ffffed1015d25bd0 R10: ffffed1015d25bcf R11: ffff8880ae92de7b R12: ffffea0001ae4680 R13: ffffea0001ae4688 R14: 0000000000000000 R15: ffffea0001b41648 PageIdle include/linux/page-flags.h:398 [inline] page_is_idle include/linux/page_idle.h:29 [inline] mark_page_accessed+0x618/0x1140 mm/swap.c:398 touch_buffer fs/buffer.c:59 [inline] __find_get_block+0x312/0xcc0 fs/buffer.c:1298 sb_find_get_block include/linux/buffer_head.h:338 [inline] recently_deleted fs/ext4/ialloc.c:682 [inline] find_inode_bit.isra.0+0x202/0x510 fs/ext4/ialloc.c:722 __ext4_new_inode+0x14ad/0x52c0 fs/ext4/ialloc.c:914 ext4_symlink+0x3f8/0xbe0 fs/ext4/namei.c:3096 vfs_symlink fs/namei.c:4126 [inline] vfs_symlink+0x378/0x5d0 fs/namei.c:4112 do_symlinkat+0x22b/0x290 fs/namei.c:4153 __do_sys_symlink fs/namei.c:4172 [inline] __se_sys_symlink fs/namei.c:4170 [inline] __x64_sys_symlink+0x59/0x80 fs/namei.c:4170 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457b67 Code: 0f 1f 00 b8 5c 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 6d bb fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 58 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 4d bb fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff045ce0f8 EFLAGS: 00000202 ORIG_RAX: 0000000000000058 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000457b67 RDX: 00007fff045ce173 RSI: 00000000004bd63f RDI: 00007fff045ce160 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 R10: 0000000000000075 R11: 0000000000000202 R12: 0000000000000000 R13: 0000000000000001 R14: 000000000000029b R15: 0000000000000001 Allocated by task 24763: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc mm/kasan/common.c:496 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:469 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:504 kmem_cache_alloc_trace+0x151/0x760 mm/slab.c:3609 kmalloc include/linux/slab.h:545 [inline] mISDN_open+0x9a/0x270 drivers/isdn/mISDN/timerdev.c:59 misc_open+0x398/0x4c0 drivers/char/misc.c:141 chrdev_open+0x247/0x6b0 fs/char_dev.c:417 do_dentry_open+0x47d/0x1130 fs/open.c:771 vfs_open+0xa0/0xd0 fs/open.c:880 do_last fs/namei.c:3418 [inline] path_openat+0x10d7/0x4690 fs/namei.c:3534 do_filp_open+0x1a1/0x280 fs/namei.c:3564 do_sys_open+0x3fe/0x5d0 fs/open.c:1063 __do_sys_openat fs/open.c:1090 [inline] __se_sys_openat fs/open.c:1084 [inline] __x64_sys_openat+0x9d/0x100 fs/open.c:1084 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 24762: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:458 kasan_slab_free+0xe/0x10 mm/kasan/common.c:466 __cache_free mm/slab.c:3487 [inline] kfree+0xcf/0x230 mm/slab.c:3806 mISDN_close+0x2a1/0x390 drivers/isdn/mISDN/timerdev.c:97 __fput+0x2df/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop+0x273/0x2c0 arch/x86/entry/common.c:166 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff88809fc18900 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 72 bytes inside of 192-byte region [ffff88809fc18900, ffff88809fc189c0) The buggy address belongs to the page: page:ffffea00027f0600 count:1 mapcount:0 mapping:ffff88812c3f0040 index:0xffff88809fc18000 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea000269f648 ffffea00029f7408 ffff88812c3f0040 raw: ffff88809fc18000 ffff88809fc18000 000000010000000b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88809fc18800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88809fc18880: 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88809fc18900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88809fc18980: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff88809fc18a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Signed-off-by: Eric Dumazet Cc: Karsten Keil Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/mISDN/timerdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index b1e135fc1fb5..7f42fb6b61f2 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -170,8 +170,8 @@ dev_expire_timer(unsigned long data) spin_lock_irqsave(&timer->dev->lock, flags); if (timer->id >= 0) list_move_tail(&timer->list, &timer->dev->expired); - spin_unlock_irqrestore(&timer->dev->lock, flags); wake_up_interruptible(&timer->dev->wait); + spin_unlock_irqrestore(&timer->dev->lock, flags); } static int From d283ed959311fc51be88d1fdc65121f80263a422 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Jan 2019 10:40:59 -0800 Subject: [PATCH 1978/2608] ax25: fix possible use-after-free commit 63530aba7826a0f8e129874df9c4d264f9db3f9e upstream. syzbot found that ax25 routes where not properly protected against concurrent use [1]. In this particular report the bug happened while copying ax25->digipeat. Fix this problem by making sure we call ax25_get_route() while ax25_route_lock is held, so that no modification could happen while using the route. The current two ax25_get_route() callers do not sleep, so this change should be fine. Once we do that, ax25_get_route() no longer needs to grab a reference on the found route. [1] ax25_connect(): syz-executor0 uses autobind, please contact jreuter@yaina.de BUG: KASAN: use-after-free in memcpy include/linux/string.h:352 [inline] BUG: KASAN: use-after-free in kmemdup+0x42/0x60 mm/util.c:113 Read of size 66 at addr ffff888066641a80 by task syz-executor2/531 ax25_connect(): syz-executor0 uses autobind, please contact jreuter@yaina.de CPU: 1 PID: 531 Comm: syz-executor2 Not tainted 5.0.0-rc2+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1db/0x2d0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 memcpy+0x24/0x50 mm/kasan/common.c:130 memcpy include/linux/string.h:352 [inline] kmemdup+0x42/0x60 mm/util.c:113 kmemdup include/linux/string.h:425 [inline] ax25_rt_autobind+0x25d/0x750 net/ax25/ax25_route.c:424 ax25_connect.cold+0x30/0xa4 net/ax25/af_ax25.c:1224 __sys_connect+0x357/0x490 net/socket.c:1664 __do_sys_connect net/socket.c:1675 [inline] __se_sys_connect net/socket.c:1672 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1672 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458099 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f870ee22c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000458099 RDX: 0000000000000048 RSI: 0000000020000080 RDI: 0000000000000005 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 ax25_connect(): syz-executor4 uses autobind, please contact jreuter@yaina.de R10: 0000000000000000 R11: 0000000000000246 R12: 00007f870ee236d4 R13: 00000000004be48e R14: 00000000004ce9a8 R15: 00000000ffffffff Allocated by task 526: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc mm/kasan/common.c:496 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:469 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:504 ax25_connect(): syz-executor5 uses autobind, please contact jreuter@yaina.de kmem_cache_alloc_trace+0x151/0x760 mm/slab.c:3609 kmalloc include/linux/slab.h:545 [inline] ax25_rt_add net/ax25/ax25_route.c:95 [inline] ax25_rt_ioctl+0x3b9/0x1270 net/ax25/ax25_route.c:233 ax25_ioctl+0x322/0x10b0 net/ax25/af_ax25.c:1763 sock_do_ioctl+0xe2/0x400 net/socket.c:950 sock_ioctl+0x32f/0x6c0 net/socket.c:1074 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0x107b/0x17d0 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe ax25_connect(): syz-executor5 uses autobind, please contact jreuter@yaina.de Freed by task 550: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:458 kasan_slab_free+0xe/0x10 mm/kasan/common.c:466 __cache_free mm/slab.c:3487 [inline] kfree+0xcf/0x230 mm/slab.c:3806 ax25_rt_add net/ax25/ax25_route.c:92 [inline] ax25_rt_ioctl+0x304/0x1270 net/ax25/ax25_route.c:233 ax25_ioctl+0x322/0x10b0 net/ax25/af_ax25.c:1763 sock_do_ioctl+0xe2/0x400 net/socket.c:950 sock_ioctl+0x32f/0x6c0 net/socket.c:1074 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0x107b/0x17d0 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff888066641a80 which belongs to the cache kmalloc-96 of size 96 The buggy address is located 0 bytes inside of 96-byte region [ffff888066641a80, ffff888066641ae0) The buggy address belongs to the page: page:ffffea0001999040 count:1 mapcount:0 mapping:ffff88812c3f04c0 index:0x0 flags: 0x1fffc0000000200(slab) ax25_connect(): syz-executor4 uses autobind, please contact jreuter@yaina.de raw: 01fffc0000000200 ffffea0001817948 ffffea0002341dc8 ffff88812c3f04c0 raw: 0000000000000000 ffff888066641000 0000000100000020 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888066641980: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff888066641a00: 00 00 00 00 00 00 00 00 02 fc fc fc fc fc fc fc >ffff888066641a80: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ^ ffff888066641b00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff888066641b80: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc Signed-off-by: Eric Dumazet Cc: Ralf Baechle Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ax25.h | 12 ++++++++++++ net/ax25/ax25_ip.c | 4 ++-- net/ax25/ax25_route.c | 19 ++++++++----------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 76fb39c272a7..e667bca42ca4 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -200,6 +200,18 @@ static inline void ax25_hold_route(ax25_route *ax25_rt) void __ax25_put_route(ax25_route *ax25_rt); +extern rwlock_t ax25_route_lock; + +static inline void ax25_route_lock_use(void) +{ + read_lock(&ax25_route_lock); +} + +static inline void ax25_route_lock_unuse(void) +{ + read_unlock(&ax25_route_lock); +} + static inline void ax25_put_route(ax25_route *ax25_rt) { if (refcount_dec_and_test(&ax25_rt->refcount)) diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 183b1c583d56..dd526c91363d 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -114,6 +114,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) dst = (ax25_address *)(bp + 1); src = (ax25_address *)(bp + 8); + ax25_route_lock_use(); route = ax25_get_route(dst, NULL); if (route) { digipeat = route->digipeat; @@ -206,9 +207,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) ax25_queue_xmit(skb, dev); put: - if (route) - ax25_put_route(route); + ax25_route_lock_unuse(); return NETDEV_TX_OK; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 0446b892618a..7f369f1db7ae 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -40,7 +40,7 @@ #include static ax25_route *ax25_route_list; -static DEFINE_RWLOCK(ax25_route_lock); +DEFINE_RWLOCK(ax25_route_lock); void ax25_rt_device_down(struct net_device *dev) { @@ -349,6 +349,7 @@ const struct file_operations ax25_route_fops = { * Find AX.25 route * * Only routes with a reference count of zero can be destroyed. + * Must be called with ax25_route_lock read locked. */ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) { @@ -356,7 +357,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) ax25_route *ax25_def_rt = NULL; ax25_route *ax25_rt; - read_lock(&ax25_route_lock); /* * Bind to the physical interface we heard them on, or the default * route if none is found; @@ -379,11 +379,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) if (ax25_spe_rt != NULL) ax25_rt = ax25_spe_rt; - if (ax25_rt != NULL) - ax25_hold_route(ax25_rt); - - read_unlock(&ax25_route_lock); - return ax25_rt; } @@ -414,9 +409,12 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) ax25_route *ax25_rt; int err = 0; - if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL) + ax25_route_lock_use(); + ax25_rt = ax25_get_route(addr, NULL); + if (!ax25_rt) { + ax25_route_lock_unuse(); return -EHOSTUNREACH; - + } if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) { err = -EHOSTUNREACH; goto put; @@ -451,8 +449,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) } put: - ax25_put_route(ax25_rt); - + ax25_route_lock_unuse(); return err; } From c793fa334c7b5b685957720ef0842e7cddab62a6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 23 Feb 2019 09:06:44 +0100 Subject: [PATCH 1979/2608] Linux 4.14.103 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 837059a07bb3..52d150b1f5ef 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 102 +SUBLEVEL = 103 EXTRAVERSION = NAME = Petit Gorille From d783d3ceb4005584751ca01f71004f1fb4c30f8f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 5 Feb 2019 16:37:40 +0100 Subject: [PATCH 1980/2608] ARM: 8834/1: Fix: kprobes: optimized kprobes illegal instruction commit 0ac569bf6a7983c0c5747d6df8db9dc05bc92b6c upstream. commit e46daee53bb5 ("ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE") introduced a regression in optimized kprobes. It triggers "invalid instruction" oopses when using kprobes instrumentation through lttng and perf. This commit was introduced in kernel v4.20, and has been backported to stable kernels 4.19 and 4.14. This crash was also reported by Hongzhi Song on the redhat bugzilla where the patch was originally introduced. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1639397 Link: https://bugs.lttng.org/issues/1174 Link: https://lore.kernel.org/lkml/342740659.2887.1549307721609.JavaMail.zimbra@efficios.com Fixes: e46daee53bb5 ("ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE") Signed-off-by: Mathieu Desnoyers Reported-by: Robert Berger Tested-by: Robert Berger Acked-by: Kees Cook Cc: Robert Berger Cc: Masami Hiramatsu Cc: William Cohen Cc: Laura Abbott Cc: Kees Cook Cc: # v4.14+ Cc: linux-arm-kernel@lists.infradead.org Cc: patches@armlinux.org.uk Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/probes/kprobes/opt-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 2c118a6ab358..0dc23fc227ed 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned char *)optprobe_template_entry, + memcpy(code, (unsigned long *)&optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ From ee6e4d34b4d9b5da00ac502e68e4d2ac04c5c1f3 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 14 Feb 2019 15:29:50 +0000 Subject: [PATCH 1981/2608] tracing: Fix number of entries in trace header commit 9e7382153f80ba45a0bbcd540fb77d4b15f6e966 upstream. The following commit 441dae8f2f29 ("tracing: Add support for display of tgid in trace output") removed the call to print_event_info() from print_func_help_header_irq() which results in the ftrace header not reporting the number of entries written in the buffer. As this wasn't the original intent of the patch, re-introduce the call to print_event_info() to restore the orginal behaviour. Link: http://lkml.kernel.org/r/20190214152950.4179-1-quentin.perret@arm.com Acked-by: Joel Fernandes Cc: stable@vger.kernel.org Fixes: 441dae8f2f29 ("tracing: Add support for display of tgid in trace output") Signed-off-by: Quentin Perret Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e9cbb96cd99e..bd6e6142473f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3381,6 +3381,8 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file const char tgid_space[] = " "; const char space[] = " "; + print_event_info(buf, m); + seq_printf(m, "# %s _-----=> irqs-off\n", tgid ? tgid_space : space); seq_printf(m, "# %s / _----=> need-resched\n", From b3f5fbb23264a0a7cebbcd1b24c4b494b3229fe7 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 15 Feb 2019 20:14:15 +0000 Subject: [PATCH 1982/2608] MIPS: eBPF: Always return sign extended 32b values commit 13443154f6cac61d148471ede6d7f1f6b5ea946a upstream. The function prototype used to call JITed eBPF code (ie. the type of the struct bpf_prog bpf_func field) returns an unsigned int. The MIPS n64 ABI that MIPS64 kernels target defines that 32 bit integers should always be sign extended when passed in registers as either arguments or return values. This means that when returning any value which may not already be sign extended (ie. of type REG_64BIT or REG_32BIT_ZERO_EX) we need to perform that sign extension in order to comply with the n64 ABI. Without this we see strange looking test failures from test_bpf.ko, such as: test_bpf: #65 ALU64_MOV_X: dst = 4294967295 jited:1 ret -1 != -1 FAIL (1 times) Although the return value printed matches the expected value, this is only because printf is only examining the least significant 32 bits of the 64 bit register value we returned. The register holding the expected value is sign extended whilst the v0 register was set to a zero extended value by our JITed code, so when compared by a conditional branch instruction the values are not equal. We already handle this when the return value register is of type REG_32BIT_ZERO_EX, so simply extend this to also cover REG_64BIT. Signed-off-by: Paul Burton Fixes: b6bd53f9c4e8 ("MIPS: Add missing file for eBPF JIT.") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- arch/mips/net/ebpf_jit.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 962b0259b4b6..dd537cba4449 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -348,12 +348,15 @@ static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) const struct bpf_prog *prog = ctx->skf; int stack_adjust = ctx->stack_size; int store_offset = stack_adjust - 8; + enum reg_val_type td; int r0 = MIPS_R_V0; - if (dest_reg == MIPS_R_RA && - get_reg_val_type(ctx, prog->len, BPF_REG_0) == REG_32BIT_ZERO_EX) + if (dest_reg == MIPS_R_RA) { /* Don't let zero extended value escape. */ - emit_instr(ctx, sll, r0, r0, 0); + td = get_reg_val_type(ctx, prog->len, BPF_REG_0); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) + emit_instr(ctx, sll, r0, r0, 0); + } if (ctx->flags & EBPF_SAVE_RA) { emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP); From 5f9d7b24e6fae276260c6dafc3c7d753c766a947 Mon Sep 17 00:00:00 2001 From: Rakesh Pillai Date: Fri, 15 Feb 2019 14:16:02 +0530 Subject: [PATCH 1983/2608] mac80211: Restore vif beacon interval if start ap fails commit 83e37e0bdd1470bbe6612250b745ad39b1a7b130 upstream. The starting of AP interface can fail due to invalid beacon interval, which does not match the minimum gcd requirement set by the wifi driver. In such case, the beacon interval of that interface gets updated with that invalid beacon interval. The next time that interface is brought up in AP mode, an interface combination check is performed and the beacon interval is taken from the previously set value. In a case where an invalid beacon interval, i.e. a beacon interval value which does not satisfy the minimum gcd criteria set by the driver, is set, all the subsequent trials to bring that interface in AP mode will fail, even if the subsequent trials have a valid beacon interval. To avoid this, in case of a failure in bringing up an interface in AP mode due to interface combination error, the interface beacon interval which is stored in bss conf, needs to be restored with the last working value of beacon interval. Tested on ath10k using WCN3990. Cc: stable@vger.kernel.org Fixes: 0c317a02ca98 ("cfg80211: support virtual interfaces with different beacon intervals") Signed-off-by: Rakesh Pillai Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 63558335e41e..ebc8045ddee6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -884,6 +884,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER; int err; + int prev_beacon_int; old = sdata_dereference(sdata->u.ap.beacon, sdata); if (old) @@ -906,6 +907,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; + prev_beacon_int = sdata->vif.bss_conf.beacon_int; sdata->vif.bss_conf.beacon_int = params->beacon_interval; mutex_lock(&local->mtx); @@ -914,8 +916,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (!err) ieee80211_vif_copy_chanctx_to_vlans(sdata, false); mutex_unlock(&local->mtx); - if (err) + if (err) { + sdata->vif.bss_conf.beacon_int = prev_beacon_int; return err; + } /* * Apply control port protocol, this allows us to From 62bb080fa345122d14eea764e577bc29458ffdb3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Feb 2019 22:03:25 +0800 Subject: [PATCH 1984/2608] mac80211: Free mpath object when rhashtable insertion fails commit 4ff3a9d14c6c06eaa4e5976c61599ea2bd9e81b2 upstream. When rhashtable insertion fails the mesh table code doesn't free the now-orphan mesh path object. This patch fixes that. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh_pathtbl.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 97269caafecd..1ce068865629 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -448,17 +448,15 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, } while (unlikely(ret == -EEXIST && !mpath)); - if (ret && ret != -EEXIST) - return ERR_PTR(ret); - - /* At this point either new_mpath was added, or we found a - * matching entry already in the table; in the latter case - * free the unnecessary new entry. - */ - if (ret == -EEXIST) { + if (ret) { kfree(new_mpath); + + if (ret != -EEXIST) + return ERR_PTR(ret); + new_mpath = mpath; } + sdata->u.mesh.mesh_paths_generation++; return new_mpath; } @@ -488,6 +486,9 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, &new_mpath->rhash, mesh_rht_params); + if (ret) + kfree(new_mpath); + sdata->u.mesh.mpp_paths_generation++; return ret; } From 41b824a63da3af6c9f0abf51c830df6ced8e098d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 5 Feb 2019 20:30:27 +0100 Subject: [PATCH 1985/2608] libceph: handle an empty authorize reply commit 0fd3fd0a9bb0b02b6435bb7070e9f7b82a23f068 upstream. The authorize reply can be empty, for example when the ticket used to build the authorizer is too old and TAG_BADAUTHORIZER is returned from the service. Calling ->verify_authorizer_reply() results in an attempt to decrypt and validate (somewhat) random data in au->buf (most likely the signature block from calc_signature()), which fails and ends up in con_fault_finish() with !con->auth_retry. The ticket isn't invalidated and the connection is retried again and again until a new ticket is obtained from the monitor: libceph: osd2 192.168.122.1:6809 bad authorize reply libceph: osd2 192.168.122.1:6809 bad authorize reply libceph: osd2 192.168.122.1:6809 bad authorize reply libceph: osd2 192.168.122.1:6809 bad authorize reply Let TAG_BADAUTHORIZER handler kick in and increment con->auth_retry. Cc: stable@vger.kernel.org Fixes: 5c056fdc5b47 ("libceph: verify authorize reply on connect") Link: https://tracker.ceph.com/issues/20164 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5fd222dc64b3..081a41c75341 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2057,6 +2057,8 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); if (con->auth) { + int len = le32_to_cpu(con->in_reply.authorizer_len); + /* * Any connection that defines ->get_authorizer() * should also define ->add_authorizer_challenge() and @@ -2066,8 +2068,7 @@ static int process_connect(struct ceph_connection *con) */ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { ret = con->ops->add_authorizer_challenge( - con, con->auth->authorizer_reply_buf, - le32_to_cpu(con->in_reply.authorizer_len)); + con, con->auth->authorizer_reply_buf, len); if (ret < 0) return ret; @@ -2077,10 +2078,12 @@ static int process_connect(struct ceph_connection *con) return 0; } - ret = con->ops->verify_authorizer_reply(con); - if (ret < 0) { - con->error_msg = "bad authorize reply"; - return ret; + if (len) { + ret = con->ops->verify_authorizer_reply(con); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; + } } } From eddbdfe4a2c4db0760f5d134e9f97c6bdcc975b6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Feb 2019 15:18:52 +0800 Subject: [PATCH 1986/2608] ceph: avoid repeatedly adding inode to mdsc->snap_flush_list commit 04242ff3ac0abbaa4362f97781dac268e6c3541a upstream. Otherwise, mdsc->snap_flush_list may get corrupted. Cc: stable@vger.kernel.org Signed-off-by: "Yan, Zheng" Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/snap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 8a2ca41e4b97..9b6207c84b68 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -616,7 +616,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->size); spin_lock(&mdsc->snap_flush_lock); - list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); + if (list_empty(&ci->i_snap_flush_item)) + list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); spin_unlock(&mdsc->snap_flush_lock); return 1; /* caller may want to ceph_flush_snaps */ } From 3af9907c2a544d3d3a55f68cb75018a98b4c1899 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Wed, 20 Feb 2019 22:18:58 -0800 Subject: [PATCH 1987/2608] numa: change get_mempolicy() to use nr_node_ids instead of MAX_NUMNODES commit 050c17f239fd53adb55aa768d4f41bc76c0fe045 upstream. The system call, get_mempolicy() [1], passes an unsigned long *nodemask pointer and an unsigned long maxnode argument which specifies the length of the user's nodemask array in bits (which is rounded up). The manual page says that if the maxnode value is too small, get_mempolicy will return EINVAL but there is no system call to return this minimum value. To determine this value, some programs search /proc//status for a line starting with "Mems_allowed:" and use the number of digits in the mask to determine the minimum value. A recent change to the way this line is formatted [2] causes these programs to compute a value less than MAX_NUMNODES so get_mempolicy() returns EINVAL. Change get_mempolicy(), the older compat version of get_mempolicy(), and the copy_nodes_to_user() function to use nr_node_ids instead of MAX_NUMNODES, thus preserving the defacto method of computing the minimum size for the nodemask array and the maxnode argument. [1] http://man7.org/linux/man-pages/man2/get_mempolicy.2.html [2] https://lore.kernel.org/lkml/1545405631-6808-1-git-send-email-longman@redhat.com Link: http://lkml.kernel.org/r/20190211180245.22295-1-rcampbell@nvidia.com Fixes: 4fb8e5b89bcbbbb ("include/linux/nodemask.h: use nr_node_ids (not MAX_NUMNODES) in __nodemask_pr_numnodes()") Signed-off-by: Ralph Campbell Suggested-by: Alexander Duyck Cc: Waiman Long Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1b93535d875f..1331645a3794 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1325,7 +1325,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, nodemask_t *nodes) { unsigned long copy = ALIGN(maxnode-1, 64) / 8; - const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long); + unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long); if (copy > nbytes) { if (copy > PAGE_SIZE) @@ -1486,7 +1486,7 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, int uninitialized_var(pval); nodemask_t nodes; - if (nmask != NULL && maxnode < MAX_NUMNODES) + if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; err = do_get_mempolicy(&pval, &nodes, addr, flags); @@ -1515,7 +1515,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, unsigned long nr_bits, alloc_size; DECLARE_BITMAP(bm, MAX_NUMNODES); - nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES); + nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids); alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; if (nmask) From e69eb7e8a0c70f2316228aefa3cbf87a190d8fd0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 20 Feb 2019 22:19:42 -0800 Subject: [PATCH 1988/2608] proc, oom: do not report alien mms when setting oom_score_adj commit b2b469939e93458753cfbf8282ad52636495965e upstream. Tetsuo has reported that creating a thousands of processes sharing MM without SIGHAND (aka alien threads) and setting /proc//oom_score_adj will swamp the kernel log and takes ages [1] to finish. This is especially worrisome that all that printing is done under RCU lock and this can potentially trigger RCU stall or softlockup detector. The primary reason for the printk was to catch potential users who might depend on the behavior prior to 44a70adec910 ("mm, oom_adj: make sure processes sharing mm have same view of oom_score_adj") but after more than 2 years without a single report I guess it is safe to simply remove the printk altogether. The next step should be moving oom_score_adj over to the mm struct and remove all the tasks crawling as suggested by [2] [1] http://lkml.kernel.org/r/97fce864-6f75-bca5-14bc-12c9f890e740@i-love.sakura.ne.jp [2] http://lkml.kernel.org/r/20190117155159.GA4087@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20190212102129.26288-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Acked-by: Johannes Weiner Cc: David Rientjes Cc: Yong-Taek Lee Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9063738ff1f0..64695dcf89f3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1108,10 +1108,6 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) task_lock(p); if (!p->vfork_done && process_shares_mm(p, mm)) { - pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n", - task_pid_nr(p), p->comm, - p->signal->oom_score_adj, oom_adj, - task_pid_nr(task), task->comm); p->signal->oom_score_adj = oom_adj; if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) p->signal->oom_score_adj_min = (short)oom_adj; From fe303ba7ab936c0f5dedf931fdef8fd419053336 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 14 Feb 2019 16:20:01 +0000 Subject: [PATCH 1989/2608] KEYS: allow reaching the keys quotas exactly commit a08bf91ce28ed3ae7b6fef35d843fef8dc8c2cd9 upstream. If the sysctl 'kernel.keys.maxkeys' is set to some number n, then actually users can only add up to 'n - 1' keys. Likewise for 'kernel.keys.maxbytes' and the root_* versions of these sysctls. But these sysctls are apparently supposed to be *maximums*, as per their names and all documentation I could find -- the keyrings(7) man page, Documentation/security/keys/core.rst, and all the mentions of EDQUOT meaning that the key quota was *exceeded* (as opposed to reached). Thus, fix the code to allow reaching the quotas exactly. Fixes: 0b77f5bfb45c ("keys: make the keyring quotas controllable through /proc/sys") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/keys/key.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/keys/key.c b/security/keys/key.c index 83bf4b4afd49..87172f99f73e 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -265,8 +265,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, spin_lock(&user->lock); if (!(flags & KEY_ALLOC_QUOTA_OVERRUN)) { - if (user->qnkeys + 1 >= maxkeys || - user->qnbytes + quotalen >= maxbytes || + if (user->qnkeys + 1 > maxkeys || + user->qnbytes + quotalen > maxbytes || user->qnbytes + quotalen < user->qnbytes) goto no_quota; } From 368a5c8beed054f8069de5f480b99e8f1008ce1f Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Mon, 3 Dec 2018 13:31:17 +0530 Subject: [PATCH 1990/2608] mfd: ti_am335x_tscadc: Use PLATFORM_DEVID_AUTO while registering mfd cells [ Upstream commit b40ee006fe6a8a25093434e5d394128c356a48f3 ] Use PLATFORM_DEVID_AUTO to number mfd cells while registering, so that different instances are uniquely identified. This is required in order to support registering of multiple instances of same ti_am335x_tscadc IP. Signed-off-by: Vignesh R Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/ti_am335x_tscadc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 7dc1cbcd2fb8..5894d6c16fab 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -265,8 +265,9 @@ static int ti_tscadc_probe(struct platform_device *pdev) cell->pdata_size = sizeof(tscadc); } - err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells, - tscadc->used_cells, NULL, 0, NULL); + err = mfd_add_devices(&pdev->dev, PLATFORM_DEVID_AUTO, + tscadc->cells, tscadc->used_cells, NULL, + 0, NULL); if (err < 0) goto err_disable_clk; From 46625b8a4687e321394643609291ebe6637a30e1 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 21 Dec 2018 15:06:33 -0800 Subject: [PATCH 1991/2608] pvcalls-back: set -ENOTCONN in pvcalls_conn_back_read [ Upstream commit e6587cdbd732eacb4c7ce592ed46f7bbcefb655f ] When a connection is closing we receive on pvcalls_sk_state_change notification. Instead of setting the connection as closed immediately (-ENOTCONN), let's read one more time from it: pvcalls_conn_back_read will set the connection as closed when necessary. That way, we avoid races between pvcalls_sk_state_change and pvcalls_back_ioworker. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- drivers/xen/pvcalls-back.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 169293c25a91..7185ebb63bd5 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -164,9 +164,10 @@ static void pvcalls_conn_back_read(void *opaque) /* write the data, then modify the indexes */ virt_wmb(); - if (ret < 0) + if (ret < 0) { + atomic_set(&map->read, 0); intf->in_error = ret; - else + } else intf->in_prod = prod + ret; /* update the indexes, then notify the other end */ virt_wmb(); @@ -296,7 +297,7 @@ static void pvcalls_sk_state_change(struct sock *sock) return; intf = map->ring; - intf->in_error = -ENOTCONN; + atomic_inc(&map->read); notify_remote_via_irq(map->irq); } From 8a195375096d2ce059b29f7c15a63204ebc6866a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 10:13:23 -0700 Subject: [PATCH 1992/2608] mfd: twl-core: Fix section annotations on {,un}protect_pm_master [ Upstream commit 8838555089f0345b87f4277fe5a8dd647dc65589 ] When building the kernel with Clang, the following section mismatch warning appears: WARNING: vmlinux.o(.text+0x3d84a3b): Section mismatch in reference from the function twl_probe() to the function .init.text:unprotect_pm_master() The function twl_probe() references the function __init unprotect_pm_master(). This is often because twl_probe lacks a __init annotation or the annotation of unprotect_pm_master is wrong. Remove the __init annotation on the *protect_pm_master functions so there is no more mismatch. Signed-off-by: Nathan Chancellor Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/twl-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index d3133a371e27..8f993272901d 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -979,7 +979,7 @@ add_children(struct twl4030_platform_data *pdata, unsigned irq_base, * letting it generate the right frequencies for USB, MADC, and * other purposes. */ -static inline int __init protect_pm_master(void) +static inline int protect_pm_master(void) { int e = 0; @@ -988,7 +988,7 @@ static inline int __init protect_pm_master(void) return e; } -static inline int __init unprotect_pm_master(void) +static inline int unprotect_pm_master(void) { int e = 0; From 9856cd1a04dc15f8199147caf3f7e8ba2864cb15 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 17:56:28 -0700 Subject: [PATCH 1993/2608] mfd: db8500-prcmu: Fix some section annotations [ Upstream commit a3888f62fe66429fad3be7f2ba962e1e08c26fd6 ] When building the kernel with Clang, the following section mismatch warnings appear: WARNING: vmlinux.o(.text+0x7239cc): Section mismatch in reference from the function db8500_prcmu_probe() to the function .init.text:init_prcm_registers() The function db8500_prcmu_probe() references the function __init init_prcm_registers(). This is often because db8500_prcmu_probe lacks a __init annotation or the annotation of init_prcm_registers is wrong. WARNING: vmlinux.o(.text+0x723e28): Section mismatch in reference from the function db8500_prcmu_probe() to the function .init.text:fw_project_name() The function db8500_prcmu_probe() references the function __init fw_project_name(). This is often because db8500_prcmu_probe lacks a __init annotation or the annotation of fw_project_name is wrong. db8500_prcmu_probe should not be marked as __init so remove the __init annotation from fw_project_name and init_prcm_registers. Signed-off-by: Nathan Chancellor Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/db8500-prcmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 5970b8def548..aec20e1c7d3d 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -2584,7 +2584,7 @@ static struct irq_chip prcmu_irq_chip = { .irq_unmask = prcmu_irq_unmask, }; -static __init char *fw_project_name(u32 project) +static char *fw_project_name(u32 project) { switch (project) { case PRCMU_FW_PROJECT_U8500: @@ -2732,7 +2732,7 @@ void __init db8500_prcmu_early_init(u32 phy_base, u32 size) INIT_WORK(&mb0_transfer.mask_work, prcmu_mask_work); } -static void __init init_prcm_registers(void) +static void init_prcm_registers(void) { u32 val; From bf0abe19df03fe398c7bc4fec954469d9ab467f0 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 22 Oct 2018 10:55:06 +0800 Subject: [PATCH 1994/2608] mfd: mt6397: Do not call irq_domain_remove if PMIC unsupported [ Upstream commit a177276aa098aa47a100d51a13eaaef029604b6d ] If the PMIC ID is unknown, the current code would call irq_domain_remove and panic, as pmic->irq_domain is only initialized by mt6397_irq_init. Return immediately with an error, if the chip ID is unsupported. Signed-off-by: Nicolas Boichat Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/mt6397-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mfd/mt6397-core.c b/drivers/mfd/mt6397-core.c index 04a601f6aebe..0afadea996bb 100644 --- a/drivers/mfd/mt6397-core.c +++ b/drivers/mfd/mt6397-core.c @@ -309,8 +309,7 @@ static int mt6397_probe(struct platform_device *pdev) default: dev_err(&pdev->dev, "unsupported chip: %d\n", id); - ret = -ENODEV; - break; + return -ENODEV; } if (ret) { From c8f1d61766adeb1839eb39b04ddde5299bfc101a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 25 Oct 2018 15:43:44 +0300 Subject: [PATCH 1995/2608] mfd: ab8500-core: Return zero in get_register_interruptible() [ Upstream commit 10628e3ecf544fa2e4e24f8e112d95c37884dc98 ] This function is supposed to return zero on success or negative error codes on error. Unfortunately, there is a bug so it sometimes returns non-zero, positive numbers on success. I noticed this bug during review and I can't test it. It does appear that the return is sometimes propogated back to _regmap_read() where all non-zero returns are treated as failure so this may affect run time. Fixes: 47c1697508f2 ("mfd: Align ab8500 with the abx500 interface") Signed-off-by: Dan Carpenter Reviewed-by: Linus Walleij Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/ab8500-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index 30d09d177171..11ab17f64c64 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -261,7 +261,7 @@ static int get_register_interruptible(struct ab8500 *ab8500, u8 bank, mutex_unlock(&ab8500->lock); dev_vdbg(ab8500->dev, "rd: addr %#x => data %#x\n", addr, ret); - return ret; + return (ret < 0) ? ret : 0; } static int ab8500_get_register(struct device *dev, u8 bank, From 8949511dfaa0f4314402f632091e6add32d76d31 Mon Sep 17 00:00:00 2001 From: Dien Pham Date: Wed, 3 Oct 2018 15:58:41 +0200 Subject: [PATCH 1996/2608] mfd: bd9571mwv: Add volatile register to make DVFS work [ Upstream commit b0aff01e7aa6ad2d6998ef1323843212d1db8b04 ] Because BD9571MWV_DVFS_MONIVDAC is not defined in the volatile table, the physical register value is not updated by regmap and DVFS doesn't work as expected. Fix it! Fixes: d3ea21272094 ("mfd: Add ROHM BD9571MWV-M MFD PMIC driver") Signed-off-by: Dien Pham [wsa: rebase, add 'Fixes', reword commit message] Signed-off-by: Wolfram Sang Reviewed-by: Marek Vasut Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/bd9571mwv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/bd9571mwv.c b/drivers/mfd/bd9571mwv.c index 64e088dfe7b0..98192d4863e4 100644 --- a/drivers/mfd/bd9571mwv.c +++ b/drivers/mfd/bd9571mwv.c @@ -57,6 +57,7 @@ static const struct regmap_access_table bd9571mwv_writable_table = { }; static const struct regmap_range bd9571mwv_volatile_yes_ranges[] = { + regmap_reg_range(BD9571MWV_DVFS_MONIVDAC, BD9571MWV_DVFS_MONIVDAC), regmap_reg_range(BD9571MWV_GPIO_IN, BD9571MWV_GPIO_IN), regmap_reg_range(BD9571MWV_GPIO_INT, BD9571MWV_GPIO_INT), regmap_reg_range(BD9571MWV_INT_INTREQ, BD9571MWV_INT_INTREQ), From 74c14405e96dcdbebf07aa57f5737a24cb666271 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 19 Nov 2018 14:53:17 -0500 Subject: [PATCH 1997/2608] mfd: qcom_rpm: write fw_version to CTRL_REG [ Upstream commit 504e4175829c44328773b96ad9c538e4783a8d22 ] This is required as part of the initialization sequence on certain SoCs. If these registers are not initialized, the hardware can be unresponsive. This fixes the driver on apq8060 (HP TouchPad device). Signed-off-by: Jonathan Marek Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/qcom_rpm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mfd/qcom_rpm.c b/drivers/mfd/qcom_rpm.c index 52fafea06067..8d420c37b2a6 100644 --- a/drivers/mfd/qcom_rpm.c +++ b/drivers/mfd/qcom_rpm.c @@ -638,6 +638,10 @@ static int qcom_rpm_probe(struct platform_device *pdev) return -EFAULT; } + writel(fw_version[0], RPM_CTRL_REG(rpm, 0)); + writel(fw_version[1], RPM_CTRL_REG(rpm, 1)); + writel(fw_version[2], RPM_CTRL_REG(rpm, 2)); + dev_info(&pdev->dev, "RPM firmware %u.%u.%u\n", fw_version[0], fw_version[1], fw_version[2]); From 3c324254fce40ad0ab1c30a9d0be1ed7c6110afa Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 28 Nov 2018 10:04:22 +0000 Subject: [PATCH 1998/2608] mfd: wm5110: Add missing ASRC rate register [ Upstream commit 04c801c18ded421845324255e660147a6f58dcd6 ] Signed-off-by: Charles Keepax Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/wm5110-tables.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 1ee68bd440fb..16c6e2accfaa 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -1618,6 +1618,7 @@ static const struct reg_default wm5110_reg_default[] = { { 0x00000ECD, 0x0000 }, /* R3789 - HPLPF4_2 */ { 0x00000EE0, 0x0000 }, /* R3808 - ASRC_ENABLE */ { 0x00000EE2, 0x0000 }, /* R3810 - ASRC_RATE1 */ + { 0x00000EE3, 0x4000 }, /* R3811 - ASRC_RATE2 */ { 0x00000EF0, 0x0000 }, /* R3824 - ISRC 1 CTRL 1 */ { 0x00000EF1, 0x0000 }, /* R3825 - ISRC 1 CTRL 2 */ { 0x00000EF2, 0x0000 }, /* R3826 - ISRC 1 CTRL 3 */ @@ -2869,6 +2870,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_ASRC_ENABLE: case ARIZONA_ASRC_STATUS: case ARIZONA_ASRC_RATE1: + case ARIZONA_ASRC_RATE2: case ARIZONA_ISRC_1_CTRL_1: case ARIZONA_ISRC_1_CTRL_2: case ARIZONA_ISRC_1_CTRL_3: From b0d7f5c733ca24aa549a54f78333102b4d197a24 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Sun, 9 Dec 2018 19:29:31 +0530 Subject: [PATCH 1999/2608] mfd: tps65218: Use devm_regmap_add_irq_chip and clean up error path in probe() [ Upstream commit 75d4c5e03c2ae9902ab521024b10291f6fc9515b ] Use devm_regmap_add_irq_chip and clean up error path in probe and also the remove function. Reported-by: Christian Hohnstaedt Signed-off-by: Keerthy Reviewed-by: Sebastian Reichel Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/tps65218.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c index 13834a0d2817..612f5ecda78f 100644 --- a/drivers/mfd/tps65218.c +++ b/drivers/mfd/tps65218.c @@ -243,9 +243,9 @@ static int tps65218_probe(struct i2c_client *client, mutex_init(&tps->tps_lock); - ret = regmap_add_irq_chip(tps->regmap, tps->irq, - IRQF_ONESHOT, 0, &tps65218_irq_chip, - &tps->irq_data); + ret = devm_regmap_add_irq_chip(&client->dev, tps->regmap, tps->irq, + IRQF_ONESHOT, 0, &tps65218_irq_chip, + &tps->irq_data); if (ret < 0) return ret; @@ -261,26 +261,9 @@ static int tps65218_probe(struct i2c_client *client, ARRAY_SIZE(tps65218_cells), NULL, 0, regmap_irq_get_domain(tps->irq_data)); - if (ret < 0) - goto err_irq; - - return 0; - -err_irq: - regmap_del_irq_chip(tps->irq, tps->irq_data); - return ret; } -static int tps65218_remove(struct i2c_client *client) -{ - struct tps65218 *tps = i2c_get_clientdata(client); - - regmap_del_irq_chip(tps->irq, tps->irq_data); - - return 0; -} - static const struct i2c_device_id tps65218_id_table[] = { { "tps65218", TPS65218 }, { }, @@ -293,7 +276,6 @@ static struct i2c_driver tps65218_driver = { .of_match_table = of_tps65218_match_table, }, .probe = tps65218_probe, - .remove = tps65218_remove, .id_table = tps65218_id_table, }; From 1f1408d9088c3bb2ce9c4ca066f24b3e147ba14e Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 20 Dec 2018 15:12:11 -0600 Subject: [PATCH 2000/2608] mfd: mc13xxx: Fix a missing check of a register-read failure [ Upstream commit 9e28989d41c0eab57ec0bb156617a8757406ff8a ] When mc13xxx_reg_read() fails, "old_adc0" is uninitialized and will contain random value. Further execution uses "old_adc0" even when mc13xxx_reg_read() fails. The fix checks the return value of mc13xxx_reg_read(), and exits the execution when it fails. Signed-off-by: Kangjie Lu Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/mc13xxx-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index d7f54e492aa6..6c16f170529f 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -274,7 +274,9 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, mc13xxx->adcflags |= MC13XXX_ADC_WORKING; - mc13xxx_reg_read(mc13xxx, MC13XXX_ADC0, &old_adc0); + ret = mc13xxx_reg_read(mc13xxx, MC13XXX_ADC0, &old_adc0); + if (ret) + goto out; adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2; adc1 = MC13XXX_ADC1_ADEN | MC13XXX_ADC1_ADTRIGIGN | MC13XXX_ADC1_ASC; From 3abbc145c7f7dd6f2ba3e89a8e2e691fb16e1f85 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 4 Jan 2019 06:03:40 +0000 Subject: [PATCH 2001/2608] xen/pvcalls: remove set but not used variable 'intf' [ Upstream commit 1f8ce09b36c41a026a37a24b20efa32000892a64 ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/xen/pvcalls-back.c: In function 'pvcalls_sk_state_change': drivers/xen/pvcalls-back.c:286:28: warning: variable 'intf' set but not used [-Wunused-but-set-variable] It not used since e6587cdbd732 ("pvcalls-back: set -ENOTCONN in pvcalls_conn_back_read") Signed-off-by: YueHaibing Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- drivers/xen/pvcalls-back.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 7185ebb63bd5..abd6dbc29ac2 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -291,12 +291,10 @@ static int pvcalls_back_socket(struct xenbus_device *dev, static void pvcalls_sk_state_change(struct sock *sock) { struct sock_mapping *map = sock->sk_user_data; - struct pvcalls_data_intf *intf; if (map == NULL) return; - intf = map->ring; atomic_inc(&map->read); notify_remote_via_irq(map->irq); } From a096429cac56f3d4571ae7914f98bd2eb201b5fe Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 3 Jan 2019 12:02:39 +0200 Subject: [PATCH 2002/2608] qed: Fix qed_chain_set_prod() for PBL chains with non power of 2 page count [ Upstream commit 2d533a9287f2011632977e87ce2783f4c689c984 ] In PBL chains with non power of 2 page count, the producer is not at the beginning of the chain when index is 0 after a wrap. Therefore, after the producer index wrap around, page index should be calculated more carefully. Signed-off-by: Denis Bolotin Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/qed/qed_chain.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 59ddf9af909e..2dd0a9ed5b36 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -663,6 +663,37 @@ out: static inline void qed_chain_set_prod(struct qed_chain *p_chain, u32 prod_idx, void *p_prod_elem) { + if (p_chain->mode == QED_CHAIN_MODE_PBL) { + u32 cur_prod, page_mask, page_cnt, page_diff; + + cur_prod = is_chain_u16(p_chain) ? p_chain->u.chain16.prod_idx : + p_chain->u.chain32.prod_idx; + + /* Assume that number of elements in a page is power of 2 */ + page_mask = ~p_chain->elem_per_page_mask; + + /* Use "cur_prod - 1" and "prod_idx - 1" since producer index + * reaches the first element of next page before the page index + * is incremented. See qed_chain_produce(). + * Index wrap around is not a problem because the difference + * between current and given producer indices is always + * positive and lower than the chain's capacity. + */ + page_diff = (((cur_prod - 1) & page_mask) - + ((prod_idx - 1) & page_mask)) / + p_chain->elem_per_page; + + page_cnt = qed_chain_get_page_cnt(p_chain); + if (is_chain_u16(p_chain)) + p_chain->pbl.c.u16.prod_page_idx = + (p_chain->pbl.c.u16.prod_page_idx - + page_diff + page_cnt) % page_cnt; + else + p_chain->pbl.c.u32.prod_page_idx = + (p_chain->pbl.c.u32.prod_page_idx - + page_diff + page_cnt) % page_cnt; + } + if (is_chain_u16(p_chain)) p_chain->u.chain16.prod_idx = (u16) prod_idx; else From 10b1df1928ab64d5904d07cdb5445621046b02c5 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 3 Jan 2019 12:02:40 +0200 Subject: [PATCH 2003/2608] qed: Fix qed_ll2_post_rx_buffer_notify_fw() by adding a write memory barrier [ Upstream commit 46721c3d9e273aea880e9ff835b0e1271e1cd2fb ] Make sure chain element is updated before ringing the doorbell. Signed-off-by: Denis Bolotin Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index b73bcbeb5f27..cef619f0ce10 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1487,6 +1487,10 @@ static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, cq_prod = qed_chain_get_prod_idx(&p_rx->rcq_chain); rx_prod.bd_prod = cpu_to_le16(bd_prod); rx_prod.cqe_prod = cpu_to_le16(cq_prod); + + /* Make sure chain element is updated before ringing the doorbell */ + dma_wmb(); + DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod)); } From ada3ccb5e9b90961416734c882e3705af2695e35 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 4 Jan 2019 20:18:11 +0800 Subject: [PATCH 2004/2608] net: hns: Fix use after free identified by SLUB debug [ Upstream commit bb989501abcafa0de5f18b0ec0ec459b5b817908 ] When enable SLUB debug, than remove hns_enet_drv module, SLUB debug will identify a use after free bug: [134.189505] Unable to handle kernel paging request at virtual address 006b6b6b6b6b6b6b [134.197553] Mem abort info: [134.200381] ESR = 0x96000004 [134.203487] Exception class = DABT (current EL), IL = 32 bits [134.209497] SET = 0, FnV = 0 [134.212596] EA = 0, S1PTW = 0 [134.215777] Data abort info: [134.218701] ISV = 0, ISS = 0x00000004 [134.222596] CM = 0, WnR = 0 [134.225606] [006b6b6b6b6b6b6b] address between user and kernel address ranges [134.232851] Internal error: Oops: 96000004 [#1] SMP [134.237798] CPU: 21 PID: 27834 Comm: rmmod Kdump: loaded Tainted: G OE 4.19.5-1.2.34.aarch64 #1 [134.247856] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.58 10/24/2018 [134.255181] pstate: 20000005 (nzCv daif -PAN -UAO) [134.260044] pc : hns_ae_put_handle+0x38/0x60 [134.264372] lr : hns_ae_put_handle+0x24/0x60 [134.268700] sp : ffff00001be93c50 [134.272054] x29: ffff00001be93c50 x28: ffff802faaec8040 [134.277442] x27: 0000000000000000 x26: 0000000000000000 [134.282830] x25: 0000000056000000 x24: 0000000000000015 [134.288284] x23: ffff0000096fe098 x22: ffff000001050070 [134.293671] x21: ffff801fb3c044a0 x20: ffff80afb75ec098 [134.303287] x19: ffff80afb75ec098 x18: 0000000000000000 [134.312945] x17: 0000000000000000 x16: 0000000000000000 [134.322517] x15: 0000000000000002 x14: 0000000000000000 [134.332030] x13: dead000000000100 x12: ffff7e02bea3c988 [134.341487] x11: ffff80affbee9e68 x10: 0000000000000000 [134.351033] x9 : 6fffff8000008101 x8 : 0000000000000000 [134.360569] x7 : dead000000000100 x6 : ffff000009579748 [134.370059] x5 : 0000000000210d00 x4 : 0000000000000000 [134.379550] x3 : 0000000000000001 x2 : 0000000000000000 [134.388813] x1 : 6b6b6b6b6b6b6b6b x0 : 0000000000000000 [134.397993] Process rmmod (pid: 27834, stack limit = 0x00000000d474b7fd) [134.408498] Call trace: [134.414611] hns_ae_put_handle+0x38/0x60 [134.422208] hnae_put_handle+0xd4/0x108 [134.429563] hns_nic_dev_remove+0x60/0xc0 [hns_enet_drv] [134.438342] platform_drv_remove+0x2c/0x70 [134.445958] device_release_driver_internal+0x174/0x208 [134.454810] driver_detach+0x70/0xd8 [134.461913] bus_remove_driver+0x64/0xe8 [134.469396] driver_unregister+0x34/0x60 [134.476822] platform_driver_unregister+0x20/0x30 [134.485130] hns_nic_dev_driver_exit+0x14/0x6e4 [hns_enet_drv] [134.494634] __arm64_sys_delete_module+0x238/0x290 struct hnae_handle is a member of struct hnae_vf_cb, so when vf_cb is freed, than use hnae_handle will cause use after free panic. This patch frees vf_cb after hnae_handle used. Signed-off-by: Yonglong Liu Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index a185a8be7999..53904f257b19 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -147,12 +147,10 @@ static void hns_ae_put_handle(struct hnae_handle *handle) struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle); int i; - vf_cb->mac_cb = NULL; - - kfree(vf_cb); - for (i = 0; i < handle->q_num; i++) hns_ae_get_ring_pair(handle->qs[i])->used_by_vf = 0; + + kfree(vf_cb); } static int hns_ae_wait_flow_down(struct hnae_handle *handle) From 0fe178a747ddbfdf7cf68d03ec9771e01ac95087 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Mon, 7 Jan 2019 20:45:15 +0100 Subject: [PATCH 2005/2608] MIPS: ath79: Enable OF serial ports in the default config [ Upstream commit 565dc8a4f55e491935bfb04866068d21784ea9a4 ] CONFIG_SERIAL_OF_PLATFORM is needed to get a working console on the OF boards, enable it in the default config to get a working setup out of the box. Signed-off-by: Alban Bedel Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/configs/ath79_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 25ed914933e5..8a22978be1e6 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -72,6 +72,7 @@ CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_PCI is not set CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AR933X=y CONFIG_SERIAL_AR933X_CONSOLE=y # CONFIG_HW_RANDOM is not set From 4c541a5aec7dde179457cdff8b0f50c8d4ace050 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 4 Jan 2019 17:56:16 +0900 Subject: [PATCH 2006/2608] netfilter: nf_tables: fix leaking object reference count [ Upstream commit b91d9036883793122cf6575ca4dfbfbdd201a83d ] There is no code that decreases the reference count of stateful objects in error path of the nft_add_set_elem(). this causes a leak of reference count of stateful objects. Test commands: $nft add table ip filter $nft add counter ip filter c1 $nft add map ip filter m1 { type ipv4_addr : counter \;} $nft add element ip filter m1 { 1 : c1 } $nft add element ip filter m1 { 1 : c1 } $nft delete element ip filter m1 { 1 } $nft delete counter ip filter c1 Result: Error: Could not process rule: Device or resource busy delete counter ip filter c1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ At the second 'nft add element ip filter m1 { 1 : c1 }', the reference count of the 'c1' is increased then it tries to insert into the 'm1'. but the 'm1' already has same element so it returns -EEXIST. But it doesn't decrease the reference count of the 'c1' in the error path. Due to a leak of the reference count of the 'c1', the 'c1' can't be removed by 'nft delete counter ip filter c1'. Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 623ec29ade26..bf26e27ca456 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4046,6 +4046,8 @@ err6: err5: kfree(trans); err4: + if (obj) + obj->use--; kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) From c9062c77b061ff44176ec06466857a271718999b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 Dec 2018 11:16:07 +0800 Subject: [PATCH 2007/2608] scsi: qla4xxx: check return code of qla4xxx_copy_from_fwddb_param [ Upstream commit 72b4a0465f995175a2e22cf4a636bf781f1f28a7 ] The return code should be check while qla4xxx_copy_from_fwddb_param fails. Signed-off-by: YueHaibing Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla4xxx/ql4_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index a6aa08d9a171..22dc70a2138e 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -7241,6 +7241,8 @@ static int qla4xxx_sysfs_ddb_tgt_create(struct scsi_qla_host *ha, rc = qla4xxx_copy_from_fwddb_param(fnode_sess, fnode_conn, fw_ddb_entry); + if (rc) + goto free_sess; ql4_printk(KERN_INFO, ha, "%s: sysfs entry %s created\n", __func__, fnode_sess->dev.kobj.name); From d8c1fc1b64727399f196f72ef5ed766188c010ad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 8 Jan 2019 13:50:43 -0700 Subject: [PATCH 2008/2608] scsi: isci: initialize shost fully before calling scsi_add_host() [ Upstream commit cc29a1b0a3f2597ce887d339222fa85b9307706d ] scsi_mq_setup_tags(), which is called by scsi_add_host(), calculates the command size to allocate based on the prot_capabilities. In the isci driver, scsi_host_set_prot() is called after scsi_add_host() so the command size gets calculated to be smaller than it needs to be. Eventually, scsi_mq_init_request() locates the 'prot_sdb' after the command assuming it was sized correctly and a buffer overrun may occur. However, seeing blk_mq_alloc_rqs() rounds up to the nearest cache line size, the mistake can go unnoticed. The bug was noticed after the struct request size was reduced by commit 9d037ad707ed ("block: remove req->timeout_list") Which likely reduced the allocated space for the request by an entire cache line, enough that the overflow could be hit and it caused a panic, on boot, at: RIP: 0010:t10_pi_complete+0x77/0x1c0 Call Trace: sd_done+0xf5/0x340 scsi_finish_command+0xc3/0x120 blk_done_softirq+0x83/0xb0 __do_softirq+0xa1/0x2e6 irq_exit+0xbc/0xd0 call_function_single_interrupt+0xf/0x20 sd_done() would call scsi_prot_sg_count() which reads the number of entities in 'prot_sdb', but seeing 'prot_sdb' is located after the end of the allocated space it reads a garbage number and erroneously calls t10_pi_complete(). To prevent this, the calls to scsi_host_set_prot() are moved into isci_host_alloc() before the call to scsi_add_host(). Out of caution, also move the similar call to scsi_host_set_guard(). Fixes: 3d2d75254915 ("[SCSI] isci: T10 DIF support") Link: http://lkml.kernel.org/r/da851333-eadd-163a-8c78-e1f4ec5ec857@deltatee.com Signed-off-by: Logan Gunthorpe Cc: Intel SCU Linux support Cc: Artur Paszkiewicz Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Christoph Hellwig Cc: Jens Axboe Cc: Jeff Moyer Reviewed-by: Jeff Moyer Reviewed-by: Jens Axboe Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/isci/init.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 922e3e56c90d..c71e0f3b146a 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -591,6 +591,13 @@ static struct isci_host *isci_host_alloc(struct pci_dev *pdev, int id) shost->max_lun = ~0; shost->max_cmd_len = MAX_COMMAND_SIZE; + /* turn on DIF support */ + scsi_host_set_prot(shost, + SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIF_TYPE3_PROTECTION); + scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + err = scsi_add_host(shost, &pdev->dev); if (err) goto err_shost; @@ -678,13 +685,6 @@ static int isci_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_host_alloc; } pci_info->hosts[i] = h; - - /* turn on DIF support */ - scsi_host_set_prot(to_shost(h), - SHOST_DIF_TYPE1_PROTECTION | - SHOST_DIF_TYPE2_PROTECTION | - SHOST_DIF_TYPE3_PROTECTION); - scsi_host_set_guard(to_shost(h), SHOST_DIX_GUARD_CRC); } err = isci_setup_interrupts(pdev); From d6e33e07b8311764414969f5ddde93e052d226ea Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Jan 2019 18:12:16 +0100 Subject: [PATCH 2009/2608] MIPS: jazz: fix 64bit build [ Upstream commit 41af167fbc0032f9d7562854f58114eaa9270336 ] 64bit JAZZ builds failed with linux-next/arch/mips/jazz/jazzdma.c: In function `vdma_init`: /linux-next/arch/mips/jazz/jazzdma.c:77:30: error: implicit declaration of function `KSEG1ADDR`; did you mean `CKSEG1ADDR`? [-Werror=implicit-function-declaration] pgtbl = (VDMA_PGTBL_ENTRY *)KSEG1ADDR(pgtbl); ^~~~~~~~~ CKSEG1ADDR /linux-next/arch/mips/jazz/jazzdma.c:77:10: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] pgtbl = (VDMA_PGTBL_ENTRY *)KSEG1ADDR(pgtbl); ^ In file included from /linux-next/arch/mips/include/asm/barrier.h:11:0, from /linux-next/include/linux/compiler.h:248, from /linux-next/include/linux/kernel.h:10, from /linux-next/arch/mips/jazz/jazzdma.c:11: /linux-next/arch/mips/include/asm/addrspace.h:41:29: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] #define _ACAST32_ (_ATYPE_)(_ATYPE32_) /* widen if necessary */ ^ /linux-next/arch/mips/include/asm/addrspace.h:53:25: note: in expansion of macro `_ACAST32_` #define CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) ^~~~~~~~~ /linux-next/arch/mips/jazz/jazzdma.c:84:44: note: in expansion of macro `CPHYSADDR` r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, CPHYSADDR(pgtbl)); Using correct casts and CKSEG1ADDR when dealing with the pgtbl setup fixes this. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/jazz/jazzdma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index d626a9a391cc..e3c3d9483e14 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -72,14 +72,15 @@ static int __init vdma_init(void) get_order(VDMA_PGTBL_SIZE)); BUG_ON(!pgtbl); dma_cache_wback_inv((unsigned long)pgtbl, VDMA_PGTBL_SIZE); - pgtbl = (VDMA_PGTBL_ENTRY *)KSEG1ADDR(pgtbl); + pgtbl = (VDMA_PGTBL_ENTRY *)CKSEG1ADDR((unsigned long)pgtbl); /* * Clear the R4030 translation table */ vdma_pgtbl_init(); - r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, CPHYSADDR(pgtbl)); + r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, + CPHYSADDR((unsigned long)pgtbl)); r4030_write_reg32(JAZZ_R4030_TRSTBL_LIM, VDMA_PGTBL_SIZE); r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0); From b1bf951df56775f4401ece4101470936b47c27c1 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 8 Jan 2019 18:12:24 -0800 Subject: [PATCH 2010/2608] bpf: correctly set initial window on active Fast Open sender [ Upstream commit 31aa6503a15ba00182ea6dbbf51afb63bf9e851d ] The existing BPF TCP initial congestion window (TCP_BPF_IW) does not to work on (active) Fast Open sender. This is because it changes the (initial) window only if data_segs_out is zero -- but data_segs_out is also incremented on SYN-data. This patch fixes the issue by proerly accounting for SYN-data additionally. Fixes: fc7478103c84 ("bpf: Adds support for setting initial cwnd") Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Acked-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 542fd04bc44d..a8a9ff0568b9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3128,7 +3128,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some options are supported */ switch (optname) { case TCP_BPF_IW: - if (val <= 0 || tp->data_segs_out > 0) + if (val <= 0 || tp->data_segs_out > tp->syn_data) ret = -EINVAL; else tp->snd_cwnd = val; From 486942ea8d78a034942a2f6729b8b013bc34a012 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 9 Jan 2019 10:05:56 +0100 Subject: [PATCH 2011/2608] net: stmmac: Fix PCI module removal leak [ Upstream commit 6dea7e1881fd86b80da64e476ac398008daed857 ] Since commit b7d0f08e9129, the enable / disable of PCI device is not managed which will result in IO regions not being automatically unmapped. As regions continue mapped it is currently not possible to remove and then probe again the PCI module of stmmac. Fix this by manually unmapping regions on remove callback. Changes from v1: - Fix build error Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Fixes: b7d0f08e9129 ("net: stmmac: Fix WoL for PCI-based setups") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index c54a50dbd5ac..d819e8eaba12 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -299,7 +299,17 @@ static int stmmac_pci_probe(struct pci_dev *pdev, */ static void stmmac_pci_remove(struct pci_dev *pdev) { + int i; + stmmac_dvr_remove(&pdev->dev); + + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + pcim_iounmap_regions(pdev, BIT(i)); + break; + } + pci_disable_device(pdev); } From 64717501b578a37243d6ad8bb9698745d260386e Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 8 Jan 2019 21:04:48 +0800 Subject: [PATCH 2012/2608] isdn: i4l: isdn_tty: Fix some concurrency double-free bugs [ Upstream commit 2ff33d6637393fe9348357285931811b76e1402f ] The functions isdn_tty_tiocmset() and isdn_tty_set_termios() may be concurrently executed. isdn_tty_tiocmset isdn_tty_modem_hup line 719: kfree(info->dtmf_state); line 721: kfree(info->silence_state); line 723: kfree(info->adpcms); line 725: kfree(info->adpcmr); isdn_tty_set_termios isdn_tty_modem_hup line 719: kfree(info->dtmf_state); line 721: kfree(info->silence_state); line 723: kfree(info->adpcms); line 725: kfree(info->adpcmr); Thus, some concurrency double-free bugs may occur. These possible bugs are found by a static tool written by myself and my manual code review. To fix these possible bugs, the mutex lock "modem_info_mutex" used in isdn_tty_tiocmset() is added in isdn_tty_set_termios(). Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/i4l/isdn_tty.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index d30130c8d0f3..b107452e16df 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -1456,15 +1456,19 @@ isdn_tty_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { modem_info *info = (modem_info *) tty->driver_data; + mutex_lock(&modem_info_mutex); if (!old_termios) isdn_tty_change_speed(info); else { if (tty->termios.c_cflag == old_termios->c_cflag && tty->termios.c_ispeed == old_termios->c_ispeed && - tty->termios.c_ospeed == old_termios->c_ospeed) + tty->termios.c_ospeed == old_termios->c_ospeed) { + mutex_unlock(&modem_info_mutex); return; + } isdn_tty_change_speed(info); } + mutex_unlock(&modem_info_mutex); } /* From f7e2a3857feb21fe06fc1d65363b6e3c5aef0cee Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Mon, 7 Jan 2019 22:19:34 +0800 Subject: [PATCH 2013/2608] scsi: ufs: Fix system suspend status [ Upstream commit ce9e7bce43526626f7cffe2e657953997870197e ] hba->is_sys_suspended is set after successful system suspend but not clear after successful system resume. According to current behavior, hba->is_sys_suspended will not be set if host is runtime-suspended but not system-suspended. Thus we shall aligh the same policy: clear this flag even if host remains runtime-suspended after ufshcd_system_resume is successfully returned. Simply fix this flag to correct host status logs. Signed-off-by: Stanley Chu Reviewed-by: Avri Altman Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 21c81c1feac5..66540491839e 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7520,6 +7520,8 @@ out: trace_ufshcd_system_resume(dev_name(hba->dev), ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); + if (!ret) + hba->is_sys_suspended = false; return ret; } EXPORT_SYMBOL(ufshcd_system_resume); From 88c556795a158bbdee44abb46ac195eef4d5e2f6 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Wed, 9 Jan 2019 01:39:07 -0800 Subject: [PATCH 2014/2608] scsi: qedi: Add ep_state for login completion on un-reachable targets [ Upstream commit 34a2ce887668db9dda4b56e6f155c49ac13f3e54 ] When the driver finds invalid destination MAC for the first un-reachable target, and before completes the PATH_REQ operation, set new ep_state to OFFLDCONN_NONE so that as part of driver ep_poll mechanism, the upper open-iscsi layer is notified to complete the login process on the first un-reachable target and thus proceed login to other reachable targets. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qedi/qedi_iscsi.c | 3 +++ drivers/scsi/qedi/qedi_iscsi.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index a02b34ea5cab..45f044f35cea 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -961,6 +961,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) qedi_ep = ep->dd_data; if (qedi_ep->state == EP_STATE_IDLE || + qedi_ep->state == EP_STATE_OFLDCONN_NONE || qedi_ep->state == EP_STATE_OFLDCONN_FAILED) return -1; @@ -1043,6 +1044,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) switch (qedi_ep->state) { case EP_STATE_OFLDCONN_START: + case EP_STATE_OFLDCONN_NONE: goto ep_release_conn; case EP_STATE_OFLDCONN_FAILED: break; @@ -1233,6 +1235,7 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) if (!is_valid_ether_addr(&path_data->mac_addr[0])) { QEDI_NOTICE(&qedi->dbg_ctx, "dst mac NOT VALID\n"); + qedi_ep->state = EP_STATE_OFLDCONN_NONE; ret = -EIO; goto set_path_exit; } diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h index 3247287cb0e7..812b4b68e6e4 100644 --- a/drivers/scsi/qedi/qedi_iscsi.h +++ b/drivers/scsi/qedi/qedi_iscsi.h @@ -59,6 +59,7 @@ enum { EP_STATE_OFLDCONN_FAILED = 0x2000, EP_STATE_CONNECT_FAILED = 0x4000, EP_STATE_DISCONN_TIMEDOUT = 0x8000, + EP_STATE_OFLDCONN_NONE = 0x10000, }; struct qedi_conn; From b226fe953da01c4f6267e65c178e315526cb244f Mon Sep 17 00:00:00 2001 From: Talons Lee Date: Mon, 10 Dec 2018 18:03:00 +0800 Subject: [PATCH 2015/2608] always clear the X2APIC_ENABLE bit for PV guest [ Upstream commit 5268c8f39e0efef81af2aaed160272d9eb507beb ] Commit e657fcc clears cpu capability bit instead of using fake cpuid value, the EXTD should always be off for PV guest without depending on cpuid value. So remove the cpuid check in xen_read_msr_safe() to always clear the X2APIC_ENABLE bit. Signed-off-by: Talons Lee Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- arch/x86/xen/enlighten_pv.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index fd173e6425cc..481d7920ea24 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -900,10 +900,7 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) val = native_read_msr_safe(msr, err); switch (msr) { case MSR_IA32_APICBASE: -#ifdef CONFIG_X86_X2APIC - if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31)))) -#endif - val &= ~X2APIC_ENABLE; + val &= ~X2APIC_ENABLE; break; } return val; From 170c71065fec7c5851b2219793788cfdbf2c6eba Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Jan 2019 10:44:51 +0100 Subject: [PATCH 2016/2608] drm/meson: add missing of_node_put [ Upstream commit f672b93e4a0a4947d2e1103ed8780e01e13eadb6 ] Add an of_node_put when the result of of_graph_get_remote_port_parent is not available. An of_node_put is also needed when meson_probe_remote completes. This was present at the recursive call, but not in the call from meson_drv_probe. The semantic match that finds this problem is as follows (http://coccinelle.lip6.fr): // @r exists@ local idexpression e; expression x; @@ e = of_graph_get_remote_port_parent(...); ... when != x = e when != true e == NULL when != of_node_put(e) when != of_fwnode_handle(e) ( return e; | *return ...; ) // Signed-off-by: Julia Lawall Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/1547372691-28324-4-git-send-email-Julia.Lawall@lip6.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 4ad8223c60ea..5deb44ac6791 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -345,8 +345,10 @@ static int meson_probe_remote(struct platform_device *pdev, remote_node = of_graph_get_remote_port_parent(ep); if (!remote_node || remote_node == parent || /* Ignore parent endpoint */ - !of_device_is_available(remote_node)) + !of_device_is_available(remote_node)) { + of_node_put(remote_node); continue; + } count += meson_probe_remote(pdev, match, remote, remote_node); @@ -365,10 +367,13 @@ static int meson_drv_probe(struct platform_device *pdev) for_each_endpoint_of_node(np, ep) { remote = of_graph_get_remote_port_parent(ep); - if (!remote || !of_device_is_available(remote)) + if (!remote || !of_device_is_available(remote)) { + of_node_put(remote); continue; + } count += meson_probe_remote(pdev, &match, np, remote); + of_node_put(remote); } if (count && !match) From e5d7ffdeec096953e097d4f7b5565c455fcc9bf6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 15 Jan 2019 18:03:38 +0000 Subject: [PATCH 2017/2608] atm: he: fix sign-extension overflow on large shift [ Upstream commit cb12d72b27a6f41325ae23a11033cf5fedfa1b97 ] Shifting the 1 by exp by an int can lead to sign-extension overlow when exp is 31 since 1 is an signed int and sign-extending this result to an unsigned long long will set the upper 32 bits. Fix this by shifting an unsigned long. Detected by cppcheck: (warning) Shifting signed 32-bit value by 31 bits is undefined behaviour Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/atm/he.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/he.c b/drivers/atm/he.c index e58538c29377..7ba243691004 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -717,7 +717,7 @@ static int he_init_cs_block_rcm(struct he_dev *he_dev) instead of '/ 512', use '>> 9' to prevent a call to divdu3 on x86 platforms */ - rate_cps = (unsigned long long) (1 << exp) * (man + 512) >> 9; + rate_cps = (unsigned long long) (1UL << exp) * (man + 512) >> 9; if (rate_cps < 10) rate_cps = 10; /* 2.2.1 minimum payload rate is 10 cps */ From 733d1915bf571616746f563632015d3bf568bc87 Mon Sep 17 00:00:00 2001 From: Cheng-Min Ao Date: Mon, 7 Jan 2019 14:29:32 +0800 Subject: [PATCH 2018/2608] hwmon: (tmp421) Correct the misspelling of the tmp442 compatible attribute in OF device ID table [ Upstream commit f422449b58548a41e98fc97b259a283718e527db ] Correct a typo in OF device ID table The last one should be 'ti,tmp442' Signed-off-by: Cheng-Min Ao Signed-off-by: Yu-Hsiang Chen Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/tmp421.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index e36399213324..ceb3db6f3fdd 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -88,7 +88,7 @@ static const struct of_device_id tmp421_of_match[] = { .data = (void *)2 }, { - .compatible = "ti,tmp422", + .compatible = "ti,tmp442", .data = (void *)3 }, { }, From caa27a81df46e5a697ca6731677deeb45100883b Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 22:18:23 -0600 Subject: [PATCH 2019/2608] leds: lp5523: fix a missing check of return value of lp55xx_read [ Upstream commit 248b57015f35c94d4eae2fdd8c6febf5cd703900 ] When lp55xx_read() fails, "status" is an uninitialized variable and thus may contain random value; using it leads to undefined behaviors. The fix inserts a check for the return value of lp55xx_read: if it fails, returns with its error code. Signed-off-by: Kangjie Lu Signed-off-by: Jacek Anaszewski Signed-off-by: Sasha Levin --- drivers/leds/leds-lp5523.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index 924e50aefb00..13838d72e297 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -318,7 +318,9 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip) /* Let the programs run for couple of ms and check the engine status */ usleep_range(3000, 6000); - lp55xx_read(chip, LP5523_REG_STATUS, &status); + ret = lp55xx_read(chip, LP5523_REG_STATUS, &status); + if (ret) + return ret; status &= LP5523_ENG_STATUS_MASK; if (status != LP5523_ENG_STATUS_MASK) { From f040249c14ed75436f86e9b59e7e0eeb1027ff6c Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 16 Jan 2019 08:47:54 -0800 Subject: [PATCH 2020/2608] bpf: bpf_setsockopt: reset sock dst on SO_MARK changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f4924f24da8c7ef64195096817f3cde324091d97 ] In sock_setsockopt() (net/core/sock.h), when SO_MARK option is used to change sk_mark, sk_dst_reset(sk) is called. The same should be done in bpf_setsockopt(). Fixes: 8c4b4c7e9ff0 ("bpf: Add setsockopt helper function to bpf") Reported-by: Maciej Żenczykowski Signed-off-by: Peter Oskolkov Acked-by: Martin KaFai Lau Reviewed-by: Maciej Żenczykowski Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- net/core/filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index a8a9ff0568b9..41ede90fc28f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3102,7 +3102,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_rcvlowat = val ? : 1; break; case SO_MARK: - sk->sk_mark = val; + if (sk->sk_mark != val) { + sk->sk_mark = val; + sk_dst_reset(sk); + } break; default: ret = -EINVAL; From 91252e8e872272d09b3f7b5552b2f90cf02be669 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 18 Jan 2019 15:58:01 +0000 Subject: [PATCH 2021/2608] mlxsw: spectrum_switchdev: Do not treat static FDB entries as sticky [ Upstream commit 64254a2054611205798e6bde634639bc704573ac ] The driver currently treats static FDB entries as both static and sticky. This is incorrect and prevents such entries from being roamed to a different port via learning. Fix this by configuring static entries with ageing disabled and roaming enabled. In net-next we can add proper support for the newly introduced 'sticky' flag. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Reviewed-by: Petr Machata Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f33fb95c4189..3ba9f2c079b2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1086,7 +1086,7 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) { return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG; } static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) @@ -1098,7 +1098,7 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, const char *mac, u16 fid, bool adding, enum mlxsw_reg_sfd_rec_action action, - bool dynamic) + enum mlxsw_reg_sfd_rec_policy policy) { char *sfd_pl; u8 num_rec; @@ -1109,8 +1109,7 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, return -ENOMEM; mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, fid, action, local_port); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, mac, fid, action, local_port); num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) @@ -1129,7 +1128,8 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool dynamic) { return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, - MLXSW_REG_SFD_REC_ACTION_NOP, dynamic); + MLXSW_REG_SFD_REC_ACTION_NOP, + mlxsw_sp_sfd_rec_policy(dynamic)); } int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, @@ -1137,7 +1137,7 @@ int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, { return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding, MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER, - false); + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY); } static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, From 1bbc4cbe1f77b77267f2d0b46aff51efd1dec08b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 8 Nov 2018 12:06:53 +0200 Subject: [PATCH 2022/2608] net/mlx5e: Fix wrong (zero) TX drop counter indication for representor [ Upstream commit 7fdc1adc52d3975740547a78c2df329bb207f15d ] For representors, the TX dropped counter is not folded from the per-ring counters. Fix it. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 281911698f72..e69674d38f16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -125,6 +125,7 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; + s->tx_queue_dropped += sq_stats->dropped; } } } From 3cbc51d68f485a7b339562db0d95175e4a7014ac Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 9 Jan 2019 22:41:08 -0700 Subject: [PATCH 2023/2608] isdn: avm: Fix string plus integer warning from Clang [ Upstream commit 7afa81c55fca0cad589722cb4bce698b4803b0e1 ] A recent commit in Clang expanded the -Wstring-plus-int warning, showing some odd behavior in this file. drivers/isdn/hardware/avm/b1.c:426:30: warning: adding 'int' to a string does not append to the string [-Wstring-plus-int] cinfo->version[j] = "\0\0" + 1; ~~~~~~~^~~ drivers/isdn/hardware/avm/b1.c:426:30: note: use array indexing to silence this warning cinfo->version[j] = "\0\0" + 1; ^ & [ ] 1 warning generated. This is equivalent to just "\0". Nick pointed out that it is smarter to use "" instead of "\0" because "" is used elsewhere in the kernel and can be deduplicated at the linking stage. Link: https://github.com/ClangBuiltLinux/linux/issues/309 Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/hardware/avm/b1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c index b1833d08a5fe..40a099f33bfc 100644 --- a/drivers/isdn/hardware/avm/b1.c +++ b/drivers/isdn/hardware/avm/b1.c @@ -423,7 +423,7 @@ void b1_parse_version(avmctrl_info *cinfo) int i, j; for (j = 0; j < AVM_MAXVERSION; j++) - cinfo->version[j] = "\0\0" + 1; + cinfo->version[j] = ""; for (i = 0, j = 0; j < AVM_MAXVERSION && i < cinfo->versionlen; j++, i += cinfo->versionbuf[i] + 1) From f08f5424a728156caca52efc2678fdec05e51439 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Feb 2019 14:41:22 -0800 Subject: [PATCH 2024/2608] batman-adv: fix uninit-value in batadv_interface_tx() [ Upstream commit 4ffcbfac60642f63ae3d80891f573ba7e94a265c ] KMSAN reported batadv_interface_tx() was possibly using a garbage value [1] batadv_get_vid() does have a pskb_may_pull() call but batadv_interface_tx() does not actually make sure this did not fail. [1] BUG: KMSAN: uninit-value in batadv_interface_tx+0x908/0x1e40 net/batman-adv/soft-interface.c:231 CPU: 0 PID: 10006 Comm: syz-executor469 Not tainted 4.20.0-rc7+ #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 batadv_interface_tx+0x908/0x1e40 net/batman-adv/soft-interface.c:231 __netdev_start_xmit include/linux/netdevice.h:4356 [inline] netdev_start_xmit include/linux/netdevice.h:4365 [inline] xmit_one net/core/dev.c:3257 [inline] dev_hard_start_xmit+0x607/0xc40 net/core/dev.c:3273 __dev_queue_xmit+0x2e42/0x3bc0 net/core/dev.c:3843 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3876 packet_snd net/packet/af_packet.c:2928 [inline] packet_sendmsg+0x8306/0x8f30 net/packet/af_packet.c:2953 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] __sys_sendto+0x8c4/0xac0 net/socket.c:1788 __do_sys_sendto net/socket.c:1800 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1796 __x64_sys_sendto+0x6e/0x90 net/socket.c:1796 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x441889 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 bb 10 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffdda6fd468 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 0000000000441889 RDX: 000000000000000e RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00007ffdda6fd4c0 R13: 00007ffdda6fd4b0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:204 [inline] kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:158 kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176 kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2759 [inline] __kmalloc_node_track_caller+0xe18/0x1030 mm/slub.c:4383 __kmalloc_reserve net/core/skbuff.c:137 [inline] __alloc_skb+0x309/0xa20 net/core/skbuff.c:205 alloc_skb include/linux/skbuff.h:998 [inline] alloc_skb_with_frags+0x1c7/0xac0 net/core/skbuff.c:5220 sock_alloc_send_pskb+0xafd/0x10e0 net/core/sock.c:2083 packet_alloc_skb net/packet/af_packet.c:2781 [inline] packet_snd net/packet/af_packet.c:2872 [inline] packet_sendmsg+0x661a/0x8f30 net/packet/af_packet.c:2953 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] __sys_sendto+0x8c4/0xac0 net/socket.c:1788 __do_sys_sendto net/socket.c:1800 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1796 __x64_sys_sendto+0x6e/0x90 net/socket.c:1796 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Marek Lindner Cc: Simon Wunderlich Cc: Antonio Quartulli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/soft-interface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 3a80beef247c..7c883420485b 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -219,6 +219,8 @@ static int batadv_interface_tx(struct sk_buff *skb, switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, sizeof(*vhdr))) + goto dropped; vhdr = vlan_eth_hdr(skb); /* drop batman-in-batman packets to prevent loops */ From bb506ddb2f72ca3973f692e7d901776043f62158 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 11 Feb 2019 19:32:20 +0800 Subject: [PATCH 2025/2608] ipv6: propagate genlmsg_reply return code [ Upstream commit d1f20798a119be71746949ba9b2e2ff330fdc038 ] genlmsg_reply can fail, so propagate its return code Fixes: 915d7e5e593 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Li RongQing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/seg6.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index c81407770956..fdeb90dd1c82 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -220,9 +220,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(msg, hdr); - genlmsg_reply(msg, info); - - return 0; + return genlmsg_reply(msg, info); nla_put_failure: rcu_read_unlock(); From 7c3969ff8bd40272dfbbc00dd16bf87547c818c8 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 28 Jan 2019 15:28:06 -0800 Subject: [PATCH 2026/2608] net/mlx5e: Don't overwrite pedit action when multiple pedit used [ Upstream commit 218d05ce326f9e1b40a56085431fa1068b43d5d9 ] In some case, we may use multiple pedit actions to modify packets. The command shown as below: the last pedit action is effective. $ tc filter add dev netdev_rep parent ffff: protocol ip prio 1 \ flower skip_sw ip_proto icmp dst_ip 3.3.3.3 \ action pedit ex munge ip dst set 192.168.1.100 pipe \ action pedit ex munge eth src set 00:00:00:00:00:01 pipe \ action pedit ex munge eth dst set 00:00:00:00:00:02 pipe \ action csum ip pipe \ action tunnel_key set src_ip 1.1.1.100 dst_ip 1.1.1.200 dst_port 4789 id 100 \ action mirred egress redirect dev vxlan0 To fix it, we add max_mod_hdr_actions to mlx5e_tc_flow_parse_attr struction, max_mod_hdr_actions will store the max pedit action number we support and num_mod_hdr_actions indicates how many pedit action we used, and store all pedit action to mod_hdr_actions. Fixes: d79b6df6b10a ("net/mlx5e: Add parsing of TC pedit actions to HW format") Cc: Or Gerlitz Signed-off-by: Tonghao Zhang Reviewed-by: Or Gerlitz Acked-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9e0be077df9c..47003ea4ed65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -81,6 +81,7 @@ struct mlx5e_tc_flow_parse_attr { struct ip_tunnel_info tun_info; struct mlx5_flow_spec spec; int num_mod_hdr_actions; + int max_mod_hdr_actions; void *mod_hdr_actions; int mirred_ifindex; }; @@ -1128,9 +1129,9 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 2, udp.dest, 0), }; -/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, it says how many HW actions were - * actually parsed. +/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, attr->num_mod_hdr_actions + * says how many HW actions were actually parsed. */ static int offload_pedit_fields(struct pedit_headers *masks, struct pedit_headers *vals, @@ -1153,9 +1154,11 @@ static int offload_pedit_fields(struct pedit_headers *masks, add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions; - max_actions = parse_attr->num_mod_hdr_actions; - nactions = 0; + action = parse_attr->mod_hdr_actions + + parse_attr->num_mod_hdr_actions * action_size; + + max_actions = parse_attr->max_mod_hdr_actions; + nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; @@ -1260,7 +1263,7 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, if (!parse_attr->mod_hdr_actions) return -ENOMEM; - parse_attr->num_mod_hdr_actions = max_actions; + parse_attr->max_mod_hdr_actions = max_actions; return 0; } @@ -1304,9 +1307,11 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv, goto out_err; } - err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); - if (err) - goto out_err; + if (!parse_attr->mod_hdr_actions) { + err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); + if (err) + goto out_err; + } err = offload_pedit_fields(masks, vals, parse_attr); if (err < 0) From 2226f9592422c320e37585825a4f7b8bfb8c8504 Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Sun, 10 Feb 2019 09:57:11 +0100 Subject: [PATCH 2027/2608] net/packet: fix 4gb buffer limit due to overflow check [ Upstream commit fc62814d690cf62189854464f4bd07457d5e9e50 ] When calculating rb->frames_per_block * req->tp_block_nr the result can overflow. Check it for overflow without limiting the total buffer size to UINT_MAX. This change fixes support for packet ring buffers >= UINT_MAX. Fixes: 8f8d28e4d6d8 ("net/packet: fix overflow in check for tp_frame_nr") Signed-off-by: Kal Conley Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 44a093c75567..a2bd5917a2a9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4313,7 +4313,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; - if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) From c4ba68b8691e4879cc43c37ad395ea529709bf81 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 6 Feb 2019 10:52:30 +0000 Subject: [PATCH 2028/2608] net: sfp: do not probe SFP module before we're attached [ Upstream commit b5bfc21af5cb3d53f9cee0ef82eaa43762a90f81 ] When we probe a SFP module, we expect to be able to call the upstream device's module_insert() function so that the upstream link can be configured. However, when the upstream device is delayed, we currently may end up probing the module before the upstream device is available, and lose the module_insert() call. Avoid this by holding off probing the module until the SFP bus is properly connected to both the SFP socket driver and the upstream driver. Signed-off-by: Russell King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/sfp-bus.c | 2 ++ drivers/net/phy/sfp.c | 30 +++++++++++++++++++++--------- drivers/net/phy/sfp.h | 2 ++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 7ae815bee52d..be6016e21d87 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -276,6 +276,7 @@ static int sfp_register_bus(struct sfp_bus *bus) return ret; } } + bus->socket_ops->attach(bus->sfp); if (bus->started) bus->socket_ops->start(bus->sfp); bus->registered = true; @@ -289,6 +290,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus) if (bus->registered) { if (bus->started) bus->socket_ops->stop(bus->sfp); + bus->socket_ops->detach(bus->sfp); if (bus->phydev && ops && ops->disconnect_phy) ops->disconnect_phy(bus->upstream); } diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 3165bc7b8e1e..a1b68b19d912 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -114,6 +114,7 @@ struct sfp { struct gpio_desc *gpio[GPIO_MAX]; + bool attached; unsigned int state; struct delayed_work poll; struct delayed_work timeout; @@ -500,7 +501,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) */ switch (sfp->sm_mod_state) { default: - if (event == SFP_E_INSERT) { + if (event == SFP_E_INSERT && sfp->attached) { sfp_module_tx_disable(sfp); sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT); } @@ -628,6 +629,19 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) mutex_unlock(&sfp->sm_mutex); } +static void sfp_attach(struct sfp *sfp) +{ + sfp->attached = true; + if (sfp->state & SFP_F_PRESENT) + sfp_sm_event(sfp, SFP_E_INSERT); +} + +static void sfp_detach(struct sfp *sfp) +{ + sfp->attached = false; + sfp_sm_event(sfp, SFP_E_REMOVE); +} + static void sfp_start(struct sfp *sfp) { sfp_sm_event(sfp, SFP_E_DEV_UP); @@ -687,6 +701,8 @@ static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee, } static const struct sfp_socket_ops sfp_module_ops = { + .attach = sfp_attach, + .detach = sfp_detach, .start = sfp_start, .stop = sfp_stop, .module_info = sfp_module_info, @@ -829,10 +845,6 @@ static int sfp_probe(struct platform_device *pdev) sfp->set_state = sfp_gpio_set_state; } - sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); - if (!sfp->sfp_bus) - return -ENOMEM; - /* Get the initial state, and always signal TX disable, * since the network interface will not be up. */ @@ -843,10 +855,6 @@ static int sfp_probe(struct platform_device *pdev) sfp->state |= SFP_F_RATE_SELECT; sfp_set_state(sfp, sfp->state); sfp_module_tx_disable(sfp); - rtnl_lock(); - if (sfp->state & SFP_F_PRESENT) - sfp_sm_event(sfp, SFP_E_INSERT); - rtnl_unlock(); for (i = 0; i < GPIO_MAX; i++) { if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i]) @@ -879,6 +887,10 @@ static int sfp_remove(struct platform_device *pdev) sfp_unregister_socket(sfp->sfp_bus); + sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); + if (!sfp->sfp_bus) + return -ENOMEM; + return 0; } diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h index 31b0acf337e2..64f54b0bbd8c 100644 --- a/drivers/net/phy/sfp.h +++ b/drivers/net/phy/sfp.h @@ -7,6 +7,8 @@ struct sfp; struct sfp_socket_ops { + void (*attach)(struct sfp *sfp); + void (*detach)(struct sfp *sfp); void (*start)(struct sfp *sfp); void (*stop)(struct sfp *sfp); int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo); From 77278f05f0bbc3409363b844d179aea19504acb6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 12 Feb 2019 18:47:30 +0800 Subject: [PATCH 2029/2608] sctp: call gso_reset_checksum when computing checksum in sctp_gso_segment [ Upstream commit fc228abc2347e106a44c0e9b29ab70b712c4ca51 ] Jianlin reported a panic when running sctp gso over gre over vlan device: [ 84.772930] RIP: 0010:do_csum+0x6d/0x170 [ 84.790605] Call Trace: [ 84.791054] csum_partial+0xd/0x20 [ 84.791657] gre_gso_segment+0x2c3/0x390 [ 84.792364] inet_gso_segment+0x161/0x3e0 [ 84.793071] skb_mac_gso_segment+0xb8/0x120 [ 84.793846] __skb_gso_segment+0x7e/0x180 [ 84.794581] validate_xmit_skb+0x141/0x2e0 [ 84.795297] __dev_queue_xmit+0x258/0x8f0 [ 84.795949] ? eth_header+0x26/0xc0 [ 84.796581] ip_finish_output2+0x196/0x430 [ 84.797295] ? skb_gso_validate_network_len+0x11/0x80 [ 84.798183] ? ip_finish_output+0x169/0x270 [ 84.798875] ip_output+0x6c/0xe0 [ 84.799413] ? ip_append_data.part.50+0xc0/0xc0 [ 84.800145] iptunnel_xmit+0x144/0x1c0 [ 84.800814] ip_tunnel_xmit+0x62d/0x930 [ip_tunnel] [ 84.801699] gre_tap_xmit+0xac/0xf0 [ip_gre] [ 84.802395] dev_hard_start_xmit+0xa5/0x210 [ 84.803086] sch_direct_xmit+0x14f/0x340 [ 84.803733] __dev_queue_xmit+0x799/0x8f0 [ 84.804472] ip_finish_output2+0x2e0/0x430 [ 84.805255] ? skb_gso_validate_network_len+0x11/0x80 [ 84.806154] ip_output+0x6c/0xe0 [ 84.806721] ? ip_append_data.part.50+0xc0/0xc0 [ 84.807516] sctp_packet_transmit+0x716/0xa10 [sctp] [ 84.808337] sctp_outq_flush+0xd7/0x880 [sctp] It was caused by SKB_GSO_CB(skb)->csum_start not set in sctp_gso_segment. sctp_gso_segment() calls skb_segment() with 'feature | NETIF_F_HW_CSUM', which causes SKB_GSO_CB(skb)->csum_start not to be set in skb_segment(). For TCP/UDP, when feature supports HW_CSUM, CHECKSUM_PARTIAL will be set and gso_reset_checksum will be called to set SKB_GSO_CB(skb)->csum_start. So SCTP should do the same as TCP/UDP, to call gso_reset_checksum() when computing checksum in sctp_gso_segment. Reported-by: Jianlin Shi Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/offload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 35bc7106d182..055e1ab1e963 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; skb->csum_not_inet = 0; + gso_reset_checksum(skb, ~0); return sctp_compute_cksum(skb, skb_transport_offset(skb)); } From 90dbd485b32b80633e37bef242619f48bf312ea7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Feb 2019 21:59:51 -0800 Subject: [PATCH 2030/2608] team: avoid complex list operations in team_nl_cmd_options_set() [ Upstream commit 2fdeee2549231b1f989f011bb18191f5660d3745 ] The current opt_inst_list operations inside team_nl_cmd_options_set() is too complex to track: LIST_HEAD(opt_inst_list); nla_for_each_nested(...) { list_for_each_entry(opt_inst, &team->option_inst_list, list) { if (__team_option_inst_tmp_find(&opt_inst_list, opt_inst)) continue; list_add(&opt_inst->tmp_list, &opt_inst_list); } } team_nl_send_event_options_get(team, &opt_inst_list); as while we retrieve 'opt_inst' from team->option_inst_list, it could be added to the local 'opt_inst_list' for multiple times. The __team_option_inst_tmp_find() doesn't work, as the setter team_mode_option_set() still calls team->ops.exit() which uses ->tmp_list too in __team_options_change_check(). Simplify the list operations by moving the 'opt_inst_list' and team_nl_send_event_options_get() into the nla_for_each_nested() loop so that it can be guranteed that we won't insert a same list entry for multiple times. Therefore, __team_option_inst_tmp_find() can be removed too. Fixes: 4fb0534fb7bb ("team: avoid adding twice the same option to the event list") Fixes: 2fcdb2c9e659 ("team: allow to send multiple set events in one message") Reported-by: syzbot+4d4af685432dc0e56c91@syzkaller.appspotmail.com Reported-by: syzbot+68ee510075cf64260cc4@syzkaller.appspotmail.com Cc: Jiri Pirko Cc: Paolo Abeni Signed-off-by: Cong Wang Acked-by: Jiri Pirko Reviewed-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index bd455a6cc82c..bb96153f496e 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -261,17 +261,6 @@ static void __team_option_inst_mark_removed_port(struct team *team, } } -static bool __team_option_inst_tmp_find(const struct list_head *opts, - const struct team_option_inst *needle) -{ - struct team_option_inst *opt_inst; - - list_for_each_entry(opt_inst, opts, tmp_list) - if (opt_inst == needle) - return true; - return false; -} - static int __team_options_register(struct team *team, const struct team_option *option, size_t option_count) @@ -2457,7 +2446,6 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) int err = 0; int i; struct nlattr *nl_option; - LIST_HEAD(opt_inst_list); rtnl_lock(); @@ -2477,6 +2465,7 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) struct nlattr *opt_attrs[TEAM_ATTR_OPTION_MAX + 1]; struct nlattr *attr; struct nlattr *attr_data; + LIST_HEAD(opt_inst_list); enum team_option_type opt_type; int opt_port_ifindex = 0; /* != 0 for per-port options */ u32 opt_array_index = 0; @@ -2581,23 +2570,17 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) if (err) goto team_put; opt_inst->changed = true; - - /* dumb/evil user-space can send us duplicate opt, - * keep only the last one - */ - if (__team_option_inst_tmp_find(&opt_inst_list, - opt_inst)) - continue; - list_add(&opt_inst->tmp_list, &opt_inst_list); } if (!opt_found) { err = -ENOENT; goto team_put; } - } - err = team_nl_send_event_options_get(team, &opt_inst_list); + err = team_nl_send_event_options_get(team, &opt_inst_list); + if (err) + break; + } team_put: team_nl_team_put(team); From 67b462305b2345a8b673aa69b2d4c1f4ce554364 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 7 Feb 2019 18:36:11 +0800 Subject: [PATCH 2031/2608] sit: check if IPv6 enabled before calling ip6_err_gen_icmpv6_unreach() [ Upstream commit 173656accaf583698bac3f9e269884ba60d51ef4 ] If we disabled IPv6 from the kernel command line (ipv6.disable=1), we should not call ip6_err_gen_icmpv6_unreach(). This: ip link add sit1 type sit local 192.0.2.1 remote 192.0.2.2 ttl 1 ip link set sit1 up ip addr add 198.51.100.1/24 dev sit1 ping 198.51.100.2 if IPv6 is disabled at boot time, will crash the kernel. v2: there's no need to use in6_dev_get(), use __in6_dev_get() instead, as we only need to check that idev exists and we are under rcu_read_lock() (from netif_receive_skb_internal()). Reported-by: Jianlin Shi Fixes: ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error") Cc: Oussama Ghorbel Signed-off-by: Hangbin Liu Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5d00a38cd1cb..2e55f9894548 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -540,7 +540,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) } err = 0; - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) + if (__in6_dev_get(skb->dev) && + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0) From fde4151ca8c2b7673769a6e150115b25f414173e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 11 Feb 2019 18:04:17 +0200 Subject: [PATCH 2032/2608] net/mlx4_en: Force CHECKSUM_NONE for short ethernet frames [ Upstream commit 29dded89e80e3fff61efb34f07a8a3fba3ea146d ] When an ethernet frame is padded to meet the minimum ethernet frame size, the padding octets are not covered by the hardware checksum. Fortunately the padding octets are usually zero's, which don't affect checksum. However, it is not guaranteed. For example, switches might choose to make other use of these octets. This repeatedly causes kernel hardware checksum fault. Prior to the cited commit below, skb checksum was forced to be CHECKSUM_NONE when padding is detected. After it, we need to keep skb->csum updated. However, fixing up CHECKSUM_COMPLETE requires to verify and parse IP headers, it does not worth the effort as the packets are so small that CHECKSUM_COMPLETE has no significant advantage. Future work: when reporting checksum complete is not an option for IP non-TCP/UDP packets, we can actually fallback to report checksum unnecessary, by looking at cqe IPOK bit. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Cc: Eric Dumazet Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ab2a9dbb46c7..8fcf9dd42740 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -623,13 +623,27 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, return 0; } #endif + +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, netdev_features_t dev_features) { __wsum hw_checksum = 0; + void *hdr; - void *hdr = (u8 *)va + sizeof(struct ethhdr); + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to skip + * checksum complete. + */ + if (short_frame(skb->len)) + return -EINVAL; + hdr = (u8 *)va + sizeof(struct ethhdr); hw_checksum = csum_unfold((__force __sum16)cqe->checksum); if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && @@ -817,6 +831,11 @@ xdp_drop_no_cnt: skb_record_rx_queue(skb, cq_ring); if (likely(dev->features & NETIF_F_RXCSUM)) { + /* TODO: For IP non TCP/UDP packets when csum complete is + * not an option (not supported or any other reason) we can + * actually check cqe IPOK status bit and report + * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE + */ if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && From 3daca16bdddef8b84cb79f4327c81fb2af2d2cb3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sat, 9 Feb 2019 13:35:52 +0300 Subject: [PATCH 2033/2608] inet_diag: fix reporting cgroup classid and fallback to priority [ Upstream commit 1ec17dbd90f8b638f41ee650558609c1af63dfa0 ] Field idiag_ext in struct inet_diag_req_v2 used as bitmap of requested extensions has only 8 bits. Thus extensions starting from DCTCPINFO cannot be requested directly. Some of them included into response unconditionally or hook into some of lower 8 bits. Extension INET_DIAG_CLASS_ID has not way to request from the beginning. This patch bundle it with INET_DIAG_TCLASS (ipv6 tos), fixes space reservation, and documents behavior for other extensions. Also this patch adds fallback to reporting socket priority. This filed is more widely used for traffic classification because ipv4 sockets automatically maps TOS to priority and default qdisc pfifo_fast knows about that. But priority could be changed via setsockopt SO_PRIORITY so INET_DIAG_TOS isn't enough for predicting class. Also cgroup2 obsoletes net_cls classid (it always zero), but we cannot reuse this field for reporting cgroup2 id because it is 64-bit (ino+gen). So, after this patch INET_DIAG_CLASS_ID will report socket priority for most common setup when net_cls isn't set and/or cgroup2 in use. Fixes: 0888e372c37f ("net: inet: diag: expose sockets cgroup classid") Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/inet_diag.h | 16 +++++++++++----- net/ipv4/inet_diag.c | 10 +++++++++- net/sctp/sctp_diag.c | 1 + 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 817d807e9481..f7df51ffd2a4 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -135,15 +135,21 @@ enum { INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, INET_DIAG_SHUTDOWN, - INET_DIAG_DCTCPINFO, - INET_DIAG_PROTOCOL, /* response attribute only */ + + /* + * Next extenstions cannot be requested in struct inet_diag_req_v2: + * its field idiag_ext has only 8 bits. + */ + + INET_DIAG_DCTCPINFO, /* request as INET_DIAG_VEGASINFO */ + INET_DIAG_PROTOCOL, /* response attribute only */ INET_DIAG_SKV6ONLY, INET_DIAG_LOCALS, INET_DIAG_PEERS, INET_DIAG_PAD, - INET_DIAG_MARK, - INET_DIAG_BBRINFO, - INET_DIAG_CLASS_ID, + INET_DIAG_MARK, /* only with CAP_NET_ADMIN */ + INET_DIAG_BBRINFO, /* request as INET_DIAG_VEGASINFO */ + INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ INET_DIAG_MD5SIG, __INET_DIAG_MAX, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 857ec3dbb742..33edccfebc30 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -109,6 +109,7 @@ static size_t inet_sk_attr_size(struct sock *sk, + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -288,12 +289,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) { + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { u32 classid = 0; #ifdef CONFIG_SOCK_CGROUP_DATA classid = sock_cgroup_classid(&sk->sk_cgrp_data); #endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index a72a7d925d46..75274a60b77a 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -225,6 +225,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) From 4040907e22d2441574871ea35befa4e3d6ec8028 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 30 Jan 2019 14:05:55 -0800 Subject: [PATCH 2034/2608] RDMA/srp: Rework SCSI device reset handling commit 48396e80fb6526ea5ed267bd84f028bae56d2f9e upstream. Since .scsi_done() must only be called after scsi_queue_rq() has finished, make sure that the SRP initiator driver does not call .scsi_done() while scsi_queue_rq() is in progress. Although invoking sg_reset -d while I/O is in progress works fine with kernel v4.20 and before, that is not the case with kernel v5.0-rc1. This patch avoids that the following crash is triggered with kernel v5.0-rc1: BUG: unable to handle kernel NULL pointer dereference at 0000000000000138 CPU: 0 PID: 360 Comm: kworker/0:1H Tainted: G B 5.0.0-rc1-dbg+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Workqueue: kblockd blk_mq_run_work_fn RIP: 0010:blk_mq_dispatch_rq_list+0x116/0xb10 Call Trace: blk_mq_sched_dispatch_requests+0x2f7/0x300 __blk_mq_run_hw_queue+0xd6/0x180 blk_mq_run_work_fn+0x27/0x30 process_one_work+0x4f1/0xa20 worker_thread+0x67/0x5b0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 Cc: Fixes: 94a9174c630c ("IB/srp: reduce lock coverage of command completion") Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index ade98c234dcb..3f5b5893792c 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2669,7 +2669,6 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_rdma_ch *ch; - int i, j; u8 status; shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); @@ -2681,15 +2680,6 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) if (status) return FAILED; - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - for (j = 0; j < target->req_ring_size; ++j) { - struct srp_request *req = &ch->req_ring[j]; - - srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); - } - } - return SUCCESS; } From 56a682bde39c5f16135ef513d062ace480ca679e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Feb 2019 13:32:11 +0000 Subject: [PATCH 2035/2608] KEYS: user: Align the payload buffer commit cc1780fc42c76c705dd07ea123f1143dc5057630 upstream. Align the payload of "user" and "logon" keys so that users of the keyrings service can access it as a struct that requires more than 2-byte alignment. fscrypt currently does this which results in the read of fscrypt_key::size being misaligned as it needs 4-byte alignment. Align to __alignof__(u64) rather than __alignof__(long) since in the future it's conceivable that people would use structs beginning with u64, which on some platforms would require more than 'long' alignment. Reported-by: Aaro Koskinen Fixes: 2aa349f6e37c ("[PATCH] Keys: Export user-defined keyring operations") Fixes: 88bd6ccdcdd6 ("ext4 crypto: add encryption key management facilities") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Tested-by: Aaro Koskinen Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- include/keys/user-type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/keys/user-type.h b/include/keys/user-type.h index e098cbe27db5..12babe991594 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -31,7 +31,7 @@ struct user_key_payload { struct rcu_head rcu; /* RCU destructor */ unsigned short datalen; /* length of this data */ - char data[0]; /* actual data */ + char data[0] __aligned(__alignof__(u64)); /* actual data */ }; extern struct key_type key_type_user; From 50d039d91d1007a1799b036763eed07e17feb75b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 22 Feb 2019 15:36:18 +0000 Subject: [PATCH 2036/2608] KEYS: always initialize keyring_index_key::desc_len commit ede0fa98a900e657d1fcd80b50920efc896c1a4c upstream. syzbot hit the 'BUG_ON(index_key->desc_len == 0);' in __key_link_begin() called from construct_alloc_key() during sys_request_key(), because the length of the key description was never calculated. The problem is that we rely on ->desc_len being initialized by search_process_keyrings(), specifically by search_nested_keyrings(). But, if the process isn't subscribed to any keyrings that never happens. Fix it by always initializing keyring_index_key::desc_len as soon as the description is set, like we already do in some places. The following program reproduces the BUG_ON() when it's run as root and no session keyring has been installed. If it doesn't work, try removing pam_keyinit.so from /etc/pam.d/login and rebooting. #include #include #include int main(void) { int id = add_key("keyring", "syz", NULL, 0, KEY_SPEC_USER_KEYRING); keyctl_setperm(id, KEY_OTH_WRITE); setreuid(5000, 5000); request_key("user", "desc", "", id); } Reported-by: syzbot+ec24e95ea483de0a24da@syzkaller.appspotmail.com Fixes: b2a4df200d57 ("KEYS: Expand the capacity of a keyring") Signed-off-by: Eric Biggers Signed-off-by: David Howells Cc: stable@vger.kernel.org Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/keys/keyring.c | 4 +--- security/keys/proc.c | 3 +-- security/keys/request_key.c | 1 + security/keys/request_key_auth.c | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 36f842ec87f0..359b9cba3d0d 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -661,9 +661,6 @@ static bool search_nested_keyrings(struct key *keyring, BUG_ON((ctx->flags & STATE_CHECKS) == 0 || (ctx->flags & STATE_CHECKS) == STATE_CHECKS); - if (ctx->index_key.description) - ctx->index_key.desc_len = strlen(ctx->index_key.description); - /* Check to see if this top-level keyring is what we are looking for * and whether it is valid or not. */ @@ -921,6 +918,7 @@ key_ref_t keyring_search(key_ref_t keyring, struct keyring_search_context ctx = { .index_key.type = type, .index_key.description = description, + .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, diff --git a/security/keys/proc.c b/security/keys/proc.c index 6d1fcbba1e09..0ee9a36e6815 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -188,8 +188,7 @@ static int proc_keys_show(struct seq_file *m, void *v) int rc; struct keyring_search_context ctx = { - .index_key.type = key->type, - .index_key.description = key->description, + .index_key = key->index_key, .cred = m->file->f_cred, .match_data.cmp = lookup_user_key_possessed, .match_data.raw_data = key, diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 7dc741382154..c707fdbb3429 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -545,6 +545,7 @@ struct key *request_key_and_link(struct key_type *type, struct keyring_search_context ctx = { .index_key.type = type, .index_key.description = description, + .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 424e1d90412e..6797843154f0 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -246,7 +246,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id) struct key *authkey; key_ref_t authkey_ref; - sprintf(description, "%x", target_id); + ctx.index_key.desc_len = sprintf(description, "%x", target_id); authkey_ref = search_process_keyrings(&ctx); From bc423b651a3ea0f8d4abf108b24eb7a130ea5284 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sat, 16 Feb 2019 16:10:39 +0300 Subject: [PATCH 2037/2608] parisc: Fix ptrace syscall number modification commit b7dc5a071ddf69c0350396b203cba32fe5bab510 upstream. Commit 910cd32e552e ("parisc: Fix and enable seccomp filter support") introduced a regression in ptrace-based syscall tampering: when tracer changes syscall number to -1, the kernel fails to initialize %r28 with -ENOSYS and subsequently fails to return the error code of the failed syscall to userspace. This erroneous behaviour could be observed with a simple strace syscall fault injection command which is expected to print something like this: $ strace -a0 -ewrite -einject=write:error=enospc echo hello write(1, "hello\n", 6) = -1 ENOSPC (No space left on device) (INJECTED) write(2, "echo: ", 6) = -1 ENOSPC (No space left on device) (INJECTED) write(2, "write error", 11) = -1 ENOSPC (No space left on device) (INJECTED) write(2, "\n", 1) = -1 ENOSPC (No space left on device) (INJECTED) +++ exited with 1 +++ After commit 910cd32e552ea09caa89cdbe328e468979b030dd it loops printing something like this instead: write(1, "hello\n", 6../strace: Failed to tamper with process 12345: unexpectedly got no error (return value 0, error 0) ) = 0 (INJECTED) This bug was found by strace test suite. Fixes: 910cd32e552e ("parisc: Fix and enable seccomp filter support") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Dmitry V. Levin Tested-by: Helge Deller Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/ptrace.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 1a2be6e639b5..eca5b2a1c7e1 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -312,15 +312,29 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) { + if (test_thread_flag(TIF_SYSCALL_TRACE)) { + int rc = tracehook_report_syscall_entry(regs); + /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip - * the system call and the system call restart handling. + * As tracesys_next does not set %r28 to -ENOSYS + * when %r20 is set to -1, initialize it here. */ - regs->gr[20] = -1UL; - goto out; + regs->gr[28] = -ENOSYS; + + if (rc) { + /* + * A nonzero return code from + * tracehook_report_syscall_entry() tells us + * to prevent the syscall execution. Skip + * the syscall call and the syscall restart handling. + * + * Note that the tracer may also just change + * regs->gr[20] to an invalid syscall number, + * that is handled by tracesys_next. + */ + regs->gr[20] = -1UL; + return -1; + } } /* Do the secure computing check after ptrace. */ @@ -344,7 +358,6 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gr[24] & 0xffffffff, regs->gr[23] & 0xffffffff); -out: /* * Sign extend the syscall number to 64bit since it may have been * modified by a compat ptrace call From 1f44814198f800319b56945199c82d28ab700d47 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 16 Jan 2019 14:29:50 +0300 Subject: [PATCH 2038/2608] ARCv2: Enable unaligned access in early ASM code commit 252f6e8eae909bc075a1b1e3b9efb095ae4c0b56 upstream. It is currently done in arc_init_IRQ() which might be too late considering gcc 7.3.1 onwards (GNU 2018.03) generates unaligned memory accesses by default Cc: stable@vger.kernel.org #4.4+ Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: rewrote changelog] Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/head.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 8b90d25a15cc..26e33a8b2d18 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -17,6 +17,7 @@ #include #include #include +#include .macro CPU_EARLY_SETUP @@ -47,6 +48,15 @@ sr r5, [ARC_REG_DC_CTRL] 1: + +#ifdef CONFIG_ISA_ARCV2 + ; Unaligned access is disabled at reset, so re-enable early as + ; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access + ; by default + lr r5, [status32] + bset r5, r5, STATUS_AD_BIT + kflag r5 +#endif .endm .section .init.text, "ax",@progbits From e7264579eb80ef67efd337ffb32b7d57e7b1ea54 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 14 Feb 2019 18:07:44 +0300 Subject: [PATCH 2039/2608] ARC: U-boot: check arguments paranoidly commit a66f2e57bd566240d8b3884eedf503928fbbe557 upstream. Handle U-boot arguments paranoidly: * don't allow to pass unknown tag. * try to use external device tree blob only if corresponding tag (TAG_DTB) is set. * don't check uboot_tag if kernel build with no ARC_UBOOT_SUPPORT. NOTE: If U-boot args are invalid we skip them and try to use embedded device tree blob. We can't panic on invalid U-boot args as we really pass invalid args due to bug in U-boot code. This happens if we don't provide external DTB to U-boot and don't set 'bootargs' U-boot environment variable (which is default case at least for HSDK board) In that case we will pass {r0 = 1 (bootargs in r2); r1 = 0; r2 = 0;} to linux which is invalid. While I'm at it refactor U-boot arguments handling code. Cc: stable@vger.kernel.org Tested-by: Corentin LABBE Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/head.S | 4 +- arch/arc/kernel/setup.c | 101 +++++++++++++++++++++++++++------------- 2 files changed, 71 insertions(+), 34 deletions(-) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 26e33a8b2d18..1f945d0f40da 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -103,9 +103,9 @@ ENTRY(stext) #ifdef CONFIG_ARC_UBOOT_SUPPORT ; Uboot - kernel ABI ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 - ; r1 = magic number (board identity, unused as of now + ; r1 = magic number (always zero as of now) ; r2 = pointer to uboot provided cmdline or external DTB in mem - ; These are handled later in setup_arch() + ; These are handled later in handle_uboot_args() st r0, [@uboot_tag] st r2, [@uboot_arg] #endif diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index fb83844daeea..709649e5f9bc 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -414,43 +414,80 @@ void setup_processor(void) arc_chk_core_config(); } -static inline int is_kernel(unsigned long addr) +static inline bool uboot_arg_invalid(unsigned long addr) { - if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) - return 1; - return 0; + /* + * Check that it is a untranslated address (although MMU is not enabled + * yet, it being a high address ensures this is not by fluke) + */ + if (addr < PAGE_OFFSET) + return true; + + /* Check that address doesn't clobber resident kernel image */ + return addr >= (unsigned long)_stext && addr <= (unsigned long)_end; +} + +#define IGNORE_ARGS "Ignore U-boot args: " + +/* uboot_tag values for U-boot - kernel ABI revision 0; see head.S */ +#define UBOOT_TAG_NONE 0 +#define UBOOT_TAG_CMDLINE 1 +#define UBOOT_TAG_DTB 2 + +void __init handle_uboot_args(void) +{ + bool use_embedded_dtb = true; + bool append_cmdline = false; + +#ifdef CONFIG_ARC_UBOOT_SUPPORT + /* check that we know this tag */ + if (uboot_tag != UBOOT_TAG_NONE && + uboot_tag != UBOOT_TAG_CMDLINE && + uboot_tag != UBOOT_TAG_DTB) { + pr_warn(IGNORE_ARGS "invalid uboot tag: '%08x'\n", uboot_tag); + goto ignore_uboot_args; + } + + if (uboot_tag != UBOOT_TAG_NONE && + uboot_arg_invalid((unsigned long)uboot_arg)) { + pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg); + goto ignore_uboot_args; + } + + /* see if U-boot passed an external Device Tree blob */ + if (uboot_tag == UBOOT_TAG_DTB) { + machine_desc = setup_machine_fdt((void *)uboot_arg); + + /* external Device Tree blob is invalid - use embedded one */ + use_embedded_dtb = !machine_desc; + } + + if (uboot_tag == UBOOT_TAG_CMDLINE) + append_cmdline = true; + +ignore_uboot_args: +#endif + + if (use_embedded_dtb) { + machine_desc = setup_machine_fdt(__dtb_start); + if (!machine_desc) + panic("Embedded DT invalid\n"); + } + + /* + * NOTE: @boot_command_line is populated by setup_machine_fdt() so this + * append processing can only happen after. + */ + if (append_cmdline) { + /* Ensure a whitespace between the 2 cmdlines */ + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + strlcat(boot_command_line, uboot_arg, COMMAND_LINE_SIZE); + } } void __init setup_arch(char **cmdline_p) { -#ifdef CONFIG_ARC_UBOOT_SUPPORT - /* make sure that uboot passed pointer to cmdline/dtb is valid */ - if (uboot_tag && is_kernel((unsigned long)uboot_arg)) - panic("Invalid uboot arg\n"); - - /* See if u-boot passed an external Device Tree blob */ - machine_desc = setup_machine_fdt(uboot_arg); /* uboot_tag == 2 */ - if (!machine_desc) -#endif - { - /* No, so try the embedded one */ - machine_desc = setup_machine_fdt(__dtb_start); - if (!machine_desc) - panic("Embedded DT invalid\n"); - - /* - * If we are here, it is established that @uboot_arg didn't - * point to DT blob. Instead if u-boot says it is cmdline, - * append to embedded DT cmdline. - * setup_machine_fdt() would have populated @boot_command_line - */ - if (uboot_tag == 1) { - /* Ensure a whitespace between the 2 cmdlines */ - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, uboot_arg, - COMMAND_LINE_SIZE); - } - } + handle_uboot_args(); /* Save unparsed command line copy for /proc/cmdline */ *cmdline_p = boot_command_line; From 238209c654d50e74deed8ec1fdb3ff61f4d0ea32 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 8 Feb 2019 13:55:19 +0300 Subject: [PATCH 2040/2608] ARC: define ARCH_SLAB_MINALIGN = 8 commit b6835ea77729e7faf4656ca637ba53f42b8ee3fd upstream. The default value of ARCH_SLAB_MINALIGN in "include/linux/slab.h" is "__alignof__(unsigned long long)" which for ARC unexpectedly turns out to be 4. This is not a compiler bug, but as defined by ARC ABI [1] Thus slab allocator would allocate a struct which is 32-bit aligned, which is generally OK even if struct has long long members. There was however potetial problem when it had any atomic64_t which use LLOCKD/SCONDD instructions which are required by ISA to take 64-bit addresses. This is the problem we ran into [ 4.015732] EXT4-fs (mmcblk0p2): re-mounted. Opts: (null) [ 4.167881] Misaligned Access [ 4.172356] Path: /bin/busybox.nosuid [ 4.176004] CPU: 2 PID: 171 Comm: rm Not tainted 4.19.14-yocto-standard #1 [ 4.182851] [ 4.182851] [ECR ]: 0x000d0000 => Check Programmer's Manual [ 4.190061] [EFA ]: 0xbeaec3fc [ 4.190061] [BLINK ]: ext4_delete_entry+0x210/0x234 [ 4.190061] [ERET ]: ext4_delete_entry+0x13e/0x234 [ 4.202985] [STAT32]: 0x80080002 : IE K [ 4.207236] BTA: 0x9009329c SP: 0xbe5b1ec4 FP: 0x00000000 [ 4.212790] LPS: 0x9074b118 LPE: 0x9074b120 LPC: 0x00000000 [ 4.218348] r00: 0x00000040 r01: 0x00000021 r02: 0x00000001 ... ... [ 4.270510] Stack Trace: [ 4.274510] ext4_delete_entry+0x13e/0x234 [ 4.278695] ext4_rmdir+0xe0/0x238 [ 4.282187] vfs_rmdir+0x50/0xf0 [ 4.285492] do_rmdir+0x9e/0x154 [ 4.288802] EV_Trap+0x110/0x114 The fix is to make sure slab allocations are 64-bit aligned. Do note that atomic64_t is __attribute__((aligned(8)) which means gcc does generate 64-bit aligned references, relative to beginning of container struct. However the issue is if the container itself is not 64-bit aligned, atomic64_t ends up unaligned which is what this patch ensures. [1] https://github.com/foss-for-synopsys-dwc-arc-processors/toolchain/wiki/files/ARCv2_ABI.pdf Signed-off-by: Alexey Brodkin Cc: # 4.8+ Signed-off-by: Vineet Gupta [vgupta: reworked changelog, added dependency on LL64+LLSC] Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/cache.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index ff7d3232764a..db681cf4959c 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -52,6 +52,17 @@ #define cache_line_size() SMP_CACHE_BYTES #define ARCH_DMA_MINALIGN SMP_CACHE_BYTES +/* + * Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses + * ARCv2 64-bit atomics (LLOCKD/SCONDD). This guarantess runtime 64-bit + * alignment for any atomic64_t embedded in buffer. + * Default ARCH_SLAB_MINALIGN is __alignof__(long long) which has a relaxed + * value of 4 (and not 8) in ARC ABI. + */ +#if defined(CONFIG_ARC_HAS_LL64) && defined(CONFIG_ARC_HAS_LLSC) +#define ARCH_SLAB_MINALIGN 8 +#endif + extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); extern void read_decode_cache_bcr(void); From 6964c1d5062de1fb26864f59b41a5ad00f641db9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 15 Feb 2019 12:30:19 +0000 Subject: [PATCH 2041/2608] drm/i915/fbdev: Actually configure untiled displays commit d179b88deb3bf6fed4991a31fd6f0f2cad21fab5 upstream. If we skipped all the connectors that were not part of a tile, we would leave conn_seq=0 and conn_configured=0, convincing ourselves that we had stagnated in our configuration attempts. Avoid this situation by starting conn_seq=ALL_CONNECTORS, and repeating until we find no more connectors to configure. Fixes: 754a76591b12 ("drm/i915/fbdev: Stop repeating tile configuration on stagnation") Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190215123019.32283-1-chris@chris-wilson.co.uk Cc: # v3.19+ (cherry picked from commit d9b308b1f8a1acc0c3279f443d4fe0f9f663252e) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_fbdev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index da2d309574ba..14eb8a064562 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -326,8 +326,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); + unsigned long conn_configured, conn_seq; int i, j; bool *save_enabled; bool fallback = true, ret = true; @@ -345,10 +345,9 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, drm_modeset_backoff(&ctx); memcpy(save_enabled, enabled, count); - mask = GENMASK(count - 1, 0); + conn_seq = GENMASK(count - 1, 0); conn_configured = 0; retry: - conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -361,7 +360,8 @@ retry: if (conn_configured & BIT(i)) continue; - if (conn_seq == 0 && !connector->has_tile) + /* First pass, only consider tiled connectors */ + if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -465,8 +465,10 @@ retry: conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask && conn_configured != conn_seq) + if (conn_configured != conn_seq) { /* repeat until no more are found */ + conn_seq = conn_configured; goto retry; + } /* * If the BIOS didn't enable everything it could, fall back to have the From dac7d4432b13d6b58923c1178b786bcda37c9e9e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 15 Feb 2019 12:15:47 -0500 Subject: [PATCH 2042/2608] net: validate untrusted gso packets without csum offload commit d5be7f632bad0f489879eed0ff4b99bd7fe0b74c upstream. Syzkaller again found a path to a kernel crash through bad gso input. By building an excessively large packet to cause an skb field to wrap. If VIRTIO_NET_HDR_F_NEEDS_CSUM was set this would have been dropped in skb_partial_csum_set. GSO packets that do not set checksum offload are suspicious and rare. Most callers of virtio_net_hdr_to_skb already pass them to skb_probe_transport_header. Move that test forward, change it to detect parse failure and drop packets on failure as those cleary are not one of the legitimate VIRTIO_NET_HDR_GSO types. Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.") Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr") Reported-by: syzbot Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 2 +- include/linux/virtio_net.h | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 50a4a5968f3a..3172e14d9398 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2377,7 +2377,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, return; else if (skb_flow_dissect_flow_keys(skb, &keys, 0)) skb_set_transport_header(skb, keys.control.thoff); - else + else if (offset_hint >= 0) skb_set_transport_header(skb, offset_hint); } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index cb462f9ab7dd..71f2394abbf7 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -57,6 +57,15 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; + } else { + /* gso packets without NEEDS_CSUM do not set transport_offset. + * probe and drop if does not match one of the above types. + */ + if (gso_type) { + skb_probe_transport_header(skb, -1); + if (!skb_transport_header_was_set(skb)) + return -EINVAL; + } } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { From d996573ebd5c528c613e90b7e7daa961d58fa16e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 18 Feb 2019 23:37:12 -0500 Subject: [PATCH 2043/2608] net: avoid false positives in untrusted gso validation commit 9e8db5913264d3967b93c765a6a9e464d9c473db upstream. GSO packets with vnet_hdr must conform to a small set of gso_types. The below commit uses flow dissection to drop packets that do not. But it has false positives when the skb is not fully initialized. Dissection needs skb->protocol and skb->network_header. Infer skb->protocol from gso_type as the two must agree. SKB_GSO_UDP can use both ipv4 and ipv6, so try both. Exclude callers for which network header offset is not known. Fixes: d5be7f632bad ("net: validate untrusted gso packets without csum offload") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/virtio_net.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 71f2394abbf7..e0348cb0a1dd 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -61,10 +61,20 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, /* gso packets without NEEDS_CSUM do not set transport_offset. * probe and drop if does not match one of the above types. */ - if (gso_type) { + if (gso_type && skb->network_header) { + if (!skb->protocol) + virtio_net_hdr_set_proto(skb, hdr); +retry: skb_probe_transport_header(skb, -1); - if (!skb_transport_header_was_set(skb)) + if (!skb_transport_header_was_set(skb)) { + /* UFO does not specify ipv4 or 6: try both */ + if (gso_type & SKB_GSO_UDP && + skb->protocol == htons(ETH_P_IP)) { + skb->protocol = htons(ETH_P_IPV6); + goto retry; + } return -EINVAL; + } } } From 9ac5e6507acbe06a25de03ea9f3089c2f662a72f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 22 Feb 2019 21:22:32 +0800 Subject: [PATCH 2044/2608] Revert "bridge: do not add port to router list when receives query with source 0.0.0.0" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 278e2148c07559dd4ad8602f22366d61eb2ee7b7 upstream. This reverts commit 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") and commit 0fe5119e267f ("net: bridge: remove ipv6 zero address check in mcast queries") The reason is RFC 4541 is not a standard but suggestive. Currently we will elect 0.0.0.0 as Querier if there is no ip address configured on bridge. If we do not add the port which recives query with source 0.0.0.0 to router list, the IGMP reports will not be about to forward to Querier, IGMP data will also not be able to forward to dest. As Nikolay suggested, revert this change first and add a boolopt api to disable none-zero election in future if needed. Reported-by: Linus Lüssing Reported-by: Sebastian Gottschall Fixes: 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") Fixes: 0fe5119e267f ("net: bridge: remove ipv6 zero address check in mcast queries") Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a813dfe2dc2c..8dc5c8d69bcd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1390,14 +1390,7 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - - /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, - * the arrival port for IGMP Queries where the source address - * is 0.0.0.0 should not be added to router port list. - */ - if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - saddr->proto == htons(ETH_P_IPV6)) - br_multicast_mark_router(br, port); + br_multicast_mark_router(br, port); } static int br_ip4_multicast_query(struct net_bridge *br, From c92ba8820aca58e38dceeec554904307ac2e0851 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 15 Feb 2019 12:50:24 +0100 Subject: [PATCH 2045/2608] netfilter: nf_tables: fix flush after rule deletion in the same batch commit 23b7ca4f745f21c2b9cfcb67fdd33733b3ae7e66 upstream. Flush after rule deletion bogusly hits -ENOENT. Skip rules that have been already from nft_delrule_by_chain() which is always called from the flush path. Fixes: cf9dc09d0949 ("netfilter: nf_tables: fix missing rules flushing per table") Reported-by: Phil Sutter Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bf26e27ca456..c445d57e3a5b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -304,6 +304,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) int err; list_for_each_entry(rule, &ctx->chain->rules, list) { + if (!nft_is_active_next(ctx->net, rule)) + continue; + err = nft_delrule(ctx, rule); if (err < 0) return err; From 7cfbf4cefbc9374abe54b4838a737e348e907337 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 13 Feb 2019 13:03:53 +0100 Subject: [PATCH 2046/2608] netfilter: nft_compat: use-after-free when deleting targets commit 753c111f655e38bbd52fc01321266633f022ebe2 upstream. Fetch pointer to module before target object is released. Fixes: 29e3880109e3 ("netfilter: nf_tables: fix use-after-free when deleting compat expressions") Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7533c2fd6b76..7344ec7fff2a 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -277,6 +277,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; void *info = nft_expr_priv(expr); + struct module *me = target->me; struct xt_tgdtor_param par; par.net = ctx->net; @@ -287,7 +288,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.target->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(target->me); + module_put(me); } static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) From 8c2183d845a2ba36eefd9df8d3e975932ce5fd6b Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Mon, 21 Jan 2019 18:45:27 +0800 Subject: [PATCH 2047/2608] netfilter: ipv6: Don't preserve original oif for loopback address commit 15df03c661cb362366ecfc3a21820cb934f3e4ca upstream. Commit 508b09046c0f ("netfilter: ipv6: Preserve link scope traffic original oif") made ip6_route_me_harder() keep the original oif for link-local and multicast packets. However, it also affected packets for the loopback address because it used rt6_need_strict(). REDIRECT rules in the OUTPUT chain rewrite the destination to loopback address; thus its oif should not be preserved. This commit fixes the bug that redirected local packets are being dropped. Actually the packet was not exactly dropped; Instead it was sent out to the original oif rather than lo. When a packet with daddr ::1 is sent to the router, it is effectively dropped. Fixes: 508b09046c0f ("netfilter: ipv6: Preserve link scope traffic original oif") Signed-off-by: Eli Cooper Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv6/netfilter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 1f8b1a433b5d..a776fbc3b2a9 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,9 +24,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct sock *sk = sk_to_full_sk(skb->sk); unsigned int hh_len; struct dst_entry *dst; + int strict = (ipv6_addr_type(&iph->daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, + strict ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, From f6de4ca8627b74fca198df8c0c02c52acb8231aa Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 31 Oct 2018 17:50:21 -0700 Subject: [PATCH 2048/2608] pinctrl: max77620: Use define directive for max77620_pinconf_param values commit 1f60652dd586d1b3eee7c4602892a97a62fa937a upstream. Clang warns when one enumerated type is implicitly converted to another: drivers/pinctrl/pinctrl-max77620.c:56:12: warning: implicit conversion from enumeration type 'enum max77620_pinconf_param' to different enumeration type 'enum pin_config_param' [-Wenum-conversion] .param = MAX77620_ACTIVE_FPS_SOURCE, ^~~~~~~~~~~~~~~~~~~~~~~~~~ It is expected that pinctrl drivers can extend pin_config_param because of the gap between PIN_CONFIG_END and PIN_CONFIG_MAX so this conversion isn't an issue. Most drivers that take advantage of this define the PIN_CONFIG variables as constants, rather than enumerated values. Do the same thing here so that Clang no longer warns. Link: https://github.com/ClangBuiltLinux/linux/issues/139 Signed-off-by: Nathan Chancellor Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-max77620.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/pinctrl-max77620.c b/drivers/pinctrl/pinctrl-max77620.c index b8d2180a2bea..baef91aaf9b8 100644 --- a/drivers/pinctrl/pinctrl-max77620.c +++ b/drivers/pinctrl/pinctrl-max77620.c @@ -34,14 +34,12 @@ enum max77620_pin_ppdrv { MAX77620_PIN_PP_DRV, }; -enum max77620_pinconf_param { - MAX77620_ACTIVE_FPS_SOURCE = PIN_CONFIG_END + 1, - MAX77620_ACTIVE_FPS_POWER_ON_SLOTS, - MAX77620_ACTIVE_FPS_POWER_DOWN_SLOTS, - MAX77620_SUSPEND_FPS_SOURCE, - MAX77620_SUSPEND_FPS_POWER_ON_SLOTS, - MAX77620_SUSPEND_FPS_POWER_DOWN_SLOTS, -}; +#define MAX77620_ACTIVE_FPS_SOURCE (PIN_CONFIG_END + 1) +#define MAX77620_ACTIVE_FPS_POWER_ON_SLOTS (PIN_CONFIG_END + 2) +#define MAX77620_ACTIVE_FPS_POWER_DOWN_SLOTS (PIN_CONFIG_END + 3) +#define MAX77620_SUSPEND_FPS_SOURCE (PIN_CONFIG_END + 4) +#define MAX77620_SUSPEND_FPS_POWER_ON_SLOTS (PIN_CONFIG_END + 5) +#define MAX77620_SUSPEND_FPS_POWER_DOWN_SLOTS (PIN_CONFIG_END + 6) struct max77620_pin_function { const char *name; From cc96cdc8e6532f2b3c934318e65901c599f2175c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Nov 2017 11:10:47 +0000 Subject: [PATCH 2049/2608] phy: tegra: remove redundant self assignment of 'map' commit a0dd6773038f3fd2bd1b4f7ec193887cffc49046 upstream. The assignment of map to itself is redundant and can be removed. Detected with Coccinelle. Signed-off-by: Colin Ian King Signed-off-by: Kishon Vijay Abraham I Cc: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- drivers/phy/tegra/xusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 63e916d4d069..11aa5902a9ac 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -418,7 +418,7 @@ tegra_xusb_port_find_lane(struct tegra_xusb_port *port, { struct tegra_xusb_lane *lane, *match = ERR_PTR(-ENODEV); - for (map = map; map->type; map++) { + for (; map->type; map++) { if (port->index != map->port) continue; From 81bafd09bb8d98370d8d5d21a1b3bbc48f3dcf28 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 30 Oct 2017 11:08:16 -0700 Subject: [PATCH 2050/2608] sched/sysctl: Fix attributes of some extern declarations commit a9903f04e0a4ea522d959c2f287cdf0ab029e324 upstream. The definition of sysctl_sched_migration_cost, sysctl_sched_nr_migrate and sysctl_sched_time_avg includes the attribute const_debug. This attribute is not part of the extern declaration of these variables in include/linux/sched/sysctl.h, while it is in kernel/sched/sched.h, and as a result Clang generates warnings like this: kernel/sched/sched.h:1618:33: warning: section attribute is specified on redeclared variable [-Wsection] extern const_debug unsigned int sysctl_sched_time_avg; ^ ./include/linux/sched/sysctl.h:42:21: note: previous declaration is here extern unsigned int sysctl_sched_time_avg; The header only declares the variables when CONFIG_SCHED_DEBUG is defined, therefore it is not necessary to duplicate the definition of const_debug. Instead we can use the attribute __read_mostly, which is the expansion of const_debug when CONFIG_SCHED_DEBUG=y is set. Signed-off-by: Matthias Kaehlcke Reviewed-by: Nick Desaulniers Cc: Douglas Anderson Cc: Guenter Roeck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shile Zhang Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171030180816.170850-1-mka@chromium.org Signed-off-by: Ingo Molnar Cc: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/sysctl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d6a18a3839cc..1c1a1512ec55 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -38,9 +38,9 @@ extern unsigned int sysctl_numa_balancing_scan_period_max; extern unsigned int sysctl_numa_balancing_scan_size; #ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_time_avg; +extern __read_mostly unsigned int sysctl_sched_migration_cost; +extern __read_mostly unsigned int sysctl_sched_nr_migrate; +extern __read_mostly unsigned int sysctl_sched_time_avg; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, From 418d77caf233c1fa01ec1c9c880b8af855cfd02e Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Feb 2019 15:04:24 +0000 Subject: [PATCH 2051/2608] net: phylink: avoid resolving link state too early commit 87454b6edc1b0143fdb3d9853285477e95af74a4 upstream. During testing on Armada 388 platforms, it was found with a certain module configuration that it was possible to trigger a kernel oops during the module load process, caused by the phylink resolver being triggered for a currently disabled interface. This problem was introduced by changing the way the SFP registration works, which now can result in the sfp link down notification being called during phylink_create(). Fixes: b5bfc21af5cb ("net: sfp: do not probe SFP module before we're attached") Signed-off-by: Russell King Signed-off-by: David S. Miller Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phylink.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 70ce7da26d1f..afe335583832 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -487,6 +487,17 @@ static void phylink_run_resolve(struct phylink *pl) queue_work(system_power_efficient_wq, &pl->resolve); } +static void phylink_run_resolve_and_disable(struct phylink *pl, int bit) +{ + unsigned long state = pl->phylink_disable_state; + + set_bit(bit, &pl->phylink_disable_state); + if (state == 0) { + queue_work(system_power_efficient_wq, &pl->resolve); + flush_work(&pl->resolve); + } +} + static const struct sfp_upstream_ops sfp_phylink_ops; static int phylink_register_sfp(struct phylink *pl, struct device_node *np) @@ -776,9 +787,7 @@ void phylink_stop(struct phylink *pl) if (pl->sfp_bus) sfp_upstream_stop(pl->sfp_bus); - set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); - queue_work(system_power_efficient_wq, &pl->resolve); - flush_work(&pl->resolve); + phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_STOPPED); } EXPORT_SYMBOL_GPL(phylink_stop); @@ -1433,9 +1442,7 @@ static void phylink_sfp_link_down(void *upstream) WARN_ON(!lockdep_rtnl_is_held()); - set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state); - queue_work(system_power_efficient_wq, &pl->resolve); - flush_work(&pl->resolve); + phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_LINK); } static void phylink_sfp_link_up(void *upstream) From 30921fc1e5fcf904f9afddeece1288f5b16ba017 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Feb 2019 10:08:09 +0100 Subject: [PATCH 2052/2608] Linux 4.14.104 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 52d150b1f5ef..967692b8941f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 103 +SUBLEVEL = 104 EXTRAVERSION = NAME = Petit Gorille From d7f9553680b36ca8a0a0bee6695e91634be4f0f2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Feb 2019 14:58:58 +0100 Subject: [PATCH 2053/2608] Revert "loop: Fix double mutex_unlock(&loop_ctl_mutex) in loop_control_ioctl()" This reverts commit d2762edcb6af99fc9322bab0b1d4e71a427760e8 which is commit 628bd85947091830a8c4872adfd5ed1d515a9cf2 upstream. It does not work properly in the 4.14.y tree and causes more problems than it fixes, so revert it. Reported-by: Thomas Lindroth Reported-by: Jan Kara Cc: syzbot Cc: Ming Lei Cc: Tetsuo Handa Cc: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7910dd8b1d3a..ceb62579eb21 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1996,10 +1996,12 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; if (lo->lo_state != Lo_unbound) { ret = -EBUSY; + mutex_unlock(&loop_ctl_mutex); break; } if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; + mutex_unlock(&loop_ctl_mutex); break; } lo->lo_disk->private_data = NULL; From 4bc55f93f299d0396d6797c44af34cb2fddad929 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Feb 2019 15:08:29 +0100 Subject: [PATCH 2054/2608] Revert "loop: Get rid of loop_index_mutex" This reverts commit c1e63df4f30c3918476ac9bc594355b0e9629893 which is commit 0a42e99b58a208839626465af194cfe640ef9493 upstream. It does not work properly in the 4.14.y tree and causes more problems than it fixes, so revert it. Reported-by: Thomas Lindroth Reported-by: Jan Kara Cc: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ceb62579eb21..3b1583addacc 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -81,6 +81,7 @@ #include static DEFINE_IDR(loop_index_idr); +static DEFINE_MUTEX(loop_index_mutex); static DEFINE_MUTEX(loop_ctl_mutex); static int max_part; @@ -1618,11 +1619,9 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { struct loop_device *lo; - int err; + int err = 0; - err = mutex_lock_killable(&loop_ctl_mutex); - if (err) - return err; + mutex_lock(&loop_index_mutex); lo = bdev->bd_disk->private_data; if (!lo) { err = -ENXIO; @@ -1631,7 +1630,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode) atomic_inc(&lo->lo_refcnt); out: - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&loop_index_mutex); return err; } @@ -1640,11 +1639,12 @@ static void lo_release(struct gendisk *disk, fmode_t mode) struct loop_device *lo; int err; - mutex_lock(&loop_ctl_mutex); + mutex_lock(&loop_index_mutex); lo = disk->private_data; if (atomic_dec_return(&lo->lo_refcnt)) - goto out_unlock; + goto unlock_index; + mutex_lock(&loop_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1652,7 +1652,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) */ err = loop_clr_fd(lo); if (!err) - return; + goto unlock_index; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, @@ -1662,8 +1662,9 @@ static void lo_release(struct gendisk *disk, fmode_t mode) blk_mq_unfreeze_queue(lo->lo_queue); } -out_unlock: mutex_unlock(&loop_ctl_mutex); +unlock_index: + mutex_unlock(&loop_index_mutex); } static const struct block_device_operations lo_fops = { @@ -1956,7 +1957,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) struct kobject *kobj; int err; - mutex_lock(&loop_ctl_mutex); + mutex_lock(&loop_index_mutex); err = loop_lookup(&lo, MINOR(dev) >> part_shift); if (err < 0) err = loop_add(&lo, MINOR(dev) >> part_shift); @@ -1964,7 +1965,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) kobj = NULL; else kobj = get_disk(lo->lo_disk); - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&loop_index_mutex); *part = 0; return kobj; @@ -1974,13 +1975,9 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { struct loop_device *lo; - int ret; + int ret = -ENOSYS; - ret = mutex_lock_killable(&loop_ctl_mutex); - if (ret) - return ret; - - ret = -ENOSYS; + mutex_lock(&loop_index_mutex); switch (cmd) { case LOOP_CTL_ADD: ret = loop_lookup(&lo, parm); @@ -1994,6 +1991,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; + mutex_lock(&loop_ctl_mutex); if (lo->lo_state != Lo_unbound) { ret = -EBUSY; mutex_unlock(&loop_ctl_mutex); @@ -2005,6 +2003,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; } lo->lo_disk->private_data = NULL; + mutex_unlock(&loop_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); break; @@ -2014,7 +2013,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; ret = loop_add(&lo, -1); } - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&loop_index_mutex); return ret; } @@ -2098,10 +2097,10 @@ static int __init loop_init(void) THIS_MODULE, loop_probe, NULL, NULL); /* pre-create number of devices given by config or max_loop */ - mutex_lock(&loop_ctl_mutex); + mutex_lock(&loop_index_mutex); for (i = 0; i < nr; i++) loop_add(&lo, i); - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&loop_index_mutex); printk(KERN_INFO "loop: module loaded\n"); return 0; From c2db889605d46a33810e64c3ccf3acaf1e79d9b1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Feb 2019 15:09:22 +0100 Subject: [PATCH 2055/2608] Revert "loop: Fold __loop_release into loop_release" This reverts commit f1e81ba8a3fa56dcc48828869b392b29559a0ac3 which is commit 967d1dc144b50ad005e5eecdfadfbcfb399ffff6 upstream. It does not work properly in the 4.14.y tree and causes more problems than it fixes, so revert it. Reported-by: Thomas Lindroth Reported-by: Jan Kara Cc: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3b1583addacc..24a3fb35614f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1634,15 +1634,12 @@ out: return err; } -static void lo_release(struct gendisk *disk, fmode_t mode) +static void __lo_release(struct loop_device *lo) { - struct loop_device *lo; int err; - mutex_lock(&loop_index_mutex); - lo = disk->private_data; if (atomic_dec_return(&lo->lo_refcnt)) - goto unlock_index; + return; mutex_lock(&loop_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { @@ -1652,7 +1649,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) */ err = loop_clr_fd(lo); if (!err) - goto unlock_index; + return; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, @@ -1663,7 +1660,12 @@ static void lo_release(struct gendisk *disk, fmode_t mode) } mutex_unlock(&loop_ctl_mutex); -unlock_index: +} + +static void lo_release(struct gendisk *disk, fmode_t mode) +{ + mutex_lock(&loop_index_mutex); + __lo_release(disk->private_data); mutex_unlock(&loop_index_mutex); } From cb8a3c4ec19663e3bf6ef5aecd6e1ebe3b932347 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Jan 2018 15:12:21 -0800 Subject: [PATCH 2056/2608] net: stmmac: Fix reception of Broadcom switches tags commit 8cad443eacf661796a740903a75cb8944c675b4e upstream. Broadcom tags inserted by Broadcom switches put a 4 byte header after the MAC SA and before the EtherType, which may look like some sort of 0 length LLC/SNAP packet (tcpdump and wireshark do think that way). With ACS enabled in stmmac the packets were truncated to 8 bytes on reception, whereas clearing this bit allowed normal reception to occur. In order to make that possible, we need to pass a net_device argument to the different core_init() functions and we are dependent on the Broadcom tagger padding packets correctly (which it now does). To be as little invasive as possible, this is only done for gmac1000 when the network device is DSA-enabled (netdev_uses_dsa() returns true). Signed-off-by: Florian Fainelli Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller Cc: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 3 ++- .../net/ethernet/stmicro/stmmac/dwmac1000_core.c | 12 +++++++++++- .../net/ethernet/stmicro/stmmac/dwmac100_core.c | 15 +++++++++++++-- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 12 +++++++++++- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 6 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index c87bc0a5efa3..d824bf942a8f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -475,7 +475,7 @@ struct mac_device_info; /* Helpers to program the MAC core */ struct stmmac_ops { /* MAC core initialization */ - void (*core_init)(struct mac_device_info *hw, int mtu); + void (*core_init)(struct mac_device_info *hw, struct net_device *dev); /* Enable the MAC RX/TX */ void (*set_mac)(void __iomem *ioaddr, bool enable); /* Enable and verify that the IPC module is supported */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 39c2122a4f26..14866331eced 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -477,7 +477,8 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) return 0; } -static void sun8i_dwmac_core_init(struct mac_device_info *hw, int mtu) +static void sun8i_dwmac_core_init(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; u32 v; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 8a86340ff2d3..540d21786a43 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -25,18 +25,28 @@ #include #include #include +#include #include #include "stmmac_pcs.h" #include "dwmac1000.h" -static void dwmac1000_core_init(struct mac_device_info *hw, int mtu) +static void dwmac1000_core_init(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONTROL); + int mtu = dev->mtu; /* Configure GMAC core */ value |= GMAC_CORE_INIT; + /* Clear ACS bit because Ethernet switch tagging formats such as + * Broadcom tags can look like invalid LLC/SNAP packets and cause the + * hardware to truncate packets on reception. + */ + if (netdev_uses_dsa(dev)) + value &= ~GMAC_CONTROL_ACS; + if (mtu > 1500) value |= GMAC_CONTROL_2K; if (mtu > 2000) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 8ef517356313..91b23f9db31a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -25,15 +25,26 @@ *******************************************************************************/ #include +#include #include #include "dwmac100.h" -static void dwmac100_core_init(struct mac_device_info *hw, int mtu) +static void dwmac100_core_init(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + MAC_CONTROL); - writel((value | MAC_CORE_INIT), ioaddr + MAC_CONTROL); + value |= MAC_CORE_INIT; + + /* Clear ASTP bit because Ethernet switch tagging formats such as + * Broadcom tags can look like invalid LLC/SNAP packets and cause the + * hardware to truncate packets on reception. + */ + if (netdev_uses_dsa(dev)) + value &= ~MAC_CONTROL_ASTP; + + writel(value, ioaddr + MAC_CONTROL); #ifdef STMMAC_VLAN_TAG_USED writel(ETH_P_8021Q, ioaddr + MAC_VLAN1); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index e1d03489ae63..f2283feb03da 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -17,16 +17,26 @@ #include #include #include +#include #include "stmmac_pcs.h" #include "dwmac4.h" -static void dwmac4_core_init(struct mac_device_info *hw, int mtu) +static void dwmac4_core_init(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONFIG); + int mtu = dev->mtu; value |= GMAC_CORE_INIT; + /* Clear ACS bit because Ethernet switch tagging formats such as + * Broadcom tags can look like invalid LLC/SNAP packets and cause the + * hardware to truncate packets on reception. + */ + if (netdev_uses_dsa(dev)) + value &= ~GMAC_CONFIG_ACS; + if (mtu > 1500) value |= GMAC_CONFIG_2K; if (mtu > 2000) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a901feaad4e1..11e301670b2e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2497,7 +2497,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) } /* Initialize the MAC Core */ - priv->hw->mac->core_init(priv->hw, dev->mtu); + priv->hw->mac->core_init(priv->hw, dev); /* Initialize MTL*/ if (priv->synopsys_id >= DWMAC_CORE_4_00) From 7389f857c239795a464b25573bc6e4b5d51571d6 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 18 Apr 2018 10:57:55 +0100 Subject: [PATCH 2057/2608] net: stmmac: Disable ACS Feature for GMAC >= 4 commit 565020aaeebfa7c8b3ec077bee38f4c15acc9905 upstream. ACS Feature is currently enabled for GMAC >= 4 but the llc_snap status is never checked in descriptor rx_status callback. This will cause stmmac to always strip packets even that ACS feature is already stripping them. Lets be safe and disable the ACS feature for GMAC >= 4 and always strip the packets for this GMAC version. Fixes: 477286b53f55 ("stmmac: add GMAC4 core support") Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 7 ------- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++++- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d74cedf2a397..db5f2aee360b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -336,7 +336,7 @@ enum power_event { #define MTL_RX_OVERFLOW_INT BIT(16) /* Default operating mode of the MAC */ -#define GMAC_CORE_INIT (GMAC_CONFIG_JD | GMAC_CONFIG_PS | GMAC_CONFIG_ACS | \ +#define GMAC_CORE_INIT (GMAC_CONFIG_JD | GMAC_CONFIG_PS | \ GMAC_CONFIG_BE | GMAC_CONFIG_DCRS) /* To dump the core regs excluding the Address Registers */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f2283feb03da..55ae14a6bb8c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -30,13 +30,6 @@ static void dwmac4_core_init(struct mac_device_info *hw, value |= GMAC_CORE_INIT; - /* Clear ACS bit because Ethernet switch tagging formats such as - * Broadcom tags can look like invalid LLC/SNAP packets and cause the - * hardware to truncate packets on reception. - */ - if (netdev_uses_dsa(dev)) - value &= ~GMAC_CONFIG_ACS; - if (mtu > 1500) value |= GMAC_CONFIG_2K; if (mtu > 2000) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 11e301670b2e..0e66a5082140 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3415,8 +3415,13 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 * Type frames (LLC/LLC-SNAP) + * + * llc_snap is never checked in GMAC >= 4, so this ACS + * feature is always disabled and packets need to be + * stripped manually. */ - if (unlikely(status != llc_snap)) + if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00) || + unlikely(status != llc_snap)) frame_len -= ETH_FCS_LEN; if (netif_msg_rx_status(priv)) { From fe1c7cf1a24e22b88eaa007a7c3f25b18ae7e1de Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 15 Feb 2019 00:37:57 +0800 Subject: [PATCH 2058/2608] scsi: libsas: Fix rphy phy_identifier for PHYs with end devices attached commit ffeafdd2bf0b280d67ec1a47ea6287910d271f3f upstream. The sysfs phy_identifier attribute for a sas_end_device comes from the rphy phy_identifier value. Currently this is not being set for rphys with an end device attached, so we see incorrect symlinks from systemd disk/by-path: root@localhost:~# ls -l /dev/disk/by-path/ total 0 lrwxrwxrwx 1 root root 9 Feb 13 12:26 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy0-lun-0 -> ../../sdb lrwxrwxrwx 1 root root 10 Feb 13 12:26 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy0-lun-0-part1 -> ../../sdb1 lrwxrwxrwx 1 root root 10 Feb 13 12:26 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy0-lun-0-part2 -> ../../sdb2 lrwxrwxrwx 1 root root 10 Feb 13 12:26 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy0-lun-0-part3 -> ../../sdc3 Indeed, each sas_end_device phy_identifier value is 0: root@localhost:/# more sys/class/sas_device/end_device-0\:0\:2/phy_identifier 0 root@localhost:/# more sys/class/sas_device/end_device-0\:0\:10/phy_identifier 0 This patch fixes the discovery code to set the phy_identifier. With this, we now get proper symlinks: root@localhost:~# ls -l /dev/disk/by-path/ total 0 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy10-lun-0 -> ../../sdg lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy11-lun-0 -> ../../sdh lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy2-lun-0 -> ../../sda lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy2-lun-0-part1 -> ../../sda1 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy3-lun-0 -> ../../sdb lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy3-lun-0-part1 -> ../../sdb1 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy3-lun-0-part2 -> ../../sdb2 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy4-lun-0 -> ../../sdc lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy4-lun-0-part1 -> ../../sdc1 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy4-lun-0-part2 -> ../../sdc2 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy4-lun-0-part3 -> ../../sdc3 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy5-lun-0 -> ../../sdd lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy7-lun-0 -> ../../sde lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy7-lun-0-part1 -> ../../sde1 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy7-lun-0-part2 -> ../../sde2 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy7-lun-0-part3 -> ../../sde3 lrwxrwxrwx 1 root root 9 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy8-lun-0 -> ../../sdf lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy8-lun-0-part1 -> ../../sdf1 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy8-lun-0-part2 -> ../../sdf2 lrwxrwxrwx 1 root root 10 Feb 13 11:53 platform-HISI0162:01-sas-exp0x500e004aaaaaaa1f-phy8-lun-0-part3 -> ../../sdf3 Fixes: 2908d778ab3e ("[SCSI] aic94xx: new driver") Reported-by: dann frazier Signed-off-by: John Garry Reviewed-by: Jason Yan Tested-by: dann frazier Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libsas/sas_expander.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index e2ea389fbec3..56dec663d9f4 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -829,6 +829,7 @@ static struct domain_device *sas_ex_discover_end_dev( rphy = sas_end_device_alloc(phy->port); if (!rphy) goto out_free; + rphy->identify.phy_identifier = phy_id; child->rphy = rphy; get_device(&rphy->dev); @@ -856,6 +857,7 @@ static struct domain_device *sas_ex_discover_end_dev( child->rphy = rphy; get_device(&rphy->dev); + rphy->identify.phy_identifier = phy_id; sas_fill_in_rphy(child, rphy); list_add_tail(&child->disco_list_node, &parent->port->disco_list); From 7796c2a06e1f0236edb5e43af8d995e1b2a8d64d Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Wed, 19 Dec 2018 08:57:41 -0800 Subject: [PATCH 2059/2608] drm/msm: Unblock writer if reader closes file [ Upstream commit 99c66bc051e7407fe0bf0607b142ec0be1a1d1dd ] Prevents deadlock when fifo is full and reader closes file. Signed-off-by: Kristian H. Kristensen Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_rd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index ec56794ad039..bdce1c9434c6 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -109,7 +109,9 @@ static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) char *fptr = &fifo->buf[fifo->head]; int n; - wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0 || !rd->open); + if (!rd->open) + return; /* Note that smp_load_acquire() is not strictly required * as CIRC_SPACE_TO_END() does not access the tail more @@ -207,7 +209,10 @@ out: static int rd_release(struct inode *inode, struct file *file) { struct msm_rd_state *rd = inode->i_private; + rd->open = false; + wake_up_all(&rd->fifo_event); + return 0; } From 39d926f4c18531af71831fe36100da90d1f1ce10 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Tue, 18 Dec 2018 16:24:54 +0800 Subject: [PATCH 2060/2608] ASoC: Intel: Haswell/Broadwell: fix setting for .dynamic field [ Upstream commit 906a9abc5de73c383af518f5a806f4be2993a0c7 ] For some reason this field was set to zero when all other drivers use .dynamic = 1 for front-ends. This change was tested on Dell XPS13 and has no impact with the existing legacy driver. The SOF driver also works with this change which enables it to override the fixed topology. Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/broadwell.c | 2 +- sound/soc/intel/boards/haswell.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/boards/broadwell.c b/sound/soc/intel/boards/broadwell.c index 6dcbbcefc25b..88c26ab7b027 100644 --- a/sound/soc/intel/boards/broadwell.c +++ b/sound/soc/intel/boards/broadwell.c @@ -191,7 +191,7 @@ static struct snd_soc_dai_link broadwell_rt286_dais[] = { .stream_name = "Loopback", .cpu_dai_name = "Loopback Pin", .platform_name = "haswell-pcm-audio", - .dynamic = 0, + .dynamic = 1, .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c index 5e1ea0371c90..8158409921e0 100644 --- a/sound/soc/intel/boards/haswell.c +++ b/sound/soc/intel/boards/haswell.c @@ -145,7 +145,7 @@ static struct snd_soc_dai_link haswell_rt5640_dais[] = { .stream_name = "Loopback", .cpu_dai_name = "Loopback Pin", .platform_name = "haswell-pcm-audio", - .dynamic = 0, + .dynamic = 1, .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, From 34f6404b2b1974845b3d24766c4bfe1739365d1b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Dec 2018 12:06:58 +0300 Subject: [PATCH 2061/2608] ALSA: compress: prevent potential divide by zero bugs [ Upstream commit 678e2b44c8e3fec3afc7202f1996a4500a50be93 ] The problem is seen in the q6asm_dai_compr_set_params() function: ret = q6asm_map_memory_regions(dir, prtd->audio_client, prtd->phys, (prtd->pcm_size / prtd->periods), ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ prtd->periods); In this code prtd->pcm_size is the buffer_size and prtd->periods comes from params->buffer.fragments. If we allow the number of fragments to be zero then it results in a divide by zero bug. One possible fix would be to use prtd->pcm_count directly instead of using the division to re-calculate it. But I decided that it doesn't really make sense to allow zero fragments. Signed-off-by: Dan Carpenter Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/core/compress_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 4490a699030b..555df64d46ff 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -529,7 +529,8 @@ static int snd_compress_check_input(struct snd_compr_params *params) { /* first let's check the buffer parameter's */ if (params->buffer.fragment_size == 0 || - params->buffer.fragments > INT_MAX / params->buffer.fragment_size) + params->buffer.fragments > INT_MAX / params->buffer.fragment_size || + params->buffer.fragments == 0) return -EINVAL; /* now codec parameters */ From c70c6e7f3a41a416a82faddf4f3cfd6c19d48fce Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Thu, 3 Jan 2019 13:59:12 -0800 Subject: [PATCH 2062/2608] ASoC: Variable "val" in function rt274_i2c_probe() could be uninitialized [ Upstream commit 8c3590de0a378c2449fc1aec127cc693632458e4 ] Inside function rt274_i2c_probe(), if regmap_read() function returns -EINVAL, then local variable "val" leaves uninitialized but used in if statement. This is potentially unsafe. Signed-off-by: Yizhuo Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt274.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt274.c b/sound/soc/codecs/rt274.c index 8f92e5c4dd9d..cd048df76232 100644 --- a/sound/soc/codecs/rt274.c +++ b/sound/soc/codecs/rt274.c @@ -1128,8 +1128,11 @@ static int rt274_i2c_probe(struct i2c_client *i2c, return ret; } - regmap_read(rt274->regmap, + ret = regmap_read(rt274->regmap, RT274_GET_PARAM(AC_NODE_ROOT, AC_PAR_VENDOR_ID), &val); + if (ret) + return ret; + if (val != RT274_VENDOR_ID) { dev_err(&i2c->dev, "Device with ID register %#x is not rt274\n", val); From ffc6b61f38a7165e488de0dba1ffd46f676cc0f7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Dec 2018 01:55:19 +0100 Subject: [PATCH 2063/2608] clk: vc5: Abort clock configuration without upstream clock [ Upstream commit 2137a109a5e39c2bdccfffe65230ed3fadbaac0e ] In case the upstream clock are not set, which can happen in case the VC5 has no valid upstream clock, the $src variable is used uninited by regmap_update_bits(). Check for this condition and return -EINVAL in such case. Note that in case the VC5 has no valid upstream clock, the VC5 can not operate correctly. That is a hardware property of the VC5. The internal oscilator present in some VC5 models is also considered upstream clock. Signed-off-by: Marek Vasut Cc: Alexey Firago Cc: Laurent Pinchart Cc: Stephen Boyd Cc: linux-renesas-soc@vger.kernel.org [sboyd@kernel.org: Added comment about probe preventing this from happening in the first place] Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-versaclock5.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index decffb3826ec..a738af893532 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -262,8 +262,10 @@ static int vc5_mux_set_parent(struct clk_hw *hw, u8 index) if (vc5->clk_mux_ins == VC5_MUX_IN_XIN) src = VC5_PRIM_SRC_SHDN_EN_XTAL; - if (vc5->clk_mux_ins == VC5_MUX_IN_CLKIN) + else if (vc5->clk_mux_ins == VC5_MUX_IN_CLKIN) src = VC5_PRIM_SRC_SHDN_EN_CLKIN; + else /* Invalid; should have been caught by vc5_probe() */ + return -EINVAL; } return regmap_update_bits(vc5->regmap, VC5_PRIM_SRC_SHDN, mask, src); From 7330b4d0088a726876cb855810260e6ed785f123 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Dec 2018 10:02:42 +0300 Subject: [PATCH 2064/2608] thermal: int340x_thermal: Fix a NULL vs IS_ERR() check [ Upstream commit 3fe931b31a4078395c1967f0495dcc9e5ec6b5e3 ] The intel_soc_dts_iosf_init() function doesn't return NULL, it returns error pointers. Fixes: 4d0dd6c1576b ("Thermal/int340x/processor_thermal: Enable auxiliary DTS for Braswell") Signed-off-by: Dan Carpenter Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/int340x_thermal/processor_thermal_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c index f02341f7134d..c6ab7db8c8e2 100644 --- a/drivers/thermal/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c @@ -416,7 +416,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, proc_priv->soc_dts = intel_soc_dts_iosf_init( INTEL_SOC_DTS_INTERRUPT_MSI, 2, 0); - if (proc_priv->soc_dts && pdev->irq) { + if (!IS_ERR(proc_priv->soc_dts) && pdev->irq) { ret = pci_enable_msi(pdev); if (!ret) { ret = request_threaded_irq(pdev->irq, NULL, From 887a873e0ce2e5464a5d62f701900a07651e7a47 Mon Sep 17 00:00:00 2001 From: Bo He Date: Mon, 14 Jan 2019 09:48:32 +0200 Subject: [PATCH 2065/2608] usb: dwc3: gadget: synchronize_irq dwc irq in suspend [ Upstream commit 01c10880d24291a96a4ab0da773e3c5ce4d12da8 ] We see dwc3 endpoint stopped by unwanted irq during suspend resume test, which is caused dwc3 ep can't be started with error "No Resource". Here, add synchronize_irq before suspend to sync the pending IRQ handlers complete. Signed-off-by: Bo He Signed-off-by: Yu Wang Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 2f96d2d0addd..5da078bf5f76 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3327,6 +3327,8 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc); + synchronize_irq(dwc->irq_gadget); + return 0; } From 11617a275ef92158abe02e11ba79fd8eb312dca6 Mon Sep 17 00:00:00 2001 From: Zeng Tao Date: Wed, 26 Dec 2018 19:22:00 +0800 Subject: [PATCH 2066/2608] usb: dwc3: gadget: Fix the uninitialized link_state when udc starts [ Upstream commit 88b1bb1f3b88e0bf20b05d543a53a5b99bd7ceb6 ] Currently the link_state is uninitialized and the default value is 0(U0) before the first time we start the udc, and after we start the udc then stop the udc, the link_state will be undefined. We may have the following warnings if we start the udc again with an undefined link_state: WARNING: CPU: 0 PID: 327 at drivers/usb/dwc3/gadget.c:294 dwc3_send_gadget_ep_cmd+0x304/0x308 dwc3 100e0000.hidwc3_0: wakeup failed --> -22 [...] Call Trace: [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x84/0x98) [] (dump_stack) from [] (__warn+0xe8/0x100) [] (__warn) from [](warn_slowpath_fmt+0x38/0x48) [] (warn_slowpath_fmt) from [](dwc3_send_gadget_ep_cmd+0x304/0x308) [] (dwc3_send_gadget_ep_cmd) from [](dwc3_ep0_start_trans+0x48/0xf4) [] (dwc3_ep0_start_trans) from [](dwc3_ep0_out_start+0x64/0x80) [] (dwc3_ep0_out_start) from [](__dwc3_gadget_start+0x1e0/0x278) [] (__dwc3_gadget_start) from [](dwc3_gadget_start+0x88/0x10c) [] (dwc3_gadget_start) from [](udc_bind_to_driver+0x88/0xbc) [] (udc_bind_to_driver) from [](usb_gadget_probe_driver+0xf8/0x140) [] (usb_gadget_probe_driver) from [](gadget_dev_desc_UDC_store+0xac/0xc4 [libcomposite]) [] (gadget_dev_desc_UDC_store [libcomposite]) from[] (configfs_write_file+0xd4/0x160) [] (configfs_write_file) from [] (__vfs_write+0x1c/0x114) [] (__vfs_write) from [] (vfs_write+0xa4/0x168) [] (vfs_write) from [] (SyS_write+0x3c/0x90) [] (SyS_write) from [] (ret_fast_syscall+0x0/0x3c) Signed-off-by: Zeng Tao Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 5da078bf5f76..32dd0d3462da 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1910,6 +1910,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) /* begin to receive SETUP packets */ dwc->ep0state = EP0_SETUP_PHASE; + dwc->link_state = DWC3_LINK_STATE_SS_DIS; dwc3_ep0_out_start(dwc); dwc3_gadget_enable_irq(dwc); From c6733e3433ad463a6d64cabbf3d0a320f453bced Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Dec 2018 23:42:52 +0300 Subject: [PATCH 2067/2608] usb: gadget: Potential NULL dereference on allocation error [ Upstream commit df28169e1538e4a8bcd8b779b043e5aa6524545c ] The source_sink_alloc_func() function is supposed to return error pointers on error. The function is called from usb_get_function() which doesn't check for NULL returns so it would result in an Oops. Of course, in the current kernel, small allocations always succeed so this doesn't affect runtime. Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_sourcesink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 8784fa12ea2c..6e9d958004a0 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -842,7 +842,7 @@ static struct usb_function *source_sink_alloc_func( ss = kzalloc(sizeof(*ss), GFP_KERNEL); if (!ss) - return NULL; + return ERR_PTR(-ENOMEM); ss_opts = container_of(fi, struct f_ss_opts, func_inst); From 10a1f3195914fe650b326364ccbb942080ce72dd Mon Sep 17 00:00:00 2001 From: Srinivas Ramana Date: Thu, 20 Dec 2018 19:05:57 +0530 Subject: [PATCH 2068/2608] genirq: Make sure the initial affinity is not empty [ Upstream commit bddda606ec76550dd63592e32a6e87e7d32583f7 ] If all CPUs in the irq_default_affinity mask are offline when an interrupt is initialized then irq_setup_affinity() can set an empty affinity mask for a newly allocated interrupt. Fix this by falling back to cpu_online_mask in case the resulting affinity mask is zero. Signed-off-by: Srinivas Ramana Signed-off-by: Thomas Gleixner Cc: linux-arm-msm@vger.kernel.org Link: https://lkml.kernel.org/r/1545312957-8504-1-git-send-email-sramana@codeaurora.org Signed-off-by: Sasha Levin --- kernel/irq/manage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 4cd85870f00e..6c877d28838f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -360,6 +360,9 @@ int irq_setup_affinity(struct irq_desc *desc) } cpumask_and(&mask, cpu_online_mask, set); + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpu_online_mask); + if (node != NUMA_NO_NODE) { const struct cpumask *nodemask = cpumask_of_node(node); From 8d1e86985239edc297b22cc9a61efd217b1d865a Mon Sep 17 00:00:00 2001 From: Silvio Cesare Date: Sat, 12 Jan 2019 16:28:43 +0100 Subject: [PATCH 2069/2608] ASoC: dapm: change snprintf to scnprintf for possible overflow [ Upstream commit e581e151e965bf1f2815dd94620b638fec4d0a7e ] Change snprintf to scnprintf. There are generally two cases where using snprintf causes problems. 1) Uses of size += snprintf(buf, SIZE - size, fmt, ...) In this case, if snprintf would have written more characters than what the buffer size (SIZE) is, then size will end up larger than SIZE. In later uses of snprintf, SIZE - size will result in a negative number, leading to problems. Note that size might already be too large by using size = snprintf before the code reaches a case of size += snprintf. 2) If size is ultimately used as a length parameter for a copy back to user space, then it will potentially allow for a buffer overflow and information disclosure when size is greater than SIZE. When the size is used to index the buffer directly, we can have memory corruption. This also means when size = snprintf... is used, it may also cause problems since size may become large. Copying to userspace is mitigated by the HARDENED_USERCOPY kernel configuration. The solution to these issues is to use scnprintf which returns the number of characters actually written to the buffer, so the size variable will never exceed SIZE. Signed-off-by: Silvio Cesare Cc: Liam Girdwood Cc: Mark Brown Cc: Dan Carpenter Cc: Kees Cook Cc: Will Deacon Cc: Greg KH Signed-off-by: Willy Tarreau Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-dapm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 53c9d7525639..bba6a917cd02 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2009,19 +2009,19 @@ static ssize_t dapm_widget_power_read_file(struct file *file, out = is_connected_output_ep(w, NULL, NULL); } - ret = snprintf(buf, PAGE_SIZE, "%s: %s%s in %d out %d", + ret = scnprintf(buf, PAGE_SIZE, "%s: %s%s in %d out %d", w->name, w->power ? "On" : "Off", w->force ? " (forced)" : "", in, out); if (w->reg >= 0) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " - R%d(0x%x) mask 0x%x", w->reg, w->reg, w->mask << w->shift); - ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); if (w->sname) - ret += snprintf(buf + ret, PAGE_SIZE - ret, " stream %s %s\n", + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " stream %s %s\n", w->sname, w->active ? "active" : "inactive"); @@ -2034,7 +2034,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, if (!p->connect) continue; - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " %s \"%s\" \"%s\"\n", (rdir == SND_SOC_DAPM_DIR_IN) ? "in" : "out", p->name ? p->name : "static", From 009d7727747e8bf416302c57947e24bb04699996 Mon Sep 17 00:00:00 2001 From: Silvio Cesare Date: Tue, 15 Jan 2019 04:27:27 +0100 Subject: [PATCH 2070/2608] ASoC: imx-audmux: change snprintf to scnprintf for possible overflow [ Upstream commit c407cd008fd039320d147088b52d0fa34ed3ddcb ] Change snprintf to scnprintf. There are generally two cases where using snprintf causes problems. 1) Uses of size += snprintf(buf, SIZE - size, fmt, ...) In this case, if snprintf would have written more characters than what the buffer size (SIZE) is, then size will end up larger than SIZE. In later uses of snprintf, SIZE - size will result in a negative number, leading to problems. Note that size might already be too large by using size = snprintf before the code reaches a case of size += snprintf. 2) If size is ultimately used as a length parameter for a copy back to user space, then it will potentially allow for a buffer overflow and information disclosure when size is greater than SIZE. When the size is used to index the buffer directly, we can have memory corruption. This also means when size = snprintf... is used, it may also cause problems since size may become large. Copying to userspace is mitigated by the HARDENED_USERCOPY kernel configuration. The solution to these issues is to use scnprintf which returns the number of characters actually written to the buffer, so the size variable will never exceed SIZE. Signed-off-by: Silvio Cesare Cc: Timur Tabi Cc: Nicolin Chen Cc: Mark Brown Cc: Xiubo Li Cc: Fabio Estevam Cc: Dan Carpenter Cc: Kees Cook Cc: Will Deacon Cc: Greg KH Signed-off-by: Willy Tarreau Acked-by: Nicolin Chen Reviewed-by: Kees Cook Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/imx-audmux.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index 392d5eef356d..99e07b01a2ce 100644 --- a/sound/soc/fsl/imx-audmux.c +++ b/sound/soc/fsl/imx-audmux.c @@ -86,49 +86,49 @@ static ssize_t audmux_read_file(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, PAGE_SIZE, "PDCR: %08x\nPTCR: %08x\n", + ret = scnprintf(buf, PAGE_SIZE, "PDCR: %08x\nPTCR: %08x\n", pdcr, ptcr); if (ptcr & IMX_AUDMUX_V2_PTCR_TFSDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxFS output from %s, ", audmux_port_string((ptcr >> 27) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxFS input, "); if (ptcr & IMX_AUDMUX_V2_PTCR_TCLKDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxClk output from %s", audmux_port_string((ptcr >> 22) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxClk input"); - ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); if (ptcr & IMX_AUDMUX_V2_PTCR_SYN) { - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Port is symmetric"); } else { if (ptcr & IMX_AUDMUX_V2_PTCR_RFSDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxFS output from %s, ", audmux_port_string((ptcr >> 17) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxFS input, "); if (ptcr & IMX_AUDMUX_V2_PTCR_RCLKDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxClk output from %s", audmux_port_string((ptcr >> 12) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxClk input"); } - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\nData received from %s\n", audmux_port_string((pdcr >> 13) & 0x7)); From 289ee4095263372eb9ccbb4f2e2028ed37e5f9f7 Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Wed, 16 Jan 2019 11:43:19 -0600 Subject: [PATCH 2071/2608] selftests: seccomp: use LDLIBS instead of LDFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5bbc73a841d7f0bbe025a342146dde462a796a5a ] seccomp_bpf fails to build due to undefined reference errors: aarch64-linaro-linux-gcc --sysroot=/build/tmp-rpb-glibc/sysroots/hikey -O2 -pipe -g -feliminate-unused-debug-types -Wl,-no-as-needed -Wall -Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed -lpthread seccomp_bpf.c -o /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf /tmp/ccrlR3MW.o: In function `tsync_sibling': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1920: undefined reference to `sem_post' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1920: undefined reference to `sem_post' /tmp/ccrlR3MW.o: In function `TSYNC_setup': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1863: undefined reference to `sem_init' /tmp/ccrlR3MW.o: In function `TSYNC_teardown': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1904: undefined reference to `sem_destroy' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1897: undefined reference to `pthread_kill' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1898: undefined reference to `pthread_cancel' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1899: undefined reference to `pthread_join' /tmp/ccrlR3MW.o: In function `tsync_start_sibling': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /tmp/ccrlR3MW.o: In function `TSYNC_siblings_fail_prctl': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1978: undefined reference to `sem_wait' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1990: undefined reference to `pthread_join' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1992: undefined reference to `pthread_join' /tmp/ccrlR3MW.o: In function `tsync_start_sibling': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /tmp/ccrlR3MW.o: In function `TSYNC_two_siblings_with_ancestor': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2016: undefined reference to `sem_wait' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2032: undefined reference to `pthread_join' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2034: undefined reference to `pthread_join' /tmp/ccrlR3MW.o: In function `tsync_start_sibling': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /tmp/ccrlR3MW.o: In function `TSYNC_two_sibling_want_nnp': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2046: undefined reference to `sem_wait' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2058: undefined reference to `pthread_join' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2060: undefined reference to `pthread_join' /tmp/ccrlR3MW.o: In function `tsync_start_sibling': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /tmp/ccrlR3MW.o: In function `TSYNC_two_siblings_with_no_filter': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2073: undefined reference to `sem_wait' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2098: undefined reference to `pthread_join' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2100: undefined reference to `pthread_join' /tmp/ccrlR3MW.o: In function `tsync_start_sibling': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /tmp/ccrlR3MW.o: In function `TSYNC_two_siblings_with_one_divergence': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2125: undefined reference to `sem_wait' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2143: undefined reference to `pthread_join' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2145: undefined reference to `pthread_join' /tmp/ccrlR3MW.o: In function `tsync_start_sibling': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' /tmp/ccrlR3MW.o: In function `TSYNC_two_siblings_not_under_filter': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2169: undefined reference to `sem_wait' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2202: undefined reference to `pthread_join' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:2227: undefined reference to `pthread_join' /tmp/ccrlR3MW.o: In function `tsync_start_sibling': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/seccomp/seccomp_bpf.c:1941: undefined reference to `pthread_create' It's GNU Make and linker specific. The default Makefile rule looks like: $(CC) $(CFLAGS) $(LDFLAGS) $@ $^ $(LDLIBS) When linking is done by gcc itself, no issue, but when it needs to be passed to proper ld, only LDLIBS follows and then ld cannot know what libs to link with. More detail: https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html LDFLAGS Extra flags to give to compilers when they are supposed to invoke the linker, ‘ld’, such as -L. Libraries (-lfoo) should be added to the LDLIBS variable instead. LDLIBS Library flags or names given to compilers when they are supposed to invoke the linker, ‘ld’. LOADLIBES is a deprecated (but still supported) alternative to LDLIBS. Non-library linker flags, such as -L, should go in the LDFLAGS variable. https://lkml.org/lkml/2010/2/10/362 tools/perf: libraries must come after objects Link order matters, use LDLIBS instead of LDFLAGS to properly link against libpthread. Signed-off-by: Fathi Boudra Acked-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/seccomp/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index fce7f4ce0692..1760b3e39730 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -9,7 +9,7 @@ BINARIES := seccomp_bpf seccomp_benchmark CFLAGS += -Wl,-no-as-needed -Wall seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h - $(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@ TEST_PROGS += $(BINARIES) EXTRA_CLEAN := $(BINARIES) From 51f08abcd74233d4a5dcc1d20e74e786a67bd52f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 14 Jan 2019 14:51:33 +0100 Subject: [PATCH 2072/2608] selftests: gpio-mockup-chardev: Check asprintf() for error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 508cacd7da6659ae7b7bdd0a335f675422277758 ] With gcc 7.3.0: gpio-mockup-chardev.c: In function ‘get_debugfs’: gpio-mockup-chardev.c:62:3: warning: ignoring return value of ‘asprintf’, declared with attribute warn_unused_result [-Wunused-result] asprintf(path, "%s/gpio", mnt_fs_get_target(fs)); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Handle asprintf() failures to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/gpio/gpio-mockup-chardev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c index 667e916fa7cc..6ceeeed4eeb9 100644 --- a/tools/testing/selftests/gpio/gpio-mockup-chardev.c +++ b/tools/testing/selftests/gpio/gpio-mockup-chardev.c @@ -37,7 +37,7 @@ static int get_debugfs(char **path) struct libmnt_table *tb; struct libmnt_iter *itr = NULL; struct libmnt_fs *fs; - int found = 0; + int found = 0, ret; cxt = mnt_new_context(); if (!cxt) @@ -58,8 +58,11 @@ static int get_debugfs(char **path) break; } } - if (found) - asprintf(path, "%s/gpio", mnt_fs_get_target(fs)); + if (found) { + ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs)); + if (ret < 0) + err(EXIT_FAILURE, "failed to format string"); + } mnt_free_iter(itr); mnt_free_context(cxt); From 87142f7638ad16047b6c377ff87a23390e31406f Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 13 Dec 2018 18:42:57 +0300 Subject: [PATCH 2073/2608] ARC: fix __ffs return value to avoid build warnings [ Upstream commit 4e868f8419cb4cb558c5d428e7ab5629cef864c7 ] | CC mm/nobootmem.o |In file included from ./include/asm-generic/bug.h:18:0, | from ./arch/arc/include/asm/bug.h:32, | from ./include/linux/bug.h:5, | from ./include/linux/mmdebug.h:5, | from ./include/linux/gfp.h:5, | from ./include/linux/slab.h:15, | from mm/nobootmem.c:14: |mm/nobootmem.c: In function '__free_pages_memory': |./include/linux/kernel.h:845:29: warning: comparison of distinct pointer types lacks a cast | (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) | ^ |./include/linux/kernel.h:859:4: note: in expansion of macro '__typecheck' | (__typecheck(x, y) && __no_side_effects(x, y)) | ^~~~~~~~~~~ |./include/linux/kernel.h:869:24: note: in expansion of macro '__safe_cmp' | __builtin_choose_expr(__safe_cmp(x, y), \ | ^~~~~~~~~~ |./include/linux/kernel.h:878:19: note: in expansion of macro '__careful_cmp' | #define min(x, y) __careful_cmp(x, y, <) | ^~~~~~~~~~~~~ |mm/nobootmem.c:104:11: note: in expansion of macro 'min' | order = min(MAX_ORDER - 1UL, __ffs(start)); Change __ffs return value from 'int' to 'unsigned long' as it is done in other implementations (like asm-generic, x86, etc...) to avoid build-time warnings in places where type is strictly checked. As __ffs may return values in [0-31] interval changing return type to unsigned is valid. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/include/asm/bitops.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 8da87feec59a..99e6d8948f4a 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -340,7 +340,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x) /* * __ffs: Similar to ffs, but zero based (0-31) */ -static inline __attribute__ ((const)) int __ffs(unsigned long word) +static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word) { if (!word) return word; @@ -400,9 +400,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x) /* * __ffs: Similar to ffs, but zero based (0-31) */ -static inline __attribute__ ((const)) int __ffs(unsigned long x) +static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) { - int n; + unsigned long n; asm volatile( " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ From 8d7a522437beb95f3a55fe3c4a6cb5013399c2ec Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Mon, 24 Dec 2018 14:23:36 -0500 Subject: [PATCH 2074/2608] drivers: thermal: int340x_thermal: Fix sysfs race condition [ Upstream commit 129699bb8c7572106b5bbb2407c2daee4727ccad ] Changes since V1: * Use dev_info instead of printk * Use dev_warn instead of BUG_ON Previously, sysfs_create_group was called before all initialization had fully run - specifically, before pci_set_drvdata was called. Since the sysctl group is visible to userspace as soon as sysfs_create_group returns, a small window of time existed during which a process could read from an uninitialized/partially-initialized device. This commit moves the creation of the sysctl group to after all initialized is completed. This ensures that it's impossible for userspace to read from a sysctl file before initialization has fully completed. To catch any future regressions, I've added a check to ensure that proc_thermal_emum_mode is never PROC_THERMAL_NONE when a process tries to read from a sysctl file. Previously, the aforementioned race condition could result in the 'else' branch running while PROC_THERMAL_NONE was set, leading to a null pointer deference. Signed-off-by: Aaron Hill Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- .../processor_thermal_device.c | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c index c6ab7db8c8e2..c344a3783625 100644 --- a/drivers/thermal/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c @@ -77,7 +77,12 @@ static ssize_t power_limit_##index##_##suffix##_show(struct device *dev, \ struct pci_dev *pci_dev; \ struct platform_device *pdev; \ struct proc_thermal_device *proc_dev; \ -\ + \ + if (proc_thermal_emum_mode == PROC_THERMAL_NONE) { \ + dev_warn(dev, "Attempted to get power limit before device was initialized!\n"); \ + return 0; \ + } \ + \ if (proc_thermal_emum_mode == PROC_THERMAL_PLATFORM_DEV) { \ pdev = to_platform_device(dev); \ proc_dev = platform_get_drvdata(pdev); \ @@ -291,11 +296,6 @@ static int proc_thermal_add(struct device *dev, *priv = proc_priv; ret = proc_thermal_read_ppcc(proc_priv); - if (!ret) { - ret = sysfs_create_group(&dev->kobj, - &power_limit_attribute_group); - - } if (ret) return ret; @@ -309,8 +309,7 @@ static int proc_thermal_add(struct device *dev, proc_priv->int340x_zone = int340x_thermal_zone_add(adev, ops); if (IS_ERR(proc_priv->int340x_zone)) { - ret = PTR_ERR(proc_priv->int340x_zone); - goto remove_group; + return PTR_ERR(proc_priv->int340x_zone); } else ret = 0; @@ -324,9 +323,6 @@ static int proc_thermal_add(struct device *dev, remove_zone: int340x_thermal_zone_remove(proc_priv->int340x_zone); -remove_group: - sysfs_remove_group(&proc_priv->dev->kobj, - &power_limit_attribute_group); return ret; } @@ -357,7 +353,10 @@ static int int3401_add(struct platform_device *pdev) platform_set_drvdata(pdev, proc_priv); proc_thermal_emum_mode = PROC_THERMAL_PLATFORM_DEV; - return 0; + dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PLATFORM_DEV\n"); + + return sysfs_create_group(&pdev->dev.kobj, + &power_limit_attribute_group); } static int int3401_remove(struct platform_device *pdev) @@ -434,7 +433,10 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "No auxiliary DTSs enabled\n"); } - return 0; + dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PCI\n"); + + return sysfs_create_group(&pdev->dev.kobj, + &power_limit_attribute_group); } static void proc_thermal_pci_remove(struct pci_dev *pdev) From 779f4513ec9eea8b377ee4e7c0be3fd5aabdcd88 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 16 Jan 2019 06:20:11 -0700 Subject: [PATCH 2075/2608] staging: rtl8723bs: Fix build error with Clang when inlining is disabled [ Upstream commit 97715058b70da1262fd07798c8b2e3e894f759dd ] When CONFIG_NO_AUTO_INLINE was present in linux-next (which added '-fno-inline-functions' to KBUILD_CFLAGS), an allyesconfig build with Clang failed at the modpost stage: ERROR: "is_broadcast_mac_addr" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! ERROR: "is_zero_mac_addr" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! ERROR: "is_multicast_mac_addr" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! These functions were marked as extern inline, meaning that if inlining doesn't happen, the function will be undefined, as it is above. This happens to work with GCC because the '-fno-inline-functions' option respects the __inline attribute so all instances of these functions are inlined as expected and the definition doesn't actually matter. However, with Clang and '-fno-inline-functions', a function has to be marked with the __always_inline attribute to be considered for inlining, which none of these functions are. Clang tries to find the symbol definition elsewhere as it was told and fails, which trickles down to modpost. To make sure that this code compiles regardless of compiler and make the intention of the code clearer, use 'static' to ensure these functions are always defined, regardless of inlining. Additionally, silence a checkpatch warning by switching from '__inline' to 'inline'. Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtl8723bs/include/ieee80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8723bs/include/ieee80211.h b/drivers/staging/rtl8723bs/include/ieee80211.h index 73ce63770c3c..fa9c80fc7773 100644 --- a/drivers/staging/rtl8723bs/include/ieee80211.h +++ b/drivers/staging/rtl8723bs/include/ieee80211.h @@ -1008,18 +1008,18 @@ enum ieee80211_state { #define IP_FMT "%pI4" #define IP_ARG(x) (x) -extern __inline int is_multicast_mac_addr(const u8 *addr) +static inline int is_multicast_mac_addr(const u8 *addr) { return ((addr[0] != 0xff) && (0x01 & addr[0])); } -extern __inline int is_broadcast_mac_addr(const u8 *addr) +static inline int is_broadcast_mac_addr(const u8 *addr) { return ((addr[0] == 0xff) && (addr[1] == 0xff) && (addr[2] == 0xff) && \ (addr[3] == 0xff) && (addr[4] == 0xff) && (addr[5] == 0xff)); } -extern __inline int is_zero_mac_addr(const u8 *addr) +static inline int is_zero_mac_addr(const u8 *addr) { return ((addr[0] == 0x00) && (addr[1] == 0x00) && (addr[2] == 0x00) && \ (addr[3] == 0x00) && (addr[4] == 0x00) && (addr[5] == 0x00)); From 158e41a942b6c59dd73913e9b7a11ae00f853ece Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 17 Jan 2019 16:32:42 -0500 Subject: [PATCH 2076/2608] mac80211: fix miscounting of ttl-dropped frames [ Upstream commit a0dc02039a2ee54fb4ae400e0b755ed30e73e58c ] In ieee80211_rx_h_mesh_fwding, we increment the 'dropped_frames_ttl' counter when we decrement the ttl to zero. For unicast frames destined for other hosts, we stop processing the frame at that point. For multicast frames, we do not rebroadcast it in this case, but we do pass the frame up the stack to process it on this STA. That doesn't match the usual definition of "dropped," so don't count those as such. With this change, something like `ping6 -i0.2 ff02::1%mesh0` from a peer in a ttl=1 network no longer increments the counter rapidly. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c7ac1a480b1d..dfc2af6833af 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2533,7 +2533,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) skb_set_queue_mapping(skb, q); if (!--mesh_hdr->ttl) { - IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); + if (!is_multicast_ether_addr(hdr->addr1)) + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, + dropped_frames_ttl); goto out; } From 635d29f488a9941ca29e02b0bc8aab938cd1d6cc Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Fri, 30 Nov 2018 20:40:56 +0530 Subject: [PATCH 2077/2608] sched/wait: Fix rcuwait_wake_up() ordering [ Upstream commit 6dc080eeb2ba01973bfff0d79844d7a59e12542e ] For some peculiar reason rcuwait_wake_up() has the right barrier in the comment, but not in the code. This mistake has been observed to cause a deadlock in the following situation: P1 P2 percpu_up_read() percpu_down_write() rcu_sync_is_idle() // false rcu_sync_enter() ... __percpu_up_read() [S] ,- __this_cpu_dec(*sem->read_count) | smp_rmb(); [L] | task = rcu_dereference(w->task) // NULL | | [S] w->task = current | smp_mb(); | [L] readers_active_check() // fail `-> Where the smp_rmb() (obviously) fails to constrain the store. [ peterz: Added changelog. ] Signed-off-by: Prateek Sood Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrea Parri Acked-by: Davidlohr Bueso Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery") Link: https://lkml.kernel.org/r/1543590656-7157-1-git-send-email-prsood@codeaurora.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/exit.c b/kernel/exit.c index 3aa01b74c1e3..5523fb0c20c8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -306,7 +306,7 @@ void rcuwait_wake_up(struct rcuwait *w) * MB (A) MB (B) * [L] cond [L] tsk */ - smp_rmb(); /* (B) */ + smp_mb(); /* (B) */ /* * Avoid using task_rcu_dereference() magic as long as we are careful, From f618b90b11fc45efd13a159f628f27f06ebb8184 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Nov 2018 14:44:49 +0100 Subject: [PATCH 2078/2608] futex: Fix (possible) missed wakeup [ Upstream commit b061c38bef43406df8e73c5be06cbfacad5ee6ad ] We must not rely on wake_q_add() to delay the wakeup; in particular commit: 1d0dcb3ad9d3 ("futex: Implement lockless wakeups") moved wake_q_add() before smp_store_release(&q->lock_ptr, NULL), which could result in futex_wait() waking before observing ->lock_ptr == NULL and going back to sleep again. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 1d0dcb3ad9d3 ("futex: Implement lockless wakeups") Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/futex.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 29d708d0b3d1..22f83064abb3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1462,11 +1462,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) return; - /* - * Queue the task for later wakeup for after we've released - * the hb->lock. wake_q_add() grabs reference to p. - */ - wake_q_add(wake_q, p); + get_task_struct(p); __unqueue_futex(q); /* * The waiting task can free the futex_q as soon as q->lock_ptr = NULL @@ -1476,6 +1472,13 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) * plist_del in __unqueue_futex(). */ smp_store_release(&q->lock_ptr, NULL); + + /* + * Queue the task for later wakeup for after we've released + * the hb->lock. wake_q_add() grabs reference to p. + */ + wake_q_add(wake_q, p); + put_task_struct(p); } /* From 726152d423685a4ae266f360e8bb32567c19f522 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 29 Nov 2018 20:50:30 +0800 Subject: [PATCH 2079/2608] locking/rwsem: Fix (possible) missed wakeup [ Upstream commit e158488be27b157802753a59b336142dc0eb0380 ] Because wake_q_add() can imply an immediate wakeup (cmpxchg failure case), we must not rely on the wakeup being delayed. However, commit: e38513905eea ("locking/rwsem: Rework zeroing reader waiter->task") relies on exactly that behaviour in that the wakeup must not happen until after we clear waiter->task. [ peterz: Added changelog. ] Signed-off-by: Xie Yongji Signed-off-by: Zhang Yu Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: e38513905eea ("locking/rwsem: Rework zeroing reader waiter->task") Link: https://lkml.kernel.org/r/1543495830-2644-1-git-send-email-xieyongji@baidu.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/locking/rwsem-xadd.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index a90336779375..c75017326c37 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -198,15 +198,22 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, woken++; tsk = waiter->task; - wake_q_add(wake_q, tsk); + get_task_struct(tsk); list_del(&waiter->list); /* - * Ensure that the last operation is setting the reader + * Ensure calling get_task_struct() before setting the reader * waiter to nil such that rwsem_down_read_failed() cannot * race with do_exit() by always holding a reference count * to the task to wakeup. */ smp_store_release(&waiter->task, NULL); + /* + * Ensure issuing the wakeup (either by us or someone else) + * after setting the reader waiter to nil. + */ + wake_q_add(wake_q, tsk); + /* wake_q_add() already take the task ref */ + put_task_struct(tsk); } adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; From 3aeb1d42b3194d66ee432a5daac82569b19436a0 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 18 Jan 2019 18:08:19 +0800 Subject: [PATCH 2080/2608] drm/amd/powerplay: OD setting fix on Vega10 [ Upstream commit 6d87dc97eb3341de3f7b1efa3156cb0e014f4a96 ] gfxclk for OD setting is limited to 1980M for non-acg ASICs of Vega10 Signed-off-by: Kenneth Feng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../powerplay/hwmgr/vega10_processpptables.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index e343df190375..05bb87a54e90 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -32,6 +32,7 @@ #include "vega10_pptable.h" #define NUM_DSPCLK_LEVELS 8 +#define VEGA10_ENGINECLOCK_HARDMAX 198000 static void set_hw_cap(struct pp_hwmgr *hwmgr, bool enable, enum phm_platform_caps cap) @@ -258,7 +259,26 @@ static int init_over_drive_limits( struct pp_hwmgr *hwmgr, const ATOM_Vega10_POWERPLAYTABLE *powerplay_table) { - hwmgr->platform_descriptor.overdriveLimit.engineClock = + const ATOM_Vega10_GFXCLK_Dependency_Table *gfxclk_dep_table = + (const ATOM_Vega10_GFXCLK_Dependency_Table *) + (((unsigned long) powerplay_table) + + le16_to_cpu(powerplay_table->usGfxclkDependencyTableOffset)); + bool is_acg_enabled = false; + ATOM_Vega10_GFXCLK_Dependency_Record_V2 *patom_record_v2; + + if (gfxclk_dep_table->ucRevId == 1) { + patom_record_v2 = + (ATOM_Vega10_GFXCLK_Dependency_Record_V2 *)gfxclk_dep_table->entries; + is_acg_enabled = + (bool)patom_record_v2[gfxclk_dep_table->ucNumEntries-1].ucACGEnable; + } + + if (powerplay_table->ulMaxODEngineClock > VEGA10_ENGINECLOCK_HARDMAX && + !is_acg_enabled) + hwmgr->platform_descriptor.overdriveLimit.engineClock = + VEGA10_ENGINECLOCK_HARDMAX; + else + hwmgr->platform_descriptor.overdriveLimit.engineClock = le32_to_cpu(powerplay_table->ulMaxODEngineClock); hwmgr->platform_descriptor.overdriveLimit.memoryClock = le32_to_cpu(powerplay_table->ulMaxODMemoryClock); From 0baaa08d1e3a68f3ef92ab71aac18c56b51c6430 Mon Sep 17 00:00:00 2001 From: Tomonori Sakita Date: Mon, 21 Jan 2019 17:34:16 +0900 Subject: [PATCH 2081/2608] serial: fsl_lpuart: fix maximum acceptable baud rate with over-sampling [ Upstream commit 815d835b7ba46685c316b000013367dacb2b461b ] Using over-sampling ratio, lpuart can accept baud rate upto uartclk / 4. Signed-off-by: Tomonori Sakita Signed-off-by: Atsushi Nemoto Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/fsl_lpuart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 716c33b2a11c..32a473f9d1d3 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1701,7 +1701,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, } /* ask the core to calculate the divisor */ - baud = uart_get_baud_rate(port, termios, old, 50, port->uartclk / 16); + baud = uart_get_baud_rate(port, termios, old, 50, port->uartclk / 4); spin_lock_irqsave(&sport->port.lock, flags); From a0fe1581a2963e5de0a7389498c5a8d046d4d0f5 Mon Sep 17 00:00:00 2001 From: Liam Mark Date: Fri, 18 Jan 2019 10:37:44 -0800 Subject: [PATCH 2082/2608] staging: android: ion: Support cpu access during dma_buf_detach [ Upstream commit 31eb79db420a3f94c4c45a8c0a05cd30e333f981 ] Often userspace doesn't know when the kernel will be calling dma_buf_detach on the buffer. If userpace starts its CPU access at the same time as the sg list is being freed it could end up accessing the sg list after it has been freed. Thread A Thread B - DMA_BUF_IOCTL_SYNC IOCT - ion_dma_buf_begin_cpu_access - list_for_each_entry - ion_dma_buf_detatch - free_duped_table - dma_sync_sg_for_cpu Fix this by getting the ion_buffer lock before freeing the sg table memory. Fixes: 2a55e7b5e544 ("staging: android: ion: Call dma_map_sg for syncing and mapping") Signed-off-by: Liam Mark Acked-by: Laura Abbott Acked-by: Andrew F. Davis Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/android/ion/ion.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 24cb666c9224..dd96ca61a515 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -257,10 +257,10 @@ static void ion_dma_buf_detatch(struct dma_buf *dmabuf, struct ion_dma_buf_attachment *a = attachment->priv; struct ion_buffer *buffer = dmabuf->priv; - free_duped_table(a->table); mutex_lock(&buffer->lock); list_del(&a->list); mutex_unlock(&buffer->lock); + free_duped_table(a->table); kfree(a); } From 1f13744bc3e00ea7f0332253a11378b67e2b4ddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Mon, 8 Oct 2018 20:58:23 -0300 Subject: [PATCH 2083/2608] direct-io: allow direct writes to empty inodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8b9433eb4de3c26a9226c981c283f9f4896ae030 ] On a DIO_SKIP_HOLES filesystem, the ->get_block() method is currently not allowed to create blocks for an empty inode. This confusion comes from trying to bit shift a negative number, so check the size of the inode first. The problem is most visible for hfsplus, because the fallback to buffered I/O doesn't happen and the write fails with EIO. This is in part the fault of the module, because it gives a wrong return value on ->get_block(); that will be fixed in a separate patch. Reviewed-by: Jeff Moyer Reviewed-by: Jan Kara Signed-off-by: Ernesto A. Fernández Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- fs/direct-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 40567501015f..2c90d541f527 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -658,6 +658,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, unsigned long fs_count; /* Number of filesystem-sized blocks */ int create; unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor; + loff_t i_size; /* * If there was a memory error and we've overwritten all the @@ -687,8 +688,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, */ create = dio->op == REQ_OP_WRITE; if (dio->flags & DIO_SKIP_HOLES) { - if (fs_startblk <= ((i_size_read(dio->inode) - 1) >> - i_blkbits)) + i_size = i_size_read(dio->inode); + if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits) create = 0; } From 494c4399ef3bbc1efa4bd7f2f36454a5f4ef9e64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Dec 2017 08:38:30 -0800 Subject: [PATCH 2084/2608] writeback: synchronize sync(2) against cgroup writeback membership switches [ Upstream commit 7fc5854f8c6efae9e7624970ab49a1eac2faefb1 ] sync_inodes_sb() can race against cgwb (cgroup writeback) membership switches and fail to writeback some inodes. For example, if an inode switches to another wb while sync_inodes_sb() is in progress, the new wb might not be visible to bdi_split_work_to_wbs() at all or the inode might jump from a wb which hasn't issued writebacks yet to one which already has. This patch adds backing_dev_info->wb_switch_rwsem to synchronize cgwb switch path against sync_inodes_sb() so that sync_inodes_sb() is guaranteed to see all the target wbs and inodes can't jump wbs to escape syncing. v2: Fixed misplaced rwsem init. Spotted by Jiufei. Signed-off-by: Tejun Heo Reported-by: Jiufei Xue Link: http://lkml.kernel.org/r/dc694ae2-f07f-61e1-7097-7c8411cee12d@gmail.com Acked-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- fs/fs-writeback.c | 40 ++++++++++++++++++++++++++++++-- include/linux/backing-dev-defs.h | 1 + mm/backing-dev.c | 1 + 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3244932f4d5c..6a76616c9401 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -331,11 +331,22 @@ struct inode_switch_wbs_context { struct work_struct work; }; +static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) +{ + down_write(&bdi->wb_switch_rwsem); +} + +static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) +{ + up_write(&bdi->wb_switch_rwsem); +} + static void inode_switch_wbs_work_fn(struct work_struct *work) { struct inode_switch_wbs_context *isw = container_of(work, struct inode_switch_wbs_context, work); struct inode *inode = isw->inode; + struct backing_dev_info *bdi = inode_to_bdi(inode); struct address_space *mapping = inode->i_mapping; struct bdi_writeback *old_wb = inode->i_wb; struct bdi_writeback *new_wb = isw->new_wb; @@ -343,6 +354,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) bool switched = false; void **slot; + /* + * If @inode switches cgwb membership while sync_inodes_sb() is + * being issued, sync_inodes_sb() might miss it. Synchronize. + */ + down_read(&bdi->wb_switch_rwsem); + /* * By the time control reaches here, RCU grace period has passed * since I_WB_SWITCH assertion and all wb stat update transactions @@ -435,6 +452,8 @@ skip_switch: spin_unlock(&new_wb->list_lock); spin_unlock(&old_wb->list_lock); + up_read(&bdi->wb_switch_rwsem); + if (switched) { wb_wakeup(new_wb); wb_put(old_wb); @@ -475,9 +494,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) if (inode->i_state & I_WB_SWITCH) return; + /* + * Avoid starting new switches while sync_inodes_sb() is in + * progress. Otherwise, if the down_write protected issue path + * blocks heavily, we might end up starting a large number of + * switches which will block on the rwsem. + */ + if (!down_read_trylock(&bdi->wb_switch_rwsem)) + return; + isw = kzalloc(sizeof(*isw), GFP_ATOMIC); if (!isw) - return; + goto out_unlock; /* find and pin the new wb */ rcu_read_lock(); @@ -511,12 +539,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); - return; + goto out_unlock; out_free: if (isw->new_wb) wb_put(isw->new_wb); kfree(isw); +out_unlock: + up_read(&bdi->wb_switch_rwsem); } /** @@ -894,6 +924,9 @@ fs_initcall(cgroup_writeback_init); #else /* CONFIG_CGROUP_WRITEBACK */ +static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } +static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } + static struct bdi_writeback * locked_inode_to_wb_and_lock_list(struct inode *inode) __releases(&inode->i_lock) @@ -2408,8 +2441,11 @@ void sync_inodes_sb(struct super_block *sb) return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); + /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ + bdi_down_write_wb_switch_rwsem(bdi); bdi_split_work_to_wbs(bdi, &work, false); wb_wait_for_completion(bdi, &done); + bdi_up_write_wb_switch_rwsem(bdi); wait_sb_inodes(sb); } diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 19240379637f..b186c4b464e0 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -165,6 +165,7 @@ struct backing_dev_info { struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */ + struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */ #else struct bdi_writeback_congested *wb_congested; #endif diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 9386c98dac12..6fa31754eadd 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -684,6 +684,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; mutex_init(&bdi->cgwb_release_mutex); + init_rwsem(&bdi->wb_switch_rwsem); ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (!ret) { From 111b62c6b61e2bf5777147deb2e973b46a2ae3ed Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 12 Jan 2019 22:14:30 +0530 Subject: [PATCH 2085/2608] scsi: csiostor: fix NULL pointer dereference in csio_vport_set_state() [ Upstream commit fe35a40e675473eb65f2f5462b82770f324b5689 ] Assign fc_vport to ln->fc_vport before calling csio_fcoe_alloc_vnp() to avoid a NULL pointer dereference in csio_vport_set_state(). ln->fc_vport is dereferenced in csio_vport_set_state(). Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/csiostor/csio_attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_attr.c b/drivers/scsi/csiostor/csio_attr.c index 2d1c4ebd40f9..6587f20cff1a 100644 --- a/drivers/scsi/csiostor/csio_attr.c +++ b/drivers/scsi/csiostor/csio_attr.c @@ -582,12 +582,12 @@ csio_vport_create(struct fc_vport *fc_vport, bool disable) } fc_vport_set_state(fc_vport, FC_VPORT_INITIALIZING); + ln->fc_vport = fc_vport; if (csio_fcoe_alloc_vnp(hw, ln)) goto error; *(struct csio_lnode **)fc_vport->dd_data = ln; - ln->fc_vport = fc_vport; if (!fc_vport->node_name) fc_vport->node_name = wwn_to_u64(csio_ln_wwnn(ln)); if (!fc_vport->port_name) From aa83556a6f0f79ef44c49aa1998962ac8c5792d5 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Mon, 21 Jan 2019 17:26:41 +0900 Subject: [PATCH 2086/2608] net: altera_tse: fix connect_local_phy error path [ Upstream commit 17b42a20d7ca59377788c6a2409e77569570cc10 ] The connect_local_phy should return NULL (not negative errno) on error, since its caller expects it. Signed-off-by: Atsushi Nemoto Acked-by: Thor Thayer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/altera/altera_tse_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 527908c7e384..84def1ff6cb6 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -714,8 +714,10 @@ static struct phy_device *connect_local_phy(struct net_device *dev) phydev = phy_connect(dev, phy_id_fmt, &altera_tse_adjust_link, priv->phy_iface); - if (IS_ERR(phydev)) + if (IS_ERR(phydev)) { netdev_err(dev, "Could not attach to PHY\n"); + phydev = NULL; + } } else { int ret; From 903162bc17938fd003313ff9607983041f486a8e Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 15 Jan 2019 00:51:42 +0000 Subject: [PATCH 2087/2608] hv_netvsc: Fix ethtool change hash key error [ Upstream commit b4a10c750424e01b5e37372fef0a574ebf7b56c3 ] Hyper-V hosts require us to disable RSS before changing RSS key, otherwise the changing request will fail. This patch fixes the coding error. Fixes: ff4a44199012 ("netvsc: allow get/set of RSS indirection table") Reported-by: Wei Hu Signed-off-by: Haiyang Zhang Reviewed-by: Michael Kelley [sl: fix up subject line] Signed-off-by: Sasha Levin --- drivers/net/hyperv/rndis_filter.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 17025d46bdac..fc1d5e14d83e 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -711,8 +711,8 @@ cleanup: return ret; } -int rndis_filter_set_rss_param(struct rndis_device *rdev, - const u8 *rss_key) +static int rndis_set_rss_param_msg(struct rndis_device *rdev, + const u8 *rss_key, u16 flag) { struct net_device *ndev = rdev->ndev; struct rndis_request *request; @@ -741,7 +741,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev, rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS; rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; rssp->hdr.size = sizeof(struct ndis_recv_scale_param); - rssp->flag = 0; + rssp->flag = flag; rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6; @@ -766,9 +766,12 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev, wait_for_completion(&request->wait_event); set_complete = &request->response_msg.msg.set_complete; - if (set_complete->status == RNDIS_STATUS_SUCCESS) - memcpy(rdev->rss_key, rss_key, NETVSC_HASH_KEYLEN); - else { + if (set_complete->status == RNDIS_STATUS_SUCCESS) { + if (!(flag & NDIS_RSS_PARAM_FLAG_DISABLE_RSS) && + !(flag & NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED)) + memcpy(rdev->rss_key, rss_key, NETVSC_HASH_KEYLEN); + + } else { netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", set_complete->status); ret = -EINVAL; @@ -779,6 +782,16 @@ cleanup: return ret; } +int rndis_filter_set_rss_param(struct rndis_device *rdev, + const u8 *rss_key) +{ + /* Disable RSS before change */ + rndis_set_rss_param_msg(rdev, rss_key, + NDIS_RSS_PARAM_FLAG_DISABLE_RSS); + + return rndis_set_rss_param_msg(rdev, rss_key, 0); +} + static int rndis_filter_query_device_link_status(struct rndis_device *dev, struct netvsc_device *net_device) { From 279412b3c0d27ac48042b793279400a71214c5b5 Mon Sep 17 00:00:00 2001 From: Zhang Run Date: Thu, 24 Jan 2019 13:48:49 +0800 Subject: [PATCH 2088/2608] net: usb: asix: ax88772_bind return error when hw_reset fail [ Upstream commit 6eea3527e68acc22483f4763c8682f223eb90029 ] The ax88772_bind() should return error code immediately when the PHY was not reset properly through ax88772a_hw_reset(). Otherwise, The asix_get_phyid() will block when get the PHY Identifier from the PHYSID1 MII registers through asix_mdio_read() due to the PHY isn't ready. Furthermore, it will produce a lot of error message cause system crash.As follows: asix 1-1:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0000: -71 asix 1-1:1.0 (unnamed net_device) (uninitialized): Failed to send software reset: ffffffb9 asix 1-1:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0000: -71 asix 1-1:1.0 (unnamed net_device) (uninitialized): Failed to enable software MII access asix 1-1:1.0 (unnamed net_device) (uninitialized): Failed to read reg index 0x0000: -71 asix 1-1:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0000: -71 asix 1-1:1.0 (unnamed net_device) (uninitialized): Failed to enable software MII access asix 1-1:1.0 (unnamed net_device) (uninitialized): Failed to read reg index 0x0000: -71 ... Signed-off-by: Zhang Run Reviewed-by: Yang Wei Tested-by: Marcel Ziswiler Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/asix_devices.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index b1b3d8f7e67d..d0c0ac0c3519 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -731,8 +731,13 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); chipcode &= AX_CHIPCODE_MASK; - (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : - ax88772a_hw_reset(dev, 0); + ret = (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : + ax88772a_hw_reset(dev, 0); + + if (ret < 0) { + netdev_dbg(dev->net, "Failed to reset AX88772: %d\n", ret); + return ret; + } /* Read PHYID register *AFTER* the PHY was reset properly */ phyid = asix_get_phyid(dev); From 8a1e11f6ab05cf04f625bf48212dd00ee1f5ed58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Thu, 24 Jan 2019 03:07:02 -0800 Subject: [PATCH 2089/2608] net: dev_is_mac_header_xmit() true for ARPHRD_RAWIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3b707c3008cad04604c1f50e39f456621821c414 ] __bpf_redirect() and act_mirred checks this boolean to determine whether to prefix an ethernet header. Signed-off-by: Maciej Żenczykowski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 3355efc89781..4125f60ee53b 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -54,6 +54,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_IPGRE: case ARPHRD_VOID: case ARPHRD_NONE: + case ARPHRD_RAWIP: return false; default: return true; From 29bfe05143694acf96b0737203113277b2aa7bfb Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 24 Jan 2019 11:17:01 -0600 Subject: [PATCH 2090/2608] ibmveth: Do not process frames after calling napi_reschedule [ Upstream commit e95d22c69b2c130ccce257b84daf283fd82d611e ] The IBM virtual ethernet driver's polling function continues to process frames after rescheduling NAPI, resulting in a warning if it exhausted its budget. Do not restart polling after calling napi_reschedule. Instead let frames be processed in the following instance. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmveth.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 6c05819d995e..754dff4c1771 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1314,7 +1314,6 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) unsigned long lpar_rc; u16 mss = 0; -restart_poll: while (frames_processed < budget) { if (!ibmveth_rxq_pending_buffer(adapter)) break; @@ -1402,7 +1401,6 @@ restart_poll: napi_reschedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - goto restart_poll; } } From e9509697fd674da70401511d4b1042630d3dfaae Mon Sep 17 00:00:00 2001 From: Balaji Pothunoori Date: Mon, 21 Jan 2019 12:30:43 +0530 Subject: [PATCH 2091/2608] mac80211: don't initiate TDLS connection if station is not associated to AP [ Upstream commit 7ed5285396c257fd4070b1e29e7b2341aae2a1ce ] Following call trace is observed while adding TDLS peer entry in driver during TDLS setup. Call Trace: [] dump_stack+0x47/0x61 [] __warn+0xe2/0x100 [] ? sta_apply_parameters+0x49f/0x550 [mac80211] [] warn_slowpath_null+0x25/0x30 [] sta_apply_parameters+0x49f/0x550 [mac80211] [] ? sta_info_alloc+0x1c2/0x450 [mac80211] [] ieee80211_add_station+0xe3/0x160 [mac80211] [] nl80211_new_station+0x273/0x420 [] genl_rcv_msg+0x219/0x3c0 [] ? genl_rcv+0x30/0x30 [] netlink_rcv_skb+0x8e/0xb0 [] genl_rcv+0x1c/0x30 [] netlink_unicast+0x13a/0x1d0 [] netlink_sendmsg+0x2d8/0x390 [] sock_sendmsg+0x2d/0x40 [] ___sys_sendmsg+0x1d9/0x1e0 Fixing this by allowing TDLS setup request only when we have completed association. Signed-off-by: Balaji Pothunoori Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/cfg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ebc8045ddee6..150dd2160cef 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1466,6 +1466,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; + if (sta->sta.tdls && sdata->vif.type == NL80211_IFTYPE_STATION && + !sdata->u.mgd.associated) + return -EINVAL; + err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); From 3804637933575ff9774d3bd0398b8d6bd4aed383 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 24 Jan 2019 19:19:57 +0100 Subject: [PATCH 2092/2608] mac80211: Add attribute aligned(2) to struct 'action' [ Upstream commit 7c53eb5d87bc21464da4268c3c0c47457b6d9c9b ] During refactor in commit 9e478066eae4 ("mac80211: fix MU-MIMO follow-MAC mode") a new struct 'action' was declared with packed attribute as: struct { struct ieee80211_hdr_3addr hdr; u8 category; u8 action_code; } __packed action; But since struct 'ieee80211_hdr_3addr' is declared with an aligned keyword as: struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; u8 addr1[ETH_ALEN]; u8 addr2[ETH_ALEN]; u8 addr3[ETH_ALEN]; __le16 seq_ctrl; u8 addr4[ETH_ALEN]; } __packed __aligned(2); Solve the ambiguity of placing aligned structure in a packed one by adding the aligned(2) attribute to struct 'action'. This removes the following warning (W=1): net/mac80211/rx.c:234:2: warning: alignment 1 of 'struct ' is less than 2 [-Wpacked-not-aligned] Cc: Johannes Berg Suggested-by: Johannes Berg Signed-off-by: Mathieu Malaterre Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dfc2af6833af..1512e547a5e0 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -206,7 +206,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr_3addr hdr; u8 category; u8 action_code; - } __packed action; + } __packed __aligned(2) action; if (!sdata) return; From 4b26d66aaece0efc8eccaf351c4efe9c5219dd63 Mon Sep 17 00:00:00 2001 From: Chaitanya Tata Date: Sat, 19 Jan 2019 03:17:47 +0530 Subject: [PATCH 2093/2608] cfg80211: extend range deviation for DMG [ Upstream commit 93183bdbe73bbdd03e9566c8dc37c9d06b0d0db6 ] Recently, DMG frequency bands have been extended till 71GHz, so extend the range check till 20GHz (45-71GHZ), else some channels will be marked as disabled. Signed-off-by: Chaitanya Tata Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index bd91de416035..ebfbc3f1be42 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -759,7 +759,7 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) * definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"), * however it is safe for now to assume that a frequency rule should not be * part of a frequency's band if the start freq or end freq are off by more - * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the + * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 20 GHz for the * 60 GHz band. * This resolution can be lowered and should be considered as we add * regulatory rule support for other "bands". @@ -774,7 +774,7 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * with the Channel starting frequency above 45 GHz. */ u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ? - 10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; + 20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; if (abs(freq_khz - freq_range->start_freq_khz) <= limit) return true; if (abs(freq_khz - freq_range->end_freq_khz) <= limit) From f20cf376cc63d887a989cd4521bcccd669ebd3eb Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 22 Jan 2019 10:25:13 +0000 Subject: [PATCH 2094/2608] svm: Fix AVIC incomplete IPI emulation [ Upstream commit bb218fbcfaaa3b115d4cd7a43c0ca164f3a96e57 ] In case of incomplete IPI with invalid interrupt type, the current SVM driver does not properly emulate the IPI, and fails to boot FreeBSD guests with multiple vcpus when enabling AVIC. Fix this by update APIC ICR high/low registers, which also emulate sending the IPI. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/svm.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 656ac12f5439..a94beaecd3e0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4006,25 +4006,14 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) kvm_lapic_reg_write(apic, APIC_ICR, icrl); break; case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { - int i; - struct kvm_vcpu *vcpu; - struct kvm *kvm = svm->vcpu.kvm; struct kvm_lapic *apic = svm->vcpu.arch.apic; /* - * At this point, we expect that the AVIC HW has already - * set the appropriate IRR bits on the valid target - * vcpus. So, we just need to kick the appropriate vcpu. + * Update ICR high and low, then emulate sending IPI, + * which is handled when writing APIC_ICR. */ - kvm_for_each_vcpu(i, vcpu, kvm) { - bool m = kvm_apic_match_dest(vcpu, apic, - icrl & KVM_APIC_SHORT_MASK, - GET_APIC_DEST_FIELD(icrh), - icrl & KVM_APIC_DEST_MASK); - - if (m && !avic_vcpu_is_running(vcpu)) - kvm_vcpu_wake_up(vcpu); - } + kvm_lapic_reg_write(apic, APIC_ICR2, icrh); + kvm_lapic_reg_write(apic, APIC_ICR, icrl); break; } case AVIC_IPI_FAILURE_INVALID_TARGET: From 7ce9d889aa00d88dacf9a88a293cea555a148a7f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 7 Jan 2019 19:44:51 +0100 Subject: [PATCH 2095/2608] KVM: nSVM: clear events pending from svm_complete_interrupts() when exiting to L1 [ Upstream commit 619ad846fc3452adaf71ca246c5aa711e2055398 ] kvm-unit-tests' eventinj "NMI failing on IDT" test results in NMI being delivered to the host (L1) when it's running nested. The problem seems to be: svm_complete_interrupts() raises 'nmi_injected' flag but later we decide to reflect EXIT_NPF to L1. The flag remains pending and we do NMI injection upon entry so it got delivered to L1 instead of L2. It seems that VMX code solves the same issue in prepare_vmcs12(), this was introduced with code refactoring in commit 5f3d5799974b ("KVM: nVMX: Rework event injection and recovery"). Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/svm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a94beaecd3e0..c387047e926a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2929,6 +2929,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) kvm_mmu_reset_context(&svm->vcpu); kvm_mmu_load(&svm->vcpu); + /* + * Drop what we picked up for L2 via svm_complete_interrupts() so it + * doesn't end up in L1. + */ + svm->vcpu.arch.nmi_injected = false; + kvm_clear_exception_queue(&svm->vcpu); + kvm_clear_interrupt_queue(&svm->vcpu); + return 0; } From 32f31fa571083841f07bed67e46d0c5c5369b549 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Thu, 28 Sep 2017 09:33:39 -0400 Subject: [PATCH 2096/2608] powerpc: Always initialize input array when calling epapr_hypercall() commit 186b8f1587c79c2fa04bfa392fdf084443e398c1 upstream. Several callers to epapr_hypercall() pass an uninitialized stack allocated array for the input arguments, presumably because they have no input arguments. However this can produce errors like this one arch/powerpc/include/asm/epapr_hcalls.h:470:42: error: 'in' may be used uninitialized in this function [-Werror=maybe-uninitialized] unsigned long register r3 asm("r3") = in[0]; ~~^~~ Fix callers to this function to always zero-initialize the input arguments array to prevent this. Signed-off-by: Seth Forshee Signed-off-by: Michael Ellerman Cc: "A. Wilcox" Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/epapr_hcalls.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 334459ad145b..90863245df53 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -508,7 +508,7 @@ static unsigned long epapr_hypercall(unsigned long *in, static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; unsigned long r; @@ -520,7 +520,7 @@ static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2) static inline long epapr_hypercall0(unsigned int nr) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; return epapr_hypercall(in, out, nr); @@ -528,7 +528,7 @@ static inline long epapr_hypercall0(unsigned int nr) static inline long epapr_hypercall1(unsigned int nr, unsigned long p1) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; in[0] = p1; @@ -538,7 +538,7 @@ static inline long epapr_hypercall1(unsigned int nr, unsigned long p1) static inline long epapr_hypercall2(unsigned int nr, unsigned long p1, unsigned long p2) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; in[0] = p1; @@ -549,7 +549,7 @@ static inline long epapr_hypercall2(unsigned int nr, unsigned long p1, static inline long epapr_hypercall3(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; in[0] = p1; @@ -562,7 +562,7 @@ static inline long epapr_hypercall4(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; in[0] = p1; From 7a8ccb27436212ff454f25f1a7b49b6f498eef35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 10 Feb 2019 18:31:07 +0100 Subject: [PATCH 2097/2608] mmc: spi: Fix card detection during probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c9bd505dbd9d3dc80c496f88eafe70affdcf1ba6 upstream. When using the mmc_spi driver with a card-detect pin, I noticed that the card was not detected immediately after probe, but only after it was unplugged and plugged back in (and the CD IRQ fired). The call tree looks something like this: mmc_spi_probe mmc_add_host mmc_start_host _mmc_detect_change mmc_schedule_delayed_work(&host->detect, 0) mmc_rescan host->bus_ops->detect(host) mmc_detect _mmc_detect_card_removed host->ops->get_cd(host) mmc_gpio_get_cd -> -ENOSYS (ctx->cd_gpio not set) mmc_gpiod_request_cd ctx->cd_gpio = desc To fix this issue, call mmc_detect_change after the card-detect GPIO/IRQ is registered. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mmc_spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 476e53d30128..67f6bd24a9d0 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1447,6 +1447,7 @@ static int mmc_spi_probe(struct spi_device *spi) mmc->caps &= ~MMC_CAP_NEEDS_POLL; mmc_gpiod_request_cd_irq(mmc); } + mmc_detect_change(mmc, 0); if (host->pdata && host->pdata->flags & MMC_SPI_USE_RO_GPIO) { has_ro = true; From f4a2a74704d557228d215b0e9065de0baa2a8dc8 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Mon, 18 Feb 2019 20:45:40 +0300 Subject: [PATCH 2098/2608] mmc: tmio_mmc_core: don't claim spurious interrupts commit 5c27ff5db1491a947264d6d4e4cbe43ae6535bae upstream. I have encountered an interrupt storm during the eMMC chip probing (and the chip finally didn't get detected). It turned out that U-Boot left the DMAC interrupts enabled while the Linux driver didn't use those. The SDHI driver's interrupt handler somehow assumes that, even if an SDIO interrupt didn't happen, it should return IRQ_HANDLED. I think that if none of the enabled interrupts happened and got handled, we should return IRQ_NONE -- that way the kernel IRQ code recoginizes a spurious interrupt and masks it off pretty quickly... Fixes: 7729c7a232a9 ("mmc: tmio: Provide separate interrupt handlers") Signed-off-by: Sergei Shtylyov Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Reviewed-by: Simon Horman Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/tmio_mmc_core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index de1562f27fdb..1fbf50c83c73 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -688,7 +688,7 @@ static bool __tmio_mmc_sdcard_irq(struct tmio_mmc_host *host, int ireg, return false; } -static void __tmio_mmc_sdio_irq(struct tmio_mmc_host *host) +static bool __tmio_mmc_sdio_irq(struct tmio_mmc_host *host) { struct mmc_host *mmc = host->mmc; struct tmio_mmc_data *pdata = host->pdata; @@ -696,7 +696,7 @@ static void __tmio_mmc_sdio_irq(struct tmio_mmc_host *host) unsigned int sdio_status; if (!(pdata->flags & TMIO_MMC_SDIO_IRQ)) - return; + return false; status = sd_ctrl_read16(host, CTL_SDIO_STATUS); ireg = status & TMIO_SDIO_MASK_ALL & ~host->sdio_irq_mask; @@ -709,6 +709,8 @@ static void __tmio_mmc_sdio_irq(struct tmio_mmc_host *host) if (mmc->caps & MMC_CAP_SDIO_IRQ && ireg & TMIO_SDIO_STAT_IOIRQ) mmc_signal_sdio_irq(mmc); + + return ireg; } irqreturn_t tmio_mmc_irq(int irq, void *devid) @@ -727,9 +729,10 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid) if (__tmio_mmc_sdcard_irq(host, ireg, status)) return IRQ_HANDLED; - __tmio_mmc_sdio_irq(host); + if (__tmio_mmc_sdio_irq(host)) + return IRQ_HANDLED; - return IRQ_HANDLED; + return IRQ_NONE; } EXPORT_SYMBOL_GPL(tmio_mmc_irq); From 3094b3f769089729ffbd80356bab1b2d25bd405e Mon Sep 17 00:00:00 2001 From: Takeshi Saito Date: Thu, 21 Feb 2019 20:38:05 +0100 Subject: [PATCH 2099/2608] mmc: tmio: fix access width of Block Count Register commit 5603731a15ef9ca317c122cc8c959f1dee1798b4 upstream. In R-Car Gen2 or later, the maximum number of transfer blocks are changed from 0xFFFF to 0xFFFFFFFF. Therefore, Block Count Register should use iowrite32(). If another system (U-boot, Hypervisor OS, etc) uses bit[31:16], this value will not be cleared. So, SD/MMC card initialization fails. So, check for the bigger register and use apropriate write. Also, mark the register as extended on Gen2. Signed-off-by: Takeshi Saito [wsa: use max_blk_count in if(), add Gen2, update commit message] Signed-off-by: Wolfram Sang Cc: stable@kernel.org Reviewed-by: Simon Horman [Ulf: Fixed build error] Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/renesas_sdhi_sys_dmac.c | 1 + drivers/mmc/host/tmio_mmc.h | 5 +++++ drivers/mmc/host/tmio_mmc_core.c | 6 +++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index df4465439e13..5dd31a2a877a 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -68,6 +68,7 @@ static const struct renesas_sdhi_of_data of_rcar_gen2_compatible = { .scc_offset = 0x0300, .taps = rcar_gen2_scc_taps, .taps_num = ARRAY_SIZE(rcar_gen2_scc_taps), + .max_blk_count = 0xffffffff, }; /* Definitions for sampling clocks */ diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 3e6ff8921440..fe10de349aeb 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -286,6 +286,11 @@ static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); } +static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val) +{ + iowrite32(val, host->ctl + (addr << host->bus_shift)); +} + static inline void sd_ctrl_write32_rep(struct tmio_mmc_host *host, int addr, const u32 *buf, int count) { diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 1fbf50c83c73..2437fcde915a 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -761,7 +762,10 @@ static int tmio_mmc_start_data(struct tmio_mmc_host *host, /* Set transfer length / blocksize */ sd_ctrl_write16(host, CTL_SD_XFER_LEN, data->blksz); - sd_ctrl_write16(host, CTL_XFER_BLK_COUNT, data->blocks); + if (host->mmc->max_blk_count >= SZ_64K) + sd_ctrl_write32(host, CTL_XFER_BLK_COUNT, data->blocks); + else + sd_ctrl_write16(host, CTL_XFER_BLK_COUNT, data->blocks); tmio_mmc_start_dma(host, data); From 3fa9848e2f7bfb8d7d7ed1927dea1b0dc1c6454d Mon Sep 17 00:00:00 2001 From: BOUGH CHEN Date: Thu, 28 Feb 2019 10:15:42 +0000 Subject: [PATCH 2100/2608] mmc: sdhci-esdhc-imx: correct the fix of ERR004536 commit e30be063d6dbcc0f18b1eb25fa709fdef89201fb upstream. Commit 18094430d6b5 ("mmc: sdhci-esdhc-imx: add ADMA Length Mismatch errata fix") involve the fix of ERR004536, but the fix is incorrect. Double confirm with IC, need to clear the bit 7 of register 0x6c rather than set this bit 7. Here is the definition of bit 7 of 0x6c: 0: enable the new IC fix for ERR004536 1: do not use the IC fix, keep the same as before Find this issue on i.MX845s-evk board when enable CMDQ, and let system in heavy loading. root@imx8mmevk:~# dd if=/dev/mmcblk2 of=/dev/null bs=1M & root@imx8mmevk:~# memtester 1000M > /dev/zero & root@imx8mmevk:~# [ 139.897220] mmc2: cqhci: timeout for tag 16 [ 139.901417] mmc2: cqhci: ============ CQHCI REGISTER DUMP =========== [ 139.907862] mmc2: cqhci: Caps: 0x0000310a | Version: 0x00000510 [ 139.914311] mmc2: cqhci: Config: 0x00001001 | Control: 0x00000000 [ 139.920753] mmc2: cqhci: Int stat: 0x00000000 | Int enab: 0x00000006 [ 139.927193] mmc2: cqhci: Int sig: 0x00000006 | Int Coal: 0x00000000 [ 139.933634] mmc2: cqhci: TDL base: 0x7809c000 | TDL up32: 0x00000000 [ 139.940073] mmc2: cqhci: Doorbell: 0x00030000 | TCN: 0x00000000 [ 139.946518] mmc2: cqhci: Dev queue: 0x00010000 | Dev Pend: 0x00010000 [ 139.952967] mmc2: cqhci: Task clr: 0x00000000 | SSC1: 0x00011000 [ 139.959411] mmc2: cqhci: SSC2: 0x00000001 | DCMD rsp: 0x00000000 [ 139.965857] mmc2: cqhci: RED mask: 0xfdf9a080 | TERRI: 0x00000000 [ 139.972308] mmc2: cqhci: Resp idx: 0x0000002e | Resp arg: 0x00000900 [ 139.978761] mmc2: sdhci: ============ SDHCI REGISTER DUMP =========== [ 139.985214] mmc2: sdhci: Sys addr: 0xb2c19000 | Version: 0x00000002 [ 139.991669] mmc2: sdhci: Blk size: 0x00000200 | Blk cnt: 0x00000400 [ 139.998127] mmc2: sdhci: Argument: 0x40110400 | Trn mode: 0x00000033 [ 140.004618] mmc2: sdhci: Present: 0x01088a8f | Host ctl: 0x00000030 [ 140.011113] mmc2: sdhci: Power: 0x00000002 | Blk gap: 0x00000080 [ 140.017583] mmc2: sdhci: Wake-up: 0x00000008 | Clock: 0x0000000f [ 140.024039] mmc2: sdhci: Timeout: 0x0000008f | Int stat: 0x00000000 [ 140.030497] mmc2: sdhci: Int enab: 0x107f4000 | Sig enab: 0x107f4000 [ 140.036972] mmc2: sdhci: AC12 err: 0x00000000 | Slot int: 0x00000502 [ 140.043426] mmc2: sdhci: Caps: 0x07eb0000 | Caps_1: 0x8000b407 [ 140.049867] mmc2: sdhci: Cmd: 0x00002c1a | Max curr: 0x00ffffff [ 140.056314] mmc2: sdhci: Resp[0]: 0x00000900 | Resp[1]: 0xffffffff [ 140.062755] mmc2: sdhci: Resp[2]: 0x328f5903 | Resp[3]: 0x00d00f00 [ 140.069195] mmc2: sdhci: Host ctl2: 0x00000008 [ 140.073640] mmc2: sdhci: ADMA Err: 0x00000007 | ADMA Ptr: 0x7809c108 [ 140.080079] mmc2: sdhci: ============================================ [ 140.086662] mmc2: running CQE recovery Fixes: 18094430d6b5 ("mmc: sdhci-esdhc-imx: add ADMA Length Mismatch errata fix") Signed-off-by: Haibo Chen Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index c81de2f25281..59041f07b53c 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1077,11 +1077,12 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host) writel(readl(host->ioaddr + SDHCI_HOST_CONTROL) | ESDHC_BURST_LEN_EN_INCR, host->ioaddr + SDHCI_HOST_CONTROL); + /* - * erratum ESDHC_FLAG_ERR004536 fix for MX6Q TO1.2 and MX6DL - * TO1.1, it's harmless for MX6SL - */ - writel(readl(host->ioaddr + 0x6c) | BIT(7), + * erratum ESDHC_FLAG_ERR004536 fix for MX6Q TO1.2 and MX6DL + * TO1.1, it's harmless for MX6SL + */ + writel(readl(host->ioaddr + 0x6c) & ~BIT(7), host->ioaddr + 0x6c); /* disable DLL_CTRL delay line settings */ From f5817069248630b3b7b17ebfcdee0b679c52be33 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 27 Feb 2019 21:29:52 +0100 Subject: [PATCH 2101/2608] mm: enforce min addr even if capable() in expand_downwards() commit 0a1d52994d440e21def1c2174932410b4f2a98a1 upstream. security_mmap_addr() does a capability check with current_cred(), but we can reach this code from contexts like a VFS write handler where current_cred() must not be used. This can be abused on systems without SMAP to make NULL pointer dereferences exploitable again. Fixes: 8869477a49c3 ("security: protect from stack expansion into low vm addresses") Cc: stable@kernel.org Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mmap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 2398776195d2..00dab291e61d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2348,12 +2348,11 @@ int expand_downwards(struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *prev; - int error; + int error = 0; address &= PAGE_MASK; - error = security_mmap_addr(address); - if (error) - return error; + if (address < mmap_min_addr) + return -EPERM; /* Enforce stack_guard_gap */ prev = vma->vm_prev; From 8b1852c5d2c0e406792b898c5ee1f39762c22c9d Mon Sep 17 00:00:00 2001 From: Michael Clark Date: Mon, 11 Feb 2019 17:38:29 +1300 Subject: [PATCH 2102/2608] MIPS: fix truncation in __cmpxchg_small for short values commit 94ee12b507db8b5876e31c9d6c9d84f556a4b49f upstream. __cmpxchg_small erroneously uses u8 for load comparison which can be either char or short. This patch changes the local variable to u32 which is sufficiently sized, as the loaded value is already masked and shifted appropriately. Using an integer size avoids any unnecessary canonicalization from use of non native widths. This patch is part of a series that adapts the MIPS small word atomics code for xchg and cmpxchg on short and char to RISC-V. Cc: RISC-V Patches Cc: Linux RISC-V Cc: Linux MIPS Signed-off-by: Michael Clark [paul.burton@mips.com: - Fix varialble typo per Jonas Gorski. - Consolidate load variable with other declarations.] Signed-off-by: Paul Burton Fixes: 3ba7f44d2b19 ("MIPS: cmpxchg: Implement 1 byte & 2 byte cmpxchg()") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/cmpxchg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kernel/cmpxchg.c b/arch/mips/kernel/cmpxchg.c index 0b9535bc2c53..6b2a4a902a98 100644 --- a/arch/mips/kernel/cmpxchg.c +++ b/arch/mips/kernel/cmpxchg.c @@ -54,10 +54,9 @@ unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int s unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { - u32 mask, old32, new32, load32; + u32 mask, old32, new32, load32, load; volatile u32 *ptr32; unsigned int shift; - u8 load; /* Check that ptr is naturally aligned */ WARN_ON((unsigned long)ptr & (size - 1)); From e01f09149d2dc540c0ef1c0bec48c45cd592ec14 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 1 Mar 2019 22:58:09 +0000 Subject: [PATCH 2103/2608] MIPS: eBPF: Fix icache flush end address commit d1a2930d8a992fb6ac2529449f81a0056e1b98d1 upstream. The MIPS eBPF JIT calls flush_icache_range() in order to ensure the icache observes the code that we just wrote. Unfortunately it gets the end address calculation wrong due to some bad pointer arithmetic. The struct jit_ctx target field is of type pointer to u32, and as such adding one to it will increment the address being pointed to by 4 bytes. Therefore in order to find the address of the end of the code we simply need to add the number of 4 byte instructions emitted, but we mistakenly add the number of instructions multiplied by 4. This results in the call to flush_icache_range() operating on a memory region 4x larger than intended, which is always wasteful and can cause crashes if we overrun into an unmapped page. Fix this by correcting the pointer arithmetic to remove the bogus multiplication, and use braces to remove the need for a set of brackets whilst also making it obvious that the target field is a pointer. Signed-off-by: Paul Burton Fixes: b6bd53f9c4e8 ("MIPS: Add missing file for eBPF JIT.") Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: netdev@vger.kernel.org Cc: bpf@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- arch/mips/net/ebpf_jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index dd537cba4449..8004bfcfb033 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1971,7 +1971,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* Update the icache */ flush_icache_range((unsigned long)ctx.target, - (unsigned long)(ctx.target + ctx.idx * sizeof(u32))); + (unsigned long)&ctx.target[ctx.idx]); if (bpf_jit_enable > 1) /* Dump JIT code */ From 24e988f6fcae54009891c1bfdd4112fa5656af70 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 22 Feb 2019 17:17:04 -0800 Subject: [PATCH 2104/2608] x86/uaccess: Don't leak the AC flag into __put_user() value evaluation commit 2a418cf3f5f1caf911af288e978d61c9844b0695 upstream. When calling __put_user(foo(), ptr), the __put_user() macro would call foo() in between __uaccess_begin() and __uaccess_end(). If that code were buggy, then those bugs would be run without SMAP protection. Fortunately, there seem to be few instances of the problem in the kernel. Nevertheless, __put_user() should be fixed to avoid doing this. Therefore, evaluate __put_user()'s argument before setting AC. This issue was noticed when an objtool hack by Peter Zijlstra complained about genregs_get() and I compared the assembly output to the C source. [ bp: Massage commit message and fixed up whitespace. ] Fixes: 11f1a4b9755f ("x86: reorganize SMAP handling in user space accesses") Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Acked-by: Linus Torvalds Cc: Peter Zijlstra Cc: Brian Gerst Cc: Josh Poimboeuf Cc: Denys Vlasenko Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20190225125231.845656645@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index aae77eb8491c..4111edb3188e 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -293,8 +293,7 @@ do { \ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ - __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ - errret); \ + __put_user_asm_u64(x, ptr, retval, errret); \ break; \ default: \ __put_user_bad(); \ @@ -440,8 +439,10 @@ do { \ #define __put_user_nocheck(x, ptr, size) \ ({ \ int __pu_err; \ + __typeof__(*(ptr)) __pu_val; \ + __pu_val = x; \ __uaccess_begin(); \ - __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ + __put_user_size(__pu_val, (ptr), (size), __pu_err, -EFAULT);\ __uaccess_end(); \ __builtin_expect(__pu_err, 0); \ }) From 99403097be0cbe12042775d9ca3a66f2018adc3e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 5 Mar 2019 17:58:03 +0100 Subject: [PATCH 2105/2608] Linux 4.14.105 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 967692b8941f..d5375891a7eb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 104 +SUBLEVEL = 105 EXTRAVERSION = NAME = Petit Gorille From 85cf5519050d151a77273171f2b2faf0ce5af8eb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 Jan 2019 12:53:07 +0530 Subject: [PATCH 2106/2608] cpufreq: Use struct kobj_attribute instead of struct global_attr commit 625c85a62cb7d3c79f6e16de3cfa972033658250 upstream. The cpufreq_global_kobject is created using kobject_create_and_add() helper, which assigns the kobj_type as dynamic_kobj_ktype and show/store routines are set to kobj_attr_show() and kobj_attr_store(). These routines pass struct kobj_attribute as an argument to the show/store callbacks. But all the cpufreq files created using the cpufreq_global_kobject expect the argument to be of type struct attribute. Things work fine currently as no one accesses the "attr" argument. We may not see issues even if the argument is used, as struct kobj_attribute has struct attribute as its first element and so they will both get same address. But this is logically incorrect and we should rather use struct kobj_attribute instead of struct global_attr in the cpufreq core and drivers and the show/store callbacks should take struct kobj_attribute as argument instead. This bug is caught using CFI CLANG builds in android kernel which catches mismatch in function prototypes for such callbacks. Reported-by: Donghee Han Reported-by: Sangkyu Kim Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 6 +++--- drivers/cpufreq/intel_pstate.c | 18 +++++++++--------- include/linux/cpufreq.h | 12 ++---------- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 66c2790dcc5f..9f5c51cd67ad 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -554,13 +554,13 @@ EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); * SYSFS INTERFACE * *********************************************************************/ static ssize_t show_boost(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", cpufreq_driver->boost_enabled); } -static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) +static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) { int ret, enable; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 114dfe67015b..5ebefa17d195 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -811,7 +811,7 @@ static void intel_pstate_update_policies(void) /************************** sysfs begin ************************/ #define show_one(file_name, object) \ static ssize_t show_##file_name \ - (struct kobject *kobj, struct attribute *attr, char *buf) \ + (struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", global.object); \ } @@ -820,7 +820,7 @@ static ssize_t intel_pstate_show_status(char *buf); static int intel_pstate_update_status(const char *buf, size_t size); static ssize_t show_status(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -831,7 +831,7 @@ static ssize_t show_status(struct kobject *kobj, return ret; } -static ssize_t store_status(struct kobject *a, struct attribute *b, +static ssize_t store_status(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { char *p = memchr(buf, '\n', count); @@ -845,7 +845,7 @@ static ssize_t store_status(struct kobject *a, struct attribute *b, } static ssize_t show_turbo_pct(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total, no_turbo, turbo_pct; @@ -871,7 +871,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, } static ssize_t show_num_pstates(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total; @@ -892,7 +892,7 @@ static ssize_t show_num_pstates(struct kobject *kobj, } static ssize_t show_no_turbo(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -914,7 +914,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, return ret; } -static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, +static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -961,7 +961,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -991,7 +991,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cbf85c4c745f..cad1eb50d668 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -254,20 +254,12 @@ __ATTR(_name, 0644, show_##_name, store_##_name) static struct freq_attr _name = \ __ATTR(_name, 0200, NULL, store_##_name) -struct global_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); - ssize_t (*store)(struct kobject *a, struct attribute *b, - const char *c, size_t count); -}; - #define define_one_global_ro(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0444, show_##_name, NULL) #define define_one_global_rw(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) From 910ae4e055528090d7142e89b494e4c2f2478750 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 20 Feb 2019 11:43:17 +0100 Subject: [PATCH 2107/2608] USB: serial: option: add Telit ME910 ECM composition commit 6431866b6707d27151be381252d6eef13025cfce upstream. This patch adds Telit ME910 family ECM composition 0x1102. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8cdca3f7acaa..bf72245f1cea 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1151,6 +1151,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), .driver_info = NCTRL(0) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1102, 0xff), /* Telit ME910 (ECM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), From 4beb72360b435bba31d92dae8fb742b5f14b0b5e Mon Sep 17 00:00:00 2001 From: Ivan Mironov Date: Wed, 6 Feb 2019 21:14:13 +0500 Subject: [PATCH 2108/2608] USB: serial: cp210x: add ID for Ingenico 3070 commit dd9d3d86b08d6a106830364879c42c78db85389c upstream. Here is how this device appears in kernel log: usb 3-1: new full-speed USB device number 18 using xhci_hcd usb 3-1: New USB device found, idVendor=0b00, idProduct=3070 usb 3-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 3-1: Product: Ingenico 3070 usb 3-1: Manufacturer: Silicon Labs usb 3-1: SerialNumber: 0001 Apparently this is a POS terminal with embedded USB-to-Serial converter. Cc: stable@vger.kernel.org Signed-off-by: Ivan Mironov Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index c931ae689a91..d8e6790ccffe 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -64,6 +64,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ + { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ { USB_DEVICE(0x0BED, 0x1101) }, /* MEI series 2000 Combo Acceptor */ { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */ From 888b943801226a5008feb602b44fb597992b2288 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 14 Feb 2019 19:45:33 +0000 Subject: [PATCH 2109/2608] USB: serial: ftdi_sio: add ID for Hjelmslund Electronics USB485 commit 8d7fa3d4ea3f0ca69554215e87411494e6346fdc upstream. This adds the USB ID of the Hjelmslund Electronics USB485 Iso stick. Signed-off-by: Mans Rullgard Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 385f2ae3be24..d45a2c352c98 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1020,6 +1020,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, + /* EZPrototypes devices */ + { USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 975d02666c5a..b863bedb55a1 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1308,6 +1308,12 @@ #define IONICS_VID 0x1c0c #define IONICS_PLUGCOMPUTER_PID 0x0102 +/* + * EZPrototypes (PID reseller) + */ +#define EZPROTOTYPES_VID 0x1c40 +#define HJELMSLUND_USB485_ISO_PID 0x0477 + /* * Dresden Elektronik Sensor Terminal Board */ From 6f06591fc56b0060714956885d120b8958fea33e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 12 Feb 2019 12:44:50 -0600 Subject: [PATCH 2110/2608] staging: comedi: ni_660x: fix missing break in switch statement commit 479826cc86118e0d87e5cefb3df5b748e0480924 upstream. Add missing break statement in order to prevent the code from falling through to the default case and return -EINVAL every time. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: aa94f2888825 ("staging: comedi: ni_660x: tidy up ni_660x_set_pfi_routing()") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_660x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/comedi/drivers/ni_660x.c b/drivers/staging/comedi/drivers/ni_660x.c index 6aa755ad3953..b3a83f5e3a19 100644 --- a/drivers/staging/comedi/drivers/ni_660x.c +++ b/drivers/staging/comedi/drivers/ni_660x.c @@ -611,6 +611,7 @@ static int ni_660x_set_pfi_routing(struct comedi_device *dev, case NI_660X_PFI_OUTPUT_DIO: if (chan > 31) return -EINVAL; + break; default: return -EINVAL; } From 313de388865ca397fef61d592cfb364f981329fc Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Thu, 7 Feb 2019 11:28:58 +0000 Subject: [PATCH 2111/2608] staging: wilc1000: fix to set correct value for 'vif_num' commit dda037057a572f5c82ac2499eb4e6fb17600ba3e upstream. Set correct value in '->vif_num' for the total number of interfaces and set '->idx' value using 'i'. Fixes: 735bb39ca3be ("staging: wilc1000: simplify vif[i]->ndev accesses") Fixes: 0e490657c721 ("staging: wilc1000: Fix problem with wrong vif index") Cc: Suggested-by: Dan Carpenter Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/linux_wlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c index 119f3459b5bb..2fc6426c3819 100644 --- a/drivers/staging/wilc1000/linux_wlan.c +++ b/drivers/staging/wilc1000/linux_wlan.c @@ -1238,8 +1238,8 @@ int wilc_netdev_init(struct wilc **wilc, struct device *dev, int io_type, vif->wilc = *wilc; vif->ndev = ndev; wl->vif[i] = vif; - wl->vif_num = i; - vif->idx = wl->vif_num; + wl->vif_num = i + 1; + vif->idx = i; ndev->netdev_ops = &wilc_netdev_ops; From acdaa51ac269930ec757065b3f433f6bb943ffb9 Mon Sep 17 00:00:00 2001 From: Qing Xia Date: Fri, 1 Feb 2019 14:59:46 +0800 Subject: [PATCH 2112/2608] staging: android: ion: fix sys heap pool's gfp_flags commit 9bcf065e28122588a6cbee08cf847826dacbb438 upstream. In the first loop, gfp_flags will be modified to high_order_gfp_flags, and there will be no chance to change back to low_order_gfp_flags. Fixes: e7f63771b60e ("ION: Sys_heap: Add cached pool to spead up cached buffer alloc") Signed-off-by: Qing Xia Cc: stable Signed-off-by: Jing Xia Reviewed-by: Yuming Han Reviewed-by: Zhaoyang Huang Reviewed-by: Orson Zhai Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_system_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c index b6ece18e6a88..e64db42aeb1e 100644 --- a/drivers/staging/android/ion/ion_system_heap.c +++ b/drivers/staging/android/ion/ion_system_heap.c @@ -298,10 +298,10 @@ static int ion_system_heap_create_pools(struct ion_page_pool **pools, bool cached) { int i; - gfp_t gfp_flags = low_order_gfp_flags; for (i = 0; i < NUM_ORDERS; i++) { struct ion_page_pool *pool; + gfp_t gfp_flags = low_order_gfp_flags; if (orders[i] > 4) gfp_flags = high_order_gfp_flags; From 412a0e856464b3a063afa721bb928fa0257a9752 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 3 Mar 2019 07:34:57 +0000 Subject: [PATCH 2113/2608] ip6mr: Do not call __IP6_INC_STATS() from preemptible context [ Upstream commit 87c11f1ddbbad38ad8bad47af133a8208985fbdf ] Similar to commit 44f49dd8b5a6 ("ipmr: fix possible race resulting from improper usage of IP_INC_STATS_BH() in preemptible context."), we cannot assume preemption is disabled when incrementing the counter and accessing a per-CPU variable. Preemption can be enabled when we add a route in process context that corresponds to packets stored in the unresolved queue, which are then forwarded using this route [1]. Fix this by using IP6_INC_STATS() which takes care of disabling preemption on architectures where it is needed. [1] [ 157.451447] BUG: using __this_cpu_add() in preemptible [00000000] code: smcrouted/2314 [ 157.460409] caller is ip6mr_forward2+0x73e/0x10e0 [ 157.460434] CPU: 3 PID: 2314 Comm: smcrouted Not tainted 5.0.0-rc7-custom-03635-g22f2712113f1 #1336 [ 157.460449] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 157.460461] Call Trace: [ 157.460486] dump_stack+0xf9/0x1be [ 157.460553] check_preemption_disabled+0x1d6/0x200 [ 157.460576] ip6mr_forward2+0x73e/0x10e0 [ 157.460705] ip6_mr_forward+0x9a0/0x1510 [ 157.460771] ip6mr_mfc_add+0x16b3/0x1e00 [ 157.461155] ip6_mroute_setsockopt+0x3cb/0x13c0 [ 157.461384] do_ipv6_setsockopt.isra.8+0x348/0x4060 [ 157.462013] ipv6_setsockopt+0x90/0x110 [ 157.462036] rawv6_setsockopt+0x4a/0x120 [ 157.462058] __sys_setsockopt+0x16b/0x340 [ 157.462198] __x64_sys_setsockopt+0xbf/0x160 [ 157.462220] do_syscall_64+0x14d/0x610 [ 157.462349] entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 0912ea38de61 ("[IPV6] MROUTE: Add stats in multicast routing module method ip6_mr_forward().") Signed-off-by: Ido Schimmel Reported-by: Amit Cohen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b2fdb3fdd217..459f282d90e1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2002,10 +2002,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTFORWDATAGRAMS); - __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTOCTETS, skb->len); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(net, sk, skb); } From c031a613ff80001c7d02bcbe431509e009d7bde2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 1 Mar 2019 19:53:57 +0100 Subject: [PATCH 2114/2608] net: dsa: mv88e6xxx: handle unknown duplex modes gracefully in mv88e6xxx_port_set_duplex [ Upstream commit c6195a8bdfc62a7cecf7df685e64847a4b700275 ] When testing another issue I faced the problem that mv88e6xxx_port_setup_mac() failed due to DUPLEX_UNKNOWN being passed as argument to mv88e6xxx_port_set_duplex(). We should handle this case gracefully and return -EOPNOTSUPP, like e.g. mv88e6xxx_port_set_speed() is doing it. Fixes: 7f1ae07b51e8 ("net: dsa: mv88e6xxx: add port duplex setter") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index a7801f6668a5..2cffecfe86e3 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -165,7 +165,7 @@ int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup) /* normal duplex detection */ break; default: - return -EINVAL; + return -EOPNOTSUPP; } err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_MAC_CTL, reg); From 306bbaeb077068141d472b922ae1adf7ab81fd72 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 2 Mar 2019 10:34:55 +0800 Subject: [PATCH 2115/2608] net-sysfs: Fix mem leak in netdev_register_kobject [ Upstream commit 895a5e96dbd6386c8e78e5b78e067dcc67b7f0ab ] syzkaller report this: BUG: memory leak unreferenced object 0xffff88837a71a500 (size 256): comm "syz-executor.2", pid 9770, jiffies 4297825125 (age 17.843s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 20 c0 ef 86 ff ff ff ff ........ ....... backtrace: [<00000000db12624b>] netdev_register_kobject+0x124/0x2e0 net/core/net-sysfs.c:1751 [<00000000dc49a994>] register_netdevice+0xcc1/0x1270 net/core/dev.c:8516 [<00000000e5f3fea0>] tun_set_iff drivers/net/tun.c:2649 [inline] [<00000000e5f3fea0>] __tun_chr_ioctl+0x2218/0x3d20 drivers/net/tun.c:2883 [<000000001b8ac127>] vfs_ioctl fs/ioctl.c:46 [inline] [<000000001b8ac127>] do_vfs_ioctl+0x1a5/0x10e0 fs/ioctl.c:690 [<0000000079b269f8>] ksys_ioctl+0x89/0xa0 fs/ioctl.c:705 [<00000000de649beb>] __do_sys_ioctl fs/ioctl.c:712 [inline] [<00000000de649beb>] __se_sys_ioctl fs/ioctl.c:710 [inline] [<00000000de649beb>] __x64_sys_ioctl+0x74/0xb0 fs/ioctl.c:710 [<000000007ebded1e>] do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290 [<00000000db315d36>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000115be9bb>] 0xffffffffffffffff It should call kset_unregister to free 'dev->queues_kset' in error path of register_queue_kobjects, otherwise will cause a mem leak. Reported-by: Hulk Robot Fixes: 1d24eb4815d1 ("xps: Transmit Packet Steering") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net-sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8f17724a173c..c6a2655cc28a 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1402,6 +1402,9 @@ static int register_queue_kobjects(struct net_device *dev) error: netdev_queue_update_kobjects(dev, txq, 0); net_rx_queue_update_kobjects(dev, rxq, 0); +#ifdef CONFIG_SYSFS + kset_unregister(dev->queues_kset); +#endif return error; } From 094a60b28e8f391100294bcf2e0373e6272d3d8d Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 4 Mar 2019 15:00:03 +0800 Subject: [PATCH 2116/2608] sky2: Disable MSI on Dell Inspiron 1545 and Gateway P-79 [ Upstream commit b33b7cd6fd86478dd2890a9abeb6f036aa01fdf7 ] Some sky2 chips fire IRQ after S3, before the driver is fully resumed: [ 686.804877] do_IRQ: 1.37 No irq handler for vector This is likely a platform bug that device isn't fully quiesced during S3. Use MSI-X, maskable MSI or INTx can prevent this issue from happening. Since MSI-X and maskable MSI are not supported by this device, fallback to use INTx on affected platforms. BugLink: https://bugs.launchpad.net/bugs/1807259 BugLink: https://bugs.launchpad.net/bugs/1809843 Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/sky2.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 3954bc1d2333..cf6f58889038 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -93,7 +94,7 @@ static int copybreak __read_mostly = 128; module_param(copybreak, int, 0); MODULE_PARM_DESC(copybreak, "Receive copy threshold"); -static int disable_msi = 0; +static int disable_msi = -1; module_param(disable_msi, int, 0); MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)"); @@ -4931,6 +4932,24 @@ static const char *sky2_name(u8 chipid, char *buf, int sz) return buf; } +static const struct dmi_system_id msi_blacklist[] = { + { + .ident = "Dell Inspiron 1545", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1545"), + }, + }, + { + .ident = "Gateway P-79", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Gateway"), + DMI_MATCH(DMI_PRODUCT_NAME, "P-79"), + }, + }, + {} +}; + static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *dev, *dev1; @@ -5042,6 +5061,9 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_pci; } + if (disable_msi == -1) + disable_msi = !!dmi_check_system(msi_blacklist); + if (!disable_msi && pci_enable_msi(pdev) == 0) { err = sky2_test_msi(hw); if (err) { From 1c69361496c97501f205d4ef2bc7b9c8d4c8a699 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 3 Mar 2019 07:35:51 +0000 Subject: [PATCH 2117/2608] team: Free BPF filter when unregistering netdev [ Upstream commit 692c31bd4054212312396b1d303bffab2c5b93a7 ] When team is used in loadbalance mode a BPF filter can be used to provide a hash which will determine the Tx port. When the netdev is later unregistered the filter is not freed which results in memory leaks [1]. Fix by freeing the program and the corresponding filter when unregistering the netdev. [1] unreferenced object 0xffff8881dbc47cc8 (size 16): comm "teamd", pid 3068, jiffies 4294997779 (age 438.247s) hex dump (first 16 bytes): a3 00 6b 6b 6b 6b 6b 6b 88 a5 82 e1 81 88 ff ff ..kkkkkk........ backtrace: [<000000008a3b47e3>] team_nl_cmd_options_set+0x88f/0x11b0 [<00000000c4f4f27e>] genl_family_rcv_msg+0x78f/0x1080 [<00000000610ef838>] genl_rcv_msg+0xca/0x170 [<00000000a281df93>] netlink_rcv_skb+0x132/0x380 [<000000004d9448a2>] genl_rcv+0x29/0x40 [<000000000321b2f4>] netlink_unicast+0x4c0/0x690 [<000000008c25dffb>] netlink_sendmsg+0x929/0xe10 [<00000000068298c5>] sock_sendmsg+0xc8/0x110 [<0000000082a61ff0>] ___sys_sendmsg+0x77a/0x8f0 [<00000000663ae29d>] __sys_sendmsg+0xf7/0x250 [<0000000027c5f11a>] do_syscall_64+0x14d/0x610 [<000000006cfbc8d3>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000e23197e2>] 0xffffffffffffffff unreferenced object 0xffff8881e182a588 (size 2048): comm "teamd", pid 3068, jiffies 4294997780 (age 438.247s) hex dump (first 32 bytes): 20 00 00 00 02 00 00 00 30 00 00 00 28 f0 ff ff .......0...(... 07 00 00 00 00 00 00 00 28 00 00 00 00 00 00 00 ........(....... backtrace: [<000000002daf01fb>] lb_bpf_func_set+0x45c/0x6d0 [<000000008a3b47e3>] team_nl_cmd_options_set+0x88f/0x11b0 [<00000000c4f4f27e>] genl_family_rcv_msg+0x78f/0x1080 [<00000000610ef838>] genl_rcv_msg+0xca/0x170 [<00000000a281df93>] netlink_rcv_skb+0x132/0x380 [<000000004d9448a2>] genl_rcv+0x29/0x40 [<000000000321b2f4>] netlink_unicast+0x4c0/0x690 [<000000008c25dffb>] netlink_sendmsg+0x929/0xe10 [<00000000068298c5>] sock_sendmsg+0xc8/0x110 [<0000000082a61ff0>] ___sys_sendmsg+0x77a/0x8f0 [<00000000663ae29d>] __sys_sendmsg+0xf7/0x250 [<0000000027c5f11a>] do_syscall_64+0x14d/0x610 [<000000006cfbc8d3>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000e23197e2>] 0xffffffffffffffff Fixes: 01d7f30a9f96 ("team: add loadbalance mode") Signed-off-by: Ido Schimmel Reported-by: Amit Cohen Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team_mode_loadbalance.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 1468ddf424cc..bc0890ee9700 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -319,6 +319,20 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) return 0; } +static void lb_bpf_func_free(struct team *team) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + struct bpf_prog *fp; + + if (!lb_priv->ex->orig_fprog) + return; + + __fprog_destroy(lb_priv->ex->orig_fprog); + fp = rcu_dereference_protected(lb_priv->fp, + lockdep_is_held(&team->lock)); + bpf_prog_destroy(fp); +} + static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx) { struct lb_priv *lb_priv = get_lb_priv(team); @@ -633,6 +647,7 @@ static void lb_exit(struct team *team) team_options_unregister(team, lb_options, ARRAY_SIZE(lb_options)); + lb_bpf_func_free(team); cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw); free_percpu(lb_priv->pcpu_stats); kfree(lb_priv->ex); From 71d1e057fe117c23688a8b08bec410265954008a Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 4 Mar 2019 23:26:10 +0100 Subject: [PATCH 2118/2608] tipc: fix RDM/DGRAM connect() regression [ Upstream commit 0e63208915a8d7590d0a6218dadb2a6a00ac705a ] Fix regression bug introduced in commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Only signal -EDESTADDRREQ for RDM/DGRAM if we don't have a cached sockaddr. Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e5f9f43ff15b..75681845679e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -943,7 +943,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(!dest)) { dest = &tsk->peer; - if (!syn || dest->family != AF_TIPC) + if (!syn && dest->family != AF_TIPC) return -EDESTADDRREQ; } From be11d272e843c47cc7b03f9bd94cf109589db343 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 27 Feb 2019 03:58:53 -0500 Subject: [PATCH 2119/2608] bnxt_en: Drop oversize TX packets to prevent errors. [ Upstream commit 2b3c6885386020b1b9d92d45e8349637e27d1f66 ] There have been reports of oversize UDP packets being sent to the driver to be transmitted, causing error conditions. The issue is likely caused by the dst of the SKB switching between 'lo' with 64K MTU and the hardware device with a smaller MTU. Patches are being proposed by Mahesh Bandewar to fix the issue. In the meantime, add a quick length check in the driver to prevent the error. The driver uses the TX packet size as index to look up an array to setup the TX BD. The array is large enough to support all MTU sizes supported by the driver. The oversize TX packet causes the driver to index beyond the array and put garbage values into the TX BD. Add a simple check to prevent this. Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index da6c73868fa0..15ad247955f7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -447,6 +447,12 @@ normal_tx: } length >>= 9; + if (unlikely(length >= ARRAY_SIZE(bnxt_lhint_arr))) { + dev_warn_ratelimited(&pdev->dev, "Dropped oversize %d bytes TX packet.\n", + skb->len); + i = 0; + goto tx_dma_error; + } flags |= bnxt_lhint_arr[length]; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); From 390ca73e722d77ca776df52b351eb855f4ee5ee3 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 28 Feb 2019 14:56:04 +0100 Subject: [PATCH 2120/2608] geneve: correctly handle ipv6.disable module parameter [ Upstream commit cf1c9ccba7308e48a68fa77f476287d9d614e4c7 ] When IPv6 is compiled but disabled at runtime, geneve_sock_add returns -EAFNOSUPPORT. For metadata based tunnels, this causes failure of the whole operation of bringing up the tunnel. Ignore failure of IPv6 socket creation for metadata based tunnels caused by IPv6 not being available. This is the same fix as what commit d074bf960044 ("vxlan: correctly handle ipv6.disable module parameter") is doing for vxlan. Note there's also commit c0a47e44c098 ("geneve: should not call rt6_lookup() when ipv6 was disabled") which fixes a similar issue but for regular tunnels, while this patch is needed for metadata based tunnels. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cb51448389a1..55c4b295ed0e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -632,15 +632,20 @@ out: static int geneve_open(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); - bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6); bool metadata = geneve->collect_md; + bool ipv4, ipv6; int ret = 0; + ipv6 = geneve->info.mode & IP_TUNNEL_INFO_IPV6 || metadata; + ipv4 = !ipv6 || metadata; #if IS_ENABLED(CONFIG_IPV6) - if (ipv6 || metadata) + if (ipv6) { ret = geneve_sock_add(geneve, true); + if (ret < 0 && ret != -EAFNOSUPPORT) + ipv4 = false; + } #endif - if (!ret && (!ipv6 || metadata)) + if (ipv4) ret = geneve_sock_add(geneve, false); if (ret < 0) geneve_sock_release(geneve); From c3a0550e0f1c822f929cb96bd559cb53d00692ce Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 22 Feb 2019 18:25:03 +0000 Subject: [PATCH 2121/2608] hv_netvsc: Fix IP header checksum for coalesced packets [ Upstream commit bf48648d650db1146b75b9bd358502431e86cf4f ] Incoming packets may have IP header checksum verified by the host. They may not have IP header checksum computed after coalescing. This patch re-compute the checksum when necessary, otherwise the packets may be dropped, because Linux network stack always checks it. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2d90cffae9ff..74b9e51b2b47 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -741,6 +741,14 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, schedule_delayed_work(&ndev_ctx->dwork, 0); } +static void netvsc_comp_ipcsum(struct sk_buff *skb) +{ + struct iphdr *iph = (struct iphdr *)skb->data; + + iph->check = 0; + iph->check = ip_fast_csum(iph, iph->ihl); +} + static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, struct napi_struct *napi, const struct ndis_tcp_ip_checksum_info *csum_info, @@ -764,9 +772,17 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, /* skb is already created with CHECKSUM_NONE */ skb_checksum_none_assert(skb); - /* - * In Linux, the IP checksum is always checked. - * Do L4 checksum offload if enabled and present. + /* Incoming packets may have IP header checksum verified by the host. + * They may not have IP header checksum computed after coalescing. + * We compute it here if the flags are set, because on Linux, the IP + * checksum is always checked. + */ + if (csum_info && csum_info->receive.ip_checksum_value_invalid && + csum_info->receive.ip_checksum_succeeded && + skb->protocol == htons(ETH_P_IP)) + netvsc_comp_ipcsum(skb); + + /* Do L4 checksum offload if enabled and present. */ if (csum_info && (net->features & NETIF_F_RXCSUM)) { if (csum_info->receive.tcp_checksum_succeeded || From 0e8ea299ded68ffe0d1bb974ca4a34a56b162a0b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 28 Feb 2019 18:14:03 +0100 Subject: [PATCH 2122/2608] net: dsa: mv88e6xxx: Fix u64 statistics [ Upstream commit 6e46e2d821bb22b285ae8187959096b65d063b0d ] The switch maintains u64 counters for the number of octets sent and received. These are kept as two u32's which need to be combined. Fix the combing, which wrongly worked on u16's. Fixes: 80c4627b2719 ("dsa: mv88x6xxx: Refactor getting a single statistic") Reported-by: Chris Healy Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index a3543d637736..45ccd98f6265 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -639,7 +639,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, default: return UINT64_MAX; } - value = (((u64)high) << 16) | low; + value = (((u64)high) << 32) | low; return value; } From fcfe700acdc1c72eab231300e82b962bac2b2b2c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Feb 2019 19:06:06 -0500 Subject: [PATCH 2123/2608] netlabel: fix out-of-bounds memory accesses [ Upstream commit 5578de4834fe0f2a34fedc7374be691443396d1f ] There are two array out-of-bounds memory accesses, one in cipso_v4_map_lvl_valid(), the other in netlbl_bitmap_walk(). Both errors are embarassingly simple, and the fixes are straightforward. As a FYI for anyone backporting this patch to kernels prior to v4.8, you'll want to apply the netlbl_bitmap_walk() patch to cipso_v4_bitmap_walk() as netlbl_bitmap_walk() doesn't exist before Linux v4.8. Reported-by: Jann Horn Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine") Fixes: 3faa8f982f95 ("netlabel: Move bitmap manipulation functions to the NetLabel core.") Signed-off-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 3 ++- net/netlabel/netlabel_kapi.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 777fa3b7fb13..f4b83de2263e 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -667,7 +667,8 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) case CIPSO_V4_MAP_PASS: return 0; case CIPSO_V4_MAP_TRANS: - if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + if ((level < doi_def->map.std->lvl.cipso_size) && + (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)) return 0; break; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index ea7c67050792..ee3e5b6471a6 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -903,7 +903,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, (state == 0 && (byte & bitmask) == 0)) return bit_spot; - bit_spot++; + if (++bit_spot >= bitmap_len) + return -1; bitmask >>= 1; if (bitmask == 0) { byte = bitmap[++byte_offset]; From 4de7d30668cb8b06330992e1cd336f91700a2ce7 Mon Sep 17 00:00:00 2001 From: Sheng Lan Date: Thu, 28 Feb 2019 18:47:58 +0800 Subject: [PATCH 2124/2608] net: netem: fix skb length BUG_ON in __skb_to_sgvec [ Upstream commit 5845f706388a4cde0f6b80f9e5d33527e942b7d9 ] It can be reproduced by following steps: 1. virtio_net NIC is configured with gso/tso on 2. configure nginx as http server with an index file bigger than 1M bytes 3. use tc netem to produce duplicate packets and delay: tc qdisc add dev eth0 root netem delay 100ms 10ms 30% duplicate 90% 4. continually curl the nginx http server to get index file on client 5. BUG_ON is seen quickly [10258690.371129] kernel BUG at net/core/skbuff.c:4028! [10258690.371748] invalid opcode: 0000 [#1] SMP PTI [10258690.372094] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.0.0-rc6 #2 [10258690.372094] RSP: 0018:ffffa05797b43da0 EFLAGS: 00010202 [10258690.372094] RBP: 00000000000005ea R08: 0000000000000000 R09: 00000000000005ea [10258690.372094] R10: ffffa0579334d800 R11: 00000000000002c0 R12: 0000000000000002 [10258690.372094] R13: 0000000000000000 R14: ffffa05793122900 R15: ffffa0578f7cb028 [10258690.372094] FS: 0000000000000000(0000) GS:ffffa05797b40000(0000) knlGS:0000000000000000 [10258690.372094] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [10258690.372094] CR2: 00007f1a6dc00868 CR3: 000000001000e000 CR4: 00000000000006e0 [10258690.372094] Call Trace: [10258690.372094] [10258690.372094] skb_to_sgvec+0x11/0x40 [10258690.372094] start_xmit+0x38c/0x520 [virtio_net] [10258690.372094] dev_hard_start_xmit+0x9b/0x200 [10258690.372094] sch_direct_xmit+0xff/0x260 [10258690.372094] __qdisc_run+0x15e/0x4e0 [10258690.372094] net_tx_action+0x137/0x210 [10258690.372094] __do_softirq+0xd6/0x2a9 [10258690.372094] irq_exit+0xde/0xf0 [10258690.372094] smp_apic_timer_interrupt+0x74/0x140 [10258690.372094] apic_timer_interrupt+0xf/0x20 [10258690.372094] In __skb_to_sgvec(), the skb->len is not equal to the sum of the skb's linear data size and nonlinear data size, thus BUG_ON triggered. Because the skb is cloned and a part of nonlinear data is split off. Duplicate packet is cloned in netem_enqueue() and may be delayed some time in qdisc. When qdisc len reached the limit and returns NET_XMIT_DROP, the skb will be retransmit later in write queue. the skb will be fragmented by tso_fragment(), the limit size that depends on cwnd and mss decrease, the skb's nonlinear data will be split off. The length of the skb cloned by netem will not be updated. When we use virtio_net NIC and invoke skb_to_sgvec(), the BUG_ON trigger. To fix it, netem returns NET_XMIT_SUCCESS to upper stack when it clones a duplicate packet. Fixes: 35d889d1 ("sch_netem: fix skb leak in netem_enqueue()") Signed-off-by: Sheng Lan Reported-by: Qin Ji Suggested-by: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3f4f0b946798..3d5654333d49 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -435,6 +435,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, int nb = 0; int count = 1; int rc = NET_XMIT_SUCCESS; + int rc_drop = NET_XMIT_DROP; /* Do not fool qdisc_drop_all() */ skb->prev = NULL; @@ -474,6 +475,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->duplicate = 0; rootq->enqueue(skb2, rootq, to_free); q->duplicate = dupsave; + rc_drop = NET_XMIT_SUCCESS; } /* @@ -486,7 +488,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb)) { segs = netem_segment(skb, sch, to_free); if (!segs) - return NET_XMIT_DROP; + return rc_drop; } else { segs = skb; } @@ -509,8 +511,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(sch->q.qlen >= sch->limit)) - return qdisc_drop_all(skb, sch, to_free); + if (unlikely(sch->q.qlen >= sch->limit)) { + qdisc_drop_all(skb, sch, to_free); + return rc_drop; + } qdisc_qstats_backlog_inc(sch, skb); From b275f8e6a0096cac11cf697c4e3ad226070264f5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Feb 2019 15:37:58 +0800 Subject: [PATCH 2125/2608] net: nfc: Fix NULL dereference on nfc_llcp_build_tlv fails [ Upstream commit 58bdd544e2933a21a51eecf17c3f5f94038261b5 ] KASAN report this: BUG: KASAN: null-ptr-deref in nfc_llcp_build_gb+0x37f/0x540 [nfc] Read of size 3 at addr 0000000000000000 by task syz-executor.0/5401 CPU: 0 PID: 5401 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xfa/0x1ce lib/dump_stack.c:113 kasan_report+0x171/0x18d mm/kasan/report.c:321 memcpy+0x1f/0x50 mm/kasan/common.c:130 nfc_llcp_build_gb+0x37f/0x540 [nfc] nfc_llcp_register_device+0x6eb/0xb50 [nfc] nfc_register_device+0x50/0x1d0 [nfc] nfcsim_device_new+0x394/0x67d [nfcsim] ? 0xffffffffc1080000 nfcsim_init+0x6b/0x1000 [nfcsim] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f9cb79dcc58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000003 RBP: 00007f9cb79dcc70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9cb79dd6bc R13: 00000000004bcefb R14: 00000000006f7030 R15: 0000000000000004 nfc_llcp_build_tlv will return NULL on fails, caller should check it, otherwise will trigger a NULL dereference. Reported-by: Hulk Robot Fixes: eda21f16a5ed ("NFC: Set MIU and RW values from CONNECT and CC LLCP frames") Fixes: d646960f7986 ("NFC: Initial LLCP support") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/llcp_commands.c | 20 ++++++++++++++++++++ net/nfc/llcp_core.c | 24 ++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 6a196e438b6c..d1fc019e932e 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -419,6 +419,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) sock->service_name, sock->service_name_len, &service_name_tlv_length); + if (!service_name_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += service_name_tlv_length; } @@ -429,9 +433,17 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); + if (!miux_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + if (!rw_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -484,9 +496,17 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); + if (!miux_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + if (!rw_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 02eef5cf3cce..7e619ff8a653 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -532,10 +532,10 @@ static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local) static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { - u8 *gb_cur, *version_tlv, version, version_length; - u8 *lto_tlv, lto_length; - u8 *wks_tlv, wks_length; - u8 *miux_tlv, miux_length; + u8 *gb_cur, version, version_length; + u8 lto_length, wks_length, miux_length; + u8 *version_tlv = NULL, *lto_tlv = NULL, + *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; int ret = 0; @@ -543,17 +543,33 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) version = LLCP_VERSION_11; version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, 1, &version_length); + if (!version_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += version_length; lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, <o_length); + if (!lto_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); + if (!wks_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += wks_length; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_length); + if (!miux_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += miux_length; gb_len += ARRAY_SIZE(llcp_magic); From bf92de28777726d4e95864e5dae7656ab5a4f769 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 26 Feb 2019 19:29:22 +0100 Subject: [PATCH 2126/2608] net: phy: phylink: fix uninitialized variable in phylink_get_mac_state [ Upstream commit d25ed413d5e51644e18f66e34eec049f17a7abcb ] When debugging an issue I found implausible values in state->pause. Reason in that state->pause isn't initialized and later only single bits are changed. Also the struct itself isn't initialized in phylink_resolve(). So better initialize state->pause and other not yet initialized fields. v2: - use right function name in subject v3: - initialize additional fields Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phylink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index afe335583832..5bfc961e53c9 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -333,6 +333,10 @@ static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state * linkmode_zero(state->lp_advertising); state->interface = pl->link_config.interface; state->an_enabled = pl->link_config.an_enabled; + state->speed = SPEED_UNKNOWN; + state->duplex = DUPLEX_UNKNOWN; + state->pause = MLO_PAUSE_NONE; + state->an_complete = 0; state->link = 1; return pl->ops->mac_link_state(ndev, state); From 46a5caa79ff73c9403ab37475ef47ed4c027e712 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Fri, 1 Mar 2019 23:06:40 +0800 Subject: [PATCH 2127/2608] net: sit: fix memory leak in sit_init_net() [ Upstream commit 07f12b26e21ab359261bf75cfcb424fdc7daeb6d ] If register_netdev() is failed to register sitn->fb_tunnel_dev, it will go to err_reg_dev and forget to free netdev(sitn->fb_tunnel_dev). BUG: memory leak unreferenced object 0xffff888378daad00 (size 512): comm "syz-executor.1", pid 4006, jiffies 4295121142 (age 16.115s) hex dump (first 32 bytes): 00 e6 ed c0 83 88 ff ff 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d6dcb63e>] kvmalloc include/linux/mm.h:577 [inline] [<00000000d6dcb63e>] kvzalloc include/linux/mm.h:585 [inline] [<00000000d6dcb63e>] netif_alloc_netdev_queues net/core/dev.c:8380 [inline] [<00000000d6dcb63e>] alloc_netdev_mqs+0x600/0xcc0 net/core/dev.c:8970 [<00000000867e172f>] sit_init_net+0x295/0xa40 net/ipv6/sit.c:1848 [<00000000871019fa>] ops_init+0xad/0x3e0 net/core/net_namespace.c:129 [<00000000319507f6>] setup_net+0x2ba/0x690 net/core/net_namespace.c:314 [<0000000087db4f96>] copy_net_ns+0x1dc/0x330 net/core/net_namespace.c:437 [<0000000057efc651>] create_new_namespaces+0x382/0x730 kernel/nsproxy.c:107 [<00000000676f83de>] copy_namespaces+0x2ed/0x3d0 kernel/nsproxy.c:165 [<0000000030b74bac>] copy_process.part.27+0x231e/0x6db0 kernel/fork.c:1919 [<00000000fff78746>] copy_process kernel/fork.c:1713 [inline] [<00000000fff78746>] _do_fork+0x1bc/0xe90 kernel/fork.c:2224 [<000000001c2e0d1c>] do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290 [<00000000ec48bd44>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<0000000039acff8a>] 0xffffffffffffffff Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2e55f9894548..c5b60190b1db 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1856,6 +1856,7 @@ static int __net_init sit_init_net(struct net *net) err_reg_dev: ipip6_dev_free(sitn->fb_tunnel_dev); + free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; } From e5e8350da5b17626d6f73b97157d24c66611d0de Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 21 Feb 2019 14:13:56 -0800 Subject: [PATCH 2128/2608] net: socket: set sock->sk to NULL after calling proto_ops::release() [ Upstream commit ff7b11aa481f682e0e9711abfeb7d03f5cd612bf ] Commit 9060cb719e61 ("net: crypto set sk to NULL when af_alg_release.") fixed a use-after-free in sockfs_setattr() when an AF_ALG socket is closed concurrently with fchownat(). However, it ignored that many other proto_ops::release() methods don't set sock->sk to NULL and therefore allow the same use-after-free: - base_sock_release - bnep_sock_release - cmtp_sock_release - data_sock_release - dn_release - hci_sock_release - hidp_sock_release - iucv_sock_release - l2cap_sock_release - llcp_sock_release - llc_ui_release - rawsock_release - rfcomm_sock_release - sco_sock_release - svc_release - vcc_release - x25_release Rather than fixing all these and relying on every socket type to get this right forever, just make __sock_release() set sock->sk to NULL itself after calling proto_ops::release(). Reproducer that produces the KASAN splat when any of these socket types are configured into the kernel: #include #include #include #include pthread_t t; volatile int fd; void *close_thread(void *arg) { for (;;) { usleep(rand() % 100); close(fd); } } int main() { pthread_create(&t, NULL, close_thread, NULL); for (;;) { fd = socket(rand() % 50, rand() % 11, 0); fchownat(fd, "", 1000, 1000, 0x1000); close(fd); } } Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Eric Biggers Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/socket.c b/net/socket.c index a401578f3f28..6d8f0c248c7e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,6 +600,7 @@ static void __sock_release(struct socket *sock, struct inode *inode) if (inode) inode_lock(inode); sock->ops->release(sock); + sock->sk = NULL; if (inode) inode_unlock(inode); sock->ops = NULL; From 03756e820a0213f0d4db77f436878229fce51b2a Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Thu, 28 Feb 2019 14:11:26 +0000 Subject: [PATCH 2129/2608] xen-netback: don't populate the hash cache on XenBus disconnect [ Upstream commit a2288d4e355992d369c50c45d017a85f6061ff71 ] Occasionally, during the disconnection procedure on XenBus which includes hash cache deinitialization there might be some packets still in-flight on other processors. Handling of these packets includes hashing and hash cache population that finally results in hash cache data structure corruption. In order to avoid this we prevent hashing of those packets if there are no queues initialized. In that case RCU protection of queues guards the hash cache as well. Signed-off-by: Igor Druzhinin Reviewed-by: Paul Durrant Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/hash.c | 2 ++ drivers/net/xen-netback/interface.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 3b6fb5b3bdb2..6414cc6b9032 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -435,6 +435,8 @@ void xenvif_init_hash(struct xenvif *vif) if (xenvif_hash_cache_size == 0) return; + BUG_ON(vif->hash.cache.count); + spin_lock_init(&vif->hash.cache.lock); INIT_LIST_HEAD(&vif->hash.cache.list); } diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 4491ca5aee90..d465071656b5 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -153,6 +153,13 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, { struct xenvif *vif = netdev_priv(dev); unsigned int size = vif->hash.size; + unsigned int num_queues; + + /* If queues are not set up internally - always return 0 + * as the packet going to be dropped anyway */ + num_queues = READ_ONCE(vif->num_queues); + if (num_queues < 1) + return 0; if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) return fallback(dev, skb) % dev->real_num_tx_queues; From 1902c0cb701cbae2bdc3352ea01bdcf98ada7818 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Thu, 28 Feb 2019 12:48:03 +0000 Subject: [PATCH 2130/2608] xen-netback: fix occasional leak of grant ref mappings under memory pressure [ Upstream commit 99e87f56b48f490fb16b6e0f74691c1e664dea95 ] Zero-copy callback flag is not yet set on frag list skb at the moment xenvif_handle_frag_list() returns -ENOMEM. This eventually results in leaking grant ref mappings since xenvif_zerocopy_callback() is never called for these fragments. Those eventually build up and cause Xen to kill Dom0 as the slots get reused for new mappings: "d0v0 Attempt to implicitly unmap a granted PTE c010000329fce005" That behavior is observed under certain workloads where sudden spikes of page cache writes coexist with active atomic skb allocations from network traffic. Additionally, rework the logic to deal with frag_list deallocation in a single place. Signed-off-by: Paul Durrant Signed-off-by: Igor Druzhinin Acked-by: Wei Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/netback.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 5042ff8d449a..d09dea77c287 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1074,11 +1074,6 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s skb_frag_size_set(&frags[i], len); } - /* Copied all the bits from the frag list -- free it. */ - skb_frag_list_init(skb); - xenvif_skb_zerocopy_prepare(queue, nskb); - kfree_skb(nskb); - /* Release all the original (foreign) frags. */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) skb_frag_unref(skb, f); @@ -1147,6 +1142,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) xenvif_fill_frags(queue, skb); if (unlikely(skb_has_frag_list(skb))) { + struct sk_buff *nskb = skb_shinfo(skb)->frag_list; + xenvif_skb_zerocopy_prepare(queue, nskb); if (xenvif_handle_frag_list(queue, skb)) { if (net_ratelimit()) netdev_err(queue->vif->dev, @@ -1155,6 +1152,9 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) kfree_skb(skb); continue; } + /* Copied all the bits from the frag list -- free it. */ + skb_frag_list_init(skb); + kfree_skb(nskb); } skb->dev = queue->vif->dev; From 153100388f39d6dd9f08dc3ba9f00cae00d575e7 Mon Sep 17 00:00:00 2001 From: Nazarov Sergey Date: Mon, 25 Feb 2019 19:24:15 +0300 Subject: [PATCH 2131/2608] net: Add __icmp_send helper. [ Upstream commit 9ef6b42ad6fd7929dd1b6092cb02014e382c6a91 ] Add __icmp_send function having ip_options struct parameter Signed-off-by: Sergey Nazarov Reviewed-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/icmp.h | 9 ++++++++- net/ipv4/icmp.c | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/net/icmp.h b/include/net/icmp.h index 3ef2743a8eec..8665bf24e3b7 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -22,6 +22,7 @@ #include #include +#include struct icmp_err { int errno; @@ -39,7 +40,13 @@ struct net_proto_family; struct sk_buff; struct net; -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt); +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); +} + int icmp_rcv(struct sk_buff *skb); void icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3c1570d3e22f..f9d790b058d2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -573,7 +573,8 @@ relookup_failed: * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt) { struct iphdr *iph; int room; @@ -694,7 +695,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in)) + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt)) goto out_unlock; @@ -747,7 +748,7 @@ out_bh_enable: local_bh_enable(); out:; } -EXPORT_SYMBOL(icmp_send); +EXPORT_SYMBOL(__icmp_send); static void icmp_socket_deliver(struct sk_buff *skb, u32 info) From 980a71678eef1f035fa04ba211c98110515bc37a Mon Sep 17 00:00:00 2001 From: Nazarov Sergey Date: Mon, 25 Feb 2019 19:27:15 +0300 Subject: [PATCH 2132/2608] net: avoid use IPCB in cipso_v4_error [ Upstream commit 3da1ed7ac398f34fff1694017a07054d69c5f5c5 ] Extract IP options in cipso_v4_error and use __icmp_send. Signed-off-by: Sergey Nazarov Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 2 ++ net/ipv4/cipso_ipv4.c | 17 +++++++++++++++-- net/ipv4/ip_options.c | 22 +++++++++++++++++----- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 7c430343176a..80575db4e304 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -593,6 +593,8 @@ static inline int ip_options_echo(struct net *net, struct ip_options *dopt, } void ip_options_fragment(struct sk_buff *skb); +int __ip_options_compile(struct net *net, struct ip_options *opt, + struct sk_buff *skb, __be32 *info); int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); int ip_options_get(struct net *net, struct ip_options_rcu **optp, diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index f4b83de2263e..f0165c5f376b 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1736,13 +1736,26 @@ validate_return: */ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; + /* + * We might be called above the IP layer, + * so we can not use icmp_send and IPCB here. + */ + + memset(opt, 0, sizeof(struct ip_options)); + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + return; + if (gateway) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); else - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); } /** diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ed194d46c00e..32a35043c9f5 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb) * If opt == NULL, then skb->data should point to IP header. */ -int ip_options_compile(struct net *net, - struct ip_options *opt, struct sk_buff *skb) +int __ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb, + __be32 *info) { __be32 spec_dst = htonl(INADDR_ANY); unsigned char *pp_ptr = NULL; @@ -468,11 +469,22 @@ eol: return 0; error: - if (skb) { - icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); - } + if (info) + *info = htonl((pp_ptr-iph)<<24); return -EINVAL; } + +int ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb) +{ + int ret; + __be32 info; + + ret = __ip_options_compile(net, opt, skb, &info); + if (ret != 0 && skb) + icmp_send(skb, ICMP_PARAMETERPROB, 0, info); + return ret; +} EXPORT_SYMBOL(ip_options_compile); /* From 987e2bfbf8171048db1b133b4294c5ce6deebdfe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Feb 2019 09:00:02 -0800 Subject: [PATCH 2133/2608] ipv4: Return error for RTA_VIA attribute [ Upstream commit b6e9e5df4ecf100f6a10ab2ade8e46d47a4b9779 ] IPv4 currently does not support nexthops outside of the AF_INET family. Specifically, it does not handle RTA_VIA attribute. If it is passed in a route add request, the actual route added only uses the device which is clearly not what the user intended: $ ip ro add 172.16.1.0/24 via inet6 2001:db8:1::1 dev eth0 $ ip ro ls ... 172.16.1.0/24 dev eth0 Catch this and fail the route add: $ ip ro add 172.16.1.0/24 via inet6 2001:db8:1::1 dev eth0 Error: IPv4 does not support RTA_VIA attribute. Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b5317b2b191d..ff499000f6cd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -675,6 +675,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_GATEWAY: cfg->fc_gw = nla_get_be32(attr); break; + case RTA_VIA: + NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute"); + err = -EINVAL; + goto errout; case RTA_PRIORITY: cfg->fc_priority = nla_get_u32(attr); break; From 87964ee15964dd576c85abce4ec69530cc25acb3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Feb 2019 09:00:03 -0800 Subject: [PATCH 2134/2608] ipv6: Return error for RTA_VIA attribute [ Upstream commit e3818541b49fb88650ba339d33cc53e4095da5b3 ] IPv6 currently does not support nexthops outside of the AF_INET6 family. Specifically, it does not handle RTA_VIA attribute. If it is passed in a route add request, the actual route added only uses the device which is clearly not what the user intended: $ ip -6 ro add 2001:db8:2::/64 via inet 172.16.1.1 dev eth0 $ ip ro ls ... 2001:db8:2::/64 dev eth0 metric 1024 pref medium Catch this and fail the route add: $ ip -6 ro add 2001:db8:2::/64 via inet 172.16.1.1 dev eth0 Error: IPv6 does not support RTA_VIA attribute. Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 74dd35d6567c..fafecdc06900 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3024,6 +3024,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; } + if (tb[RTA_VIA]) { + NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute"); + goto errout; + } if (tb[RTA_DST]) { int plen = (rtm->rtm_dst_len + 7) >> 3; From a9494400f8c1ea8d2e760cec1be44dfe225a607b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Feb 2019 09:00:04 -0800 Subject: [PATCH 2135/2608] mpls: Return error for RTA_GATEWAY attribute [ Upstream commit be48220edd48ca0d569782992840488a52373a24 ] MPLS does not support nexthops with an MPLS address family. Specifically, it does not handle RTA_GATEWAY attribute. Make it clear by returning an error. Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mpls/af_mpls.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index aee385eb72e7..9a153f64b8d7 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1787,6 +1787,9 @@ static int rtm_to_route_config(struct sk_buff *skb, goto errout; break; } + case RTA_GATEWAY: + NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute"); + goto errout; case RTA_VIA: { if (nla_get_via(nla, &cfg->rc_via_alen, From 32869c80859a6fd11b7751e2dc61a04f6feaee9b Mon Sep 17 00:00:00 2001 From: Timur Celik Date: Sat, 23 Feb 2019 12:53:13 +0100 Subject: [PATCH 2136/2608] tun: fix blocking read [ Upstream commit 71828b2240692cec0e68b8d867bc00e1745e7fae ] This patch moves setting of the current state into the loop. Otherwise the task may end up in a busy wait loop if none of the break conditions are met. Signed-off-by: Timur Celik Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2956bb6cda72..a87450df3230 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1714,9 +1714,9 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, } add_wait_queue(&tfile->wq.wait, &wait); - current->state = TASK_INTERRUPTIBLE; while (1) { + set_current_state(TASK_INTERRUPTIBLE); skb = skb_array_consume(&tfile->tx_array); if (skb) break; @@ -1732,7 +1732,7 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, schedule(); } - current->state = TASK_RUNNING; + set_current_state(TASK_RUNNING); remove_wait_queue(&tfile->wq.wait, &wait); out: From a7e2560faea59f89d70123bba4c82cdd162f7c94 Mon Sep 17 00:00:00 2001 From: Timur Celik Date: Mon, 25 Feb 2019 21:13:13 +0100 Subject: [PATCH 2137/2608] tun: remove unnecessary memory barrier [ Upstream commit ecef67cb10db7b83b3b71c61dbb29aa070ab0112 ] Replace set_current_state with __set_current_state since no memory barrier is needed at this point. Signed-off-by: Timur Celik Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a87450df3230..4227ee33ef19 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1732,7 +1732,7 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, schedule(); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(&tfile->wq.wait, &wait); out: From b9e298f864a3f066914543213c9c5cff3fd69aa2 Mon Sep 17 00:00:00 2001 From: Rajasingh Thavamani Date: Wed, 27 Feb 2019 17:43:19 +0530 Subject: [PATCH 2138/2608] net: phy: Micrel KSZ8061: link failure after cable connect [ Upstream commit 232ba3a51cc224b339c7114888ed7f0d4d95695e ] With Micrel KSZ8061 PHY, the link may occasionally not come up after Ethernet cable connect. The vendor's (Microchip, former Micrel) errata sheet 80000688A.pdf descripes the problem and possible workarounds in detail, see below. The batch implements workaround 1, which permanently fixes the issue. DESCRIPTION Link-up may not occur properly when the Ethernet cable is initially connected. This issue occurs more commonly when the cable is connected slowly, but it may occur any time a cable is connected. This issue occurs in the auto-negotiation circuit, and will not occur if auto-negotiation is disabled (which requires that the two link partners be set to the same speed and duplex). END USER IMPLICATIONS When this issue occurs, link is not established. Subsequent cable plug/unplaug cycle will not correct the issue. WORk AROUND There are four approaches to work around this issue: 1. This issue can be prevented by setting bit 15 in MMD device address 1, register 2, prior to connecting the cable or prior to setting the Restart Auto-negotiation bit in register 0h. The MMD registers are accessed via the indirect access registers Dh and Eh, or via the Micrel EthUtil utility as shown here: . if using the EthUtil utility (usually with a Micrel KSZ8061 Evaluation Board), type the following commands: > address 1 > mmd 1 > iw 2 b61a . Alternatively, write the following registers to write to the indirect MMD register: Write register Dh, data 0001h Write register Eh, data 0002h Write register Dh, data 4001h Write register Eh, data B61Ah 2. The issue can be avoided by disabling auto-negotiation in the KSZ8061, either by the strapping option, or by clearing bit 12 in register 0h. Care must be taken to ensure that the KSZ8061 and the link partner will link with the same speed and duplex. Note that the KSZ8061 defaults to full-duplex when auto-negotiation is off, but other devices may default to half-duplex in the event of failed auto-negotiation. 3. The issue can be avoided by connecting the cable prior to powering-up or resetting the KSZ8061, and leaving it plugged in thereafter. 4. If the above measures are not taken and the problem occurs, link can be recovered by setting the Restart Auto-Negotiation bit in register 0h, or by resetting or power cycling the device. Reset may be either hardware reset or software reset (register 0h, bit 15). PLAN This errata will not be corrected in the future revision. Fixes: 7ab59dc15e2f ("drivers/net/phy/micrel_phy: Add support for new PHYs") Signed-off-by: Alexander Onnasch Signed-off-by: Rajasingh Thavamani Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 6c45ff650ec7..eb85cf4a381a 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -339,6 +339,17 @@ static int ksz8041_config_aneg(struct phy_device *phydev) return genphy_config_aneg(phydev); } +static int ksz8061_config_init(struct phy_device *phydev) +{ + int ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A); + if (ret) + return ret; + + return kszphy_config_init(phydev); +} + static int ksz9021_load_values_from_of(struct phy_device *phydev, const struct device_node *of_node, u16 reg, @@ -938,7 +949,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_init = kszphy_config_init, + .config_init = ksz8061_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, From e7c4193e2a1d59afb622a83f605e7c4b58d0fa33 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 1 Mar 2019 23:43:39 +0100 Subject: [PATCH 2139/2608] net: dsa: mv88e6xxx: Fix statistics on mv88e6161 [ Upstream commit a6da21bb0eae459a375d5bd48baed821d14301d0 ] Despite what the datesheet says, the silicon implements the older way of snapshoting the statistics. Change the op. Reported-by: Chris.Healy@zii.aero Tested-by: Chris.Healy@zii.aero Fixes: 0ac64c394900 ("net: dsa: mv88e6xxx: mv88e6161 uses mv88e6320 stats snapshot") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 45ccd98f6265..4fbc75b73433 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2569,7 +2569,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_snapshot = mv88e6xxx_g1_stats_snapshot, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, From 9e1176f21da05751722d8ee0345697251a739320 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 20 Nov 2018 11:00:18 +0800 Subject: [PATCH 2140/2608] x86/CPU/AMD: Set the CPB bit unconditionally on F17h commit 0237199186e7a4aa5310741f0a6498a20c820fd7 upstream. Some F17h models do not have CPB set in CPUID even though the CPU supports it. Set the feature bit unconditionally on all F17h. [ bp: Rewrite commit message and patch. ] Signed-off-by: Jiaxun Yang Signed-off-by: Borislav Petkov Acked-by: Tom Lendacky Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Sherry Hurwitz Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20181120030018.5185-1-jiaxun.yang@flygoat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7e03515662c0..ecf82859f1c0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -791,11 +791,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects - * all up to and including B1. - */ - if (c->x86_model <= 1 && c->x86_stepping <= 1) + + /* Fix erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); } From 5838a070d1177dc02d7acb553bc02e6f018e024a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 9 Jan 2019 16:05:10 -0600 Subject: [PATCH 2141/2608] applicom: Fix potential Spectre v1 vulnerabilities commit d7ac3c6ef5d8ce14b6381d52eb7adafdd6c8bb3c upstream. IndexCard is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/char/applicom.c:418 ac_write() warn: potential spectre issue 'apbs' [r] drivers/char/applicom.c:728 ac_ioctl() warn: potential spectre issue 'apbs' [r] (local cap) Fix this by sanitizing IndexCard before using it to index apbs. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/char/applicom.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index c0a5b1f3a986..4ccc39e00ced 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -386,7 +387,11 @@ static ssize_t ac_write(struct file *file, const char __user *buf, size_t count, TicCard = st_loc.tic_des_from_pc; /* tic number to send */ IndexCard = NumCard - 1; - if((NumCard < 1) || (NumCard > MAX_BOARD) || !apbs[IndexCard].RamIO) + if (IndexCard >= MAX_BOARD) + return -EINVAL; + IndexCard = array_index_nospec(IndexCard, MAX_BOARD); + + if (!apbs[IndexCard].RamIO) return -EINVAL; #ifdef DEBUG @@ -697,6 +702,7 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) unsigned char IndexCard; void __iomem *pmem; int ret = 0; + static int warncount = 10; volatile unsigned char byte_reset_it; struct st_ram_io *adgl; void __user *argp = (void __user *)arg; @@ -711,16 +717,12 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) mutex_lock(&ac_mutex); IndexCard = adgl->num_card-1; - if(cmd != 6 && ((IndexCard >= MAX_BOARD) || !apbs[IndexCard].RamIO)) { - static int warncount = 10; - if (warncount) { - printk( KERN_WARNING "APPLICOM driver IOCTL, bad board number %d\n",(int)IndexCard+1); - warncount--; - } - kfree(adgl); - mutex_unlock(&ac_mutex); - return -EINVAL; - } + if (cmd != 6 && IndexCard >= MAX_BOARD) + goto err; + IndexCard = array_index_nospec(IndexCard, MAX_BOARD); + + if (cmd != 6 && !apbs[IndexCard].RamIO) + goto err; switch (cmd) { @@ -838,5 +840,16 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) kfree(adgl); mutex_unlock(&ac_mutex); return 0; + +err: + if (warncount) { + pr_warn("APPLICOM driver IOCTL, bad board number %d\n", + (int)IndexCard + 1); + warncount--; + } + kfree(adgl); + mutex_unlock(&ac_mutex); + return -EINVAL; + } From 442d1144f91508471a82e77c01fca193d1e5bc0c Mon Sep 17 00:00:00 2001 From: Liu Xiang Date: Sat, 16 Feb 2019 17:12:24 +0800 Subject: [PATCH 2142/2608] MIPS: irq: Allocate accurate order pages for irq stack commit 72faa7a773ca59336f3c889e878de81445c5a85c upstream. The irq_pages is the number of pages for irq stack, but not the order which is needed by __get_free_pages(). We can use get_order() to calculate the accurate order. Signed-off-by: Liu Xiang Signed-off-by: Paul Burton Fixes: fe8bd18ffea5 ("MIPS: Introduce irq_stack") Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index ba150c755fcc..85b6c60f285d 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -52,6 +52,7 @@ asmlinkage void spurious_interrupt(void) void __init init_IRQ(void) { int i; + unsigned int order = get_order(IRQ_STACK_SIZE); for (i = 0; i < NR_IRQS; i++) irq_set_noprobe(i); @@ -62,8 +63,7 @@ void __init init_IRQ(void) arch_init_irq(); for_each_possible_cpu(i) { - int irq_pages = IRQ_STACK_SIZE / PAGE_SIZE; - void *s = (void *)__get_free_pages(GFP_KERNEL, irq_pages); + void *s = (void *)__get_free_pages(GFP_KERNEL, order); irq_stack[i] = s; pr_debug("CPU%d IRQ stack at 0x%p - 0x%p\n", i, From af46f4c0372aea3813afa6f2d9debb39346655c3 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 28 Feb 2019 16:22:02 -0800 Subject: [PATCH 2143/2608] hugetlbfs: fix races and page leaks during migration commit cb6acd01e2e43fd8bad11155752b7699c3d0fb76 upstream. hugetlb pages should only be migrated if they are 'active'. The routines set/clear_page_huge_active() modify the active state of hugetlb pages. When a new hugetlb page is allocated at fault time, set_page_huge_active is called before the page is locked. Therefore, another thread could race and migrate the page while it is being added to page table by the fault code. This race is somewhat hard to trigger, but can be seen by strategically adding udelay to simulate worst case scheduling behavior. Depending on 'how' the code races, various BUG()s could be triggered. To address this issue, simply delay the set_page_huge_active call until after the page is successfully added to the page table. Hugetlb pages can also be leaked at migration time if the pages are associated with a file in an explicitly mounted hugetlbfs filesystem. For example, consider a two node system with 4GB worth of huge pages available. A program mmaps a 2G file in a hugetlbfs filesystem. It then migrates the pages associated with the file from one node to another. When the program exits, huge page counts are as follows: node0 1024 free_hugepages 1024 nr_hugepages node1 0 free_hugepages 1024 nr_hugepages Filesystem Size Used Avail Use% Mounted on nodev 4.0G 2.0G 2.0G 50% /var/opt/hugepool That is as expected. 2G of huge pages are taken from the free_hugepages counts, and 2G is the size of the file in the explicitly mounted filesystem. If the file is then removed, the counts become: node0 1024 free_hugepages 1024 nr_hugepages node1 1024 free_hugepages 1024 nr_hugepages Filesystem Size Used Avail Use% Mounted on nodev 4.0G 2.0G 2.0G 50% /var/opt/hugepool Note that the filesystem still shows 2G of pages used, while there actually are no huge pages in use. The only way to 'fix' the filesystem accounting is to unmount the filesystem If a hugetlb page is associated with an explicitly mounted filesystem, this information in contained in the page_private field. At migration time, this information is not preserved. To fix, simply transfer page_private from old to new page at migration time if necessary. There is a related race with removing a huge page from a file and migration. When a huge page is removed from the pagecache, the page_mapping() field is cleared, yet page_private remains set until the page is actually freed by free_huge_page(). A page could be migrated while in this state. However, since page_mapping() is not set the hugetlbfs specific routine to transfer page_private is not called and we leak the page count in the filesystem. To fix that, check for this condition before migrating a huge page. If the condition is detected, return EBUSY for the page. Link: http://lkml.kernel.org/r/74510272-7319-7372-9ea6-ec914734c179@oracle.com Link: http://lkml.kernel.org/r/20190212221400.3512-1-mike.kravetz@oracle.com Fixes: bcc54222309c ("mm: hugetlb: introduce page_huge_active") Signed-off-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Cc: Michal Hocko Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Mel Gorman Cc: Davidlohr Bueso Cc: [mike.kravetz@oracle.com: v2] Link: http://lkml.kernel.org/r/7534d322-d782-8ac6-1c8d-a8dc380eb3ab@oracle.com [mike.kravetz@oracle.com: update comment and changelog] Link: http://lkml.kernel.org/r/420bcfd6-158b-38e4-98da-26d0cd85bd01@oracle.com Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hugetlbfs/inode.c | 12 ++++++++++++ mm/hugetlb.c | 16 +++++++++++++--- mm/migrate.c | 11 +++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2a6ed036d207..eb6f3de29f69 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -845,6 +845,18 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, rc = migrate_huge_page_move_mapping(mapping, newpage, page); if (rc != MIGRATEPAGE_SUCCESS) return rc; + + /* + * page_private is subpool pointer in hugetlb pages. Transfer to + * new page. PagePrivate is not associated with page_private for + * hugetlb pages and can not be set here as only page_huge_active + * pages can be migrated. + */ + if (page_private(page)) { + set_page_private(newpage, page_private(page)); + set_page_private(page, 0); + } + if (mode != MIGRATE_SYNC_NO_COPY) migrate_page_copy(newpage, page); else diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 224cdd953a79..7f75bd2fb8a7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3577,7 +3577,6 @@ retry_avoidcopy: copy_user_huge_page(new_page, old_page, address, vma, pages_per_huge_page(h)); __SetPageUptodate(new_page); - set_page_huge_active(new_page); mmun_start = address & huge_page_mask(h); mmun_end = mmun_start + huge_page_size(h); @@ -3600,6 +3599,7 @@ retry_avoidcopy: make_huge_pte(vma, new_page, 1)); page_remove_rmap(old_page, true); hugepage_add_new_anon_rmap(new_page, vma, address); + set_page_huge_active(new_page); /* Make the old page be freed below */ new_page = old_page; } @@ -3682,6 +3682,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; pte_t new_pte; spinlock_t *ptl; + bool new_page = false; /* * Currently, we are forced to kill the process in the event the @@ -3747,7 +3748,7 @@ retry: } clear_huge_page(page, address, pages_per_huge_page(h)); __SetPageUptodate(page); - set_page_huge_active(page); + new_page = true; if (vma->vm_flags & VM_MAYSHARE) { int err = huge_add_to_page_cache(page, mapping, idx); @@ -3818,6 +3819,15 @@ retry: } spin_unlock(ptl); + + /* + * Only make newly allocated pages active. Existing pages found + * in the pagecache could be !page_huge_active() if they have been + * isolated for migration. + */ + if (new_page) + set_page_huge_active(page); + unlock_page(page); out: return ret; @@ -4053,7 +4063,6 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, * the set_pte_at() write. */ __SetPageUptodate(page); - set_page_huge_active(page); mapping = dst_vma->vm_file->f_mapping; idx = vma_hugecache_offset(h, dst_vma, dst_addr); @@ -4121,6 +4130,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, update_mmu_cache(dst_vma, dst_addr, dst_pte); spin_unlock(ptl); + set_page_huge_active(page); if (vm_shared) unlock_page(page); ret = 0; diff --git a/mm/migrate.c b/mm/migrate.c index 8c57cdd77ba5..877269339fa7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1303,6 +1303,16 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, lock_page(hpage); } + /* + * Check for pages which are in the process of being freed. Without + * page_mapping() set, hugetlbfs specific move page routine will not + * be called and we could leak usage counts for subpools. + */ + if (page_private(hpage) && !page_mapping(hpage)) { + rc = -EBUSY; + goto out_unlock; + } + if (PageAnon(hpage)) anon_vma = page_get_anon_vma(hpage); @@ -1334,6 +1344,7 @@ put_anon: set_page_owner_migrate_reason(new_hpage, reason); } +out_unlock: unlock_page(hpage); out: if (rc != -EAGAIN) From 1b0322ebab523163f6b1ca189191c125bf58ebbc Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 2 Jan 2019 01:08:32 -0800 Subject: [PATCH 2144/2608] xtensa: fix get_wchan commit d90b88fd3653f1fb66ecc6571b860d5a5749fa56 upstream. Stack unwinding is implemented incorrectly in xtensa get_wchan: instead of extracting a0 and a1 registers from the spill location under the stack pointer it extracts a word pointed to by the stack pointer and subtracts 4 or 3 from it. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index f1c46bc5d465..e48a2137e87a 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -314,8 +314,8 @@ unsigned long get_wchan(struct task_struct *p) /* Stack layout: sp-4: ra, sp-3: sp' */ - pc = MAKE_PC_FROM_RA(*(unsigned long*)sp - 4, sp); - sp = *(unsigned long *)sp - 3; + pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), sp); + sp = SPILL_SLOT(sp, 1); } while (count++ < 16); return 0; } From 82c73d482c746a95f5e76246bda842fc00d2469f Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 2 Jan 2019 16:11:20 -0800 Subject: [PATCH 2145/2608] Bluetooth: Fix locking in bt_accept_enqueue() for BH context commit c4f5627f7eeecde1bb6b646d8c0907b96dc2b2a6 upstream. With commit e16337622016 ("Bluetooth: Handle bt_accept_enqueue() socket atomically") lock_sock[_nested]() is used to acquire the socket lock before manipulating the socket. lock_sock[_nested]() may block, which is problematic since bt_accept_enqueue() can be called in bottom half context (e.g. from rfcomm_connect_ind()): [] __might_sleep+0x4c/0x80 [] lock_sock_nested+0x24/0x58 [] bt_accept_enqueue+0x48/0xd4 [bluetooth] [] rfcomm_connect_ind+0x190/0x218 [rfcomm] Add a parameter to bt_accept_enqueue() to indicate whether the function is called from BH context, and acquire the socket lock with bh_lock_sock_nested() if that's the case. Also adapt all callers of bt_accept_enqueue() to pass the new parameter: - l2cap_sock_new_connection_cb() - uses lock_sock() to lock the parent socket => process context - rfcomm_connect_ind() - acquires the parent socket lock with bh_lock_sock() => BH context - __sco_chan_add() - called from sco_chan_add(), which is called from sco_connect(). parent is NULL, hence bt_accept_enqueue() isn't called in this code path and we can ignore it - also called from sco_conn_ready(). uses bh_lock_sock() to acquire the parent lock => BH context Fixes: e16337622016 ("Bluetooth: Handle bt_accept_enqueue() socket atomically") Signed-off-by: Matthias Kaehlcke Reviewed-by: Douglas Anderson Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/af_bluetooth.c | 16 +++++++++++++--- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 020142bb9735..2e1d36b33db7 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -273,7 +273,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned long flags); -void bt_accept_enqueue(struct sock *parent, struct sock *sk); +void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh); void bt_accept_unlink(struct sock *sk); struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 583951e82cee..b216e697deac 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -154,15 +154,25 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) } EXPORT_SYMBOL(bt_sock_unlink); -void bt_accept_enqueue(struct sock *parent, struct sock *sk) +void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { BT_DBG("parent %p, sk %p", parent, sk); sock_hold(sk); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (bh) + bh_lock_sock_nested(sk); + else + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; - release_sock(sk); + + if (bh) + bh_unlock_sock(sk); + else + release_sock(sk); + parent->sk_ack_backlog++; } EXPORT_SYMBOL(bt_accept_enqueue); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 67a8642f57ea..8c329c549ea6 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1253,7 +1253,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) l2cap_sock_init(sk, parent); - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, false); release_sock(parent); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 1aaccf637479..8fcd9130439d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -988,7 +988,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * rfcomm_pi(sk)->channel = channel; sk->sk_state = BT_CONFIG; - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, true); /* Accept connection and return socket DLC */ *d = rfcomm_pi(sk)->dlc; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 81fe3949c158..2d23b29ce00d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -193,7 +193,7 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, conn->sk = sk; if (parent) - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, true); } static int sco_chan_add(struct sco_conn *conn, struct sock *sk, From 069fb92ea221c72bd75f4863b3540420082f32ba Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 19 Feb 2019 10:10:38 +0800 Subject: [PATCH 2146/2608] exec: Fix mem leak in kernel_read_file commit f612acfae86af7ecad754ae6a46019be9da05b8e upstream. syzkaller report this: BUG: memory leak unreferenced object 0xffffc9000488d000 (size 9195520): comm "syz-executor.0", pid 2752, jiffies 4294787496 (age 18.757s) hex dump (first 32 bytes): ff ff ff ff ff ff ff ff a8 00 00 00 01 00 00 00 ................ 02 00 00 00 00 00 00 00 80 a1 7a c1 ff ff ff ff ..........z..... backtrace: [<000000000863775c>] __vmalloc_node mm/vmalloc.c:1795 [inline] [<000000000863775c>] __vmalloc_node_flags mm/vmalloc.c:1809 [inline] [<000000000863775c>] vmalloc+0x8c/0xb0 mm/vmalloc.c:1831 [<000000003f668111>] kernel_read_file+0x58f/0x7d0 fs/exec.c:924 [<000000002385813f>] kernel_read_file_from_fd+0x49/0x80 fs/exec.c:993 [<0000000011953ff1>] __do_sys_finit_module+0x13b/0x2a0 kernel/module.c:3895 [<000000006f58491f>] do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 [<00000000ee78baf4>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000241f889b>] 0xffffffffffffffff It should goto 'out_free' lable to free allocated buf while kernel_read fails. Fixes: 39d637af5aa7 ("vfs: forbid write access when reading a file into memory") Signed-off-by: YueHaibing Signed-off-by: Al Viro Cc: Thibaut Sautereau Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 0da4d748b4e6..0936b5a8199a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -925,7 +925,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, bytes = kernel_read(file, *buf + pos, i_size - pos, &pos); if (bytes < 0) { ret = bytes; - goto out; + goto out_free; } if (bytes == 0) From 4d00335342d0d3bd2f02137595845b7438af43b4 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 14 Feb 2019 22:57:41 +0100 Subject: [PATCH 2147/2608] scsi: core: reset host byte in DID_NEXUS_FAILURE case commit 4a067cf823d9d8e50d41cfb618011c0d4a969c72 upstream. Up to 4.12, __scsi_error_from_host_byte() would reset the host byte to DID_OK for various cases including DID_NEXUS_FAILURE. Commit 2a842acab109 ("block: introduce new block status code type") replaced this function with scsi_result_to_blk_status() and removed the host-byte resetting code for the DID_NEXUS_FAILURE case. As the line set_host_byte(cmd, DID_OK) was preserved for the other cases, I suppose this was an editing mistake. The fact that the host byte remains set after 4.13 is causing problems with the sg_persist tool, which now returns success rather then exit status 24 when a RESERVATION CONFLICT error is encountered. Fixes: 2a842acab109 "block: introduce new block status code type" Signed-off-by: Martin Wilck Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7f505c027ce7..37d366696d21 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -734,6 +734,7 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd, set_host_byte(cmd, DID_OK); return BLK_STS_TARGET; case DID_NEXUS_FAILURE: + set_host_byte(cmd, DID_OK); return BLK_STS_NEXUS; case DID_ALLOC_FAILURE: set_host_byte(cmd, DID_OK); From aa4ba765e0903926de64b359e8653bfd29a3c353 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Tue, 18 Dec 2018 20:32:48 -0500 Subject: [PATCH 2148/2608] media: uvcvideo: Fix 'type' check leading to overflow commit 47bb117911b051bbc90764a8bff96543cbd2005f upstream. When initially testing the Camera Terminal Descriptor wTerminalType field (buffer[4]), no mask is used. Later in the function, the MSB is overloaded to store the descriptor subtype, and so a mask of 0x7fff is used to check the type. If a descriptor is specially crafted to set this overloaded bit in the original wTerminalType field, the initial type check will fail (falling through, without adjusting the buffer size), but the later type checks will pass, assuming the buffer has been made suitably large, causing an overflow. Avoid this problem by checking for the MSB in the wTerminalType field. If the bit is set, assume the descriptor is bad, and abort parsing it. Originally reported here: https://groups.google.com/forum/#!topic/syzkaller/Ot1fOE6v1d8 A similar (non-compiling) patch was provided at that time. Reported-by: syzbot Signed-off-by: Alistair Strachan Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_driver.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 064d88299adc..c0176f5d8200 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1054,11 +1054,19 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - /* Make sure the terminal type MSB is not null, otherwise it - * could be confused with a unit. + /* + * Reject invalid terminal types that would cause issues: + * + * - The high byte must be non-zero, otherwise it would be + * confused with a unit. + * + * - Bit 15 must be 0, as we use it internally as a terminal + * direction flag. + * + * Other unknown types are accepted. */ type = get_unaligned_le16(&buffer[4]); - if ((type & 0xff00) == 0) { + if ((type & 0x7f00) == 0 || (type & 0x8000) != 0) { uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " "interface %d INPUT_TERMINAL %d has invalid " "type 0x%04x, skipping\n", udev->devnum, From 61a2e1118c8a629c463e86a64ddb8d5bf5252b07 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Sun, 6 Jan 2019 21:31:20 -0500 Subject: [PATCH 2149/2608] vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel [ Upstream commit dd9ee3444014e8f28c0eefc9fffc9ac9c5248c12 ] Recently we run a network test over ipcomp virtual tunnel.We find that if a ipv4 packet needs fragment, then the peer can't receive it. We deep into the code and find that when packet need fragment the smaller fragment will be encapsulated by ipip not ipcomp. So when the ipip packet goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code always set skb'dev to the last fragment's dev. After ipv4 defrag processing, when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV error. This patch adds compatible support for the ipip process in ipcomp virtual tunnel. Signed-off-by: Su Yanjun Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv4/ip_vti.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 00d4371d4573..306603a7f351 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -74,6 +74,33 @@ drop: return 0; } +static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + struct ip_tunnel *tunnel; + const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + + skb->dev = tunnel->dev; + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + static int vti_rcv(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; @@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb) return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); } +static int vti_rcv_ipip(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +} + static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -439,6 +474,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = vti_rcv_ipip, + .err_handler = vti4_err, + .priority = 0, +}; + static int __net_init vti_init_net(struct net *net) { int err; @@ -607,6 +648,13 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed; + msg = "ipip tunnel"; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: cant't register tunnel\n",__func__); + goto xfrm_tunnel_failed; + } + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) @@ -616,6 +664,8 @@ static int __init vti_init(void) rtnl_link_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); +xfrm_tunnel_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: From 482caab70b654d312012cdc5c910147ac3ffa433 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 10 Jan 2019 17:17:16 -0800 Subject: [PATCH 2150/2608] perf core: Fix perf_proc_update_handler() bug [ Upstream commit 1a51c5da5acc6c188c917ba572eebac5f8793432 ] The perf_proc_update_handler() handles /proc/sys/kernel/perf_event_max_sample_rate syctl variable. When the PMU IRQ handler timing monitoring is disabled, i.e, when /proc/sys/kernel/perf_cpu_time_max_percent is equal to 0 or 100, then no modification to sysctl_perf_event_sample_rate is allowed to prevent possible hang from wrong values. The problem is that the test to prevent modification is made after the sysctl variable is modified in perf_proc_update_handler(). You get an error: $ echo 10001 >/proc/sys/kernel/perf_event_max_sample_rate echo: write error: invalid argument But the value is still modified causing all sorts of inconsistencies: $ cat /proc/sys/kernel/perf_event_max_sample_rate 10001 This patch fixes the problem by moving the parsing of the value after the test. Committer testing: # echo 100 > /proc/sys/kernel/perf_cpu_time_max_percent # echo 10001 > /proc/sys/kernel/perf_event_max_sample_rate -bash: echo: write error: Invalid argument # cat /proc/sys/kernel/perf_event_max_sample_rate 10001 # Signed-off-by: Stephane Eranian Reviewed-by: Andi Kleen Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1547169436-6266-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- kernel/events/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 17d5d41464c6..92939b5397df 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - - if (ret || !write) - return ret; - + int ret; + int perf_cpu = sysctl_perf_cpu_time_max_percent; /* * If throttling is disabled don't allow the write: */ - if (sysctl_perf_cpu_time_max_percent == 100 || - sysctl_perf_cpu_time_max_percent == 0) + if (write && (perf_cpu == 100 || perf_cpu == 0)) return -EINVAL; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; update_perf_cpu_limits(); From 01b094e3c2c5e00a74abd9a978e0ce6c3b5e68a4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Sat, 19 Jan 2019 00:12:39 -0800 Subject: [PATCH 2151/2608] perf tools: Handle TOPOLOGY headers with no CPU [ Upstream commit 1497e804d1a6e2bd9107ddf64b0310449f4673eb ] This patch fixes an issue in cpumap.c when used with the TOPOLOGY header. In some configurations, some NUMA nodes may have no CPU (empty cpulist). Yet a cpumap map must be created otherwise perf abort with an error. This patch handles this case by creating a dummy map. Before: $ perf record -o - -e cycles noploop 2 | perf script -i - 0x6e8 [0x6c]: failed to process type: 80 After: $ perf record -o - -e cycles noploop 2 | perf script -i - noploop for 2 seconds Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1547885559-1657-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/cpumap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 1ccbd3342069..383674f448fc 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (!cpu_list) return cpu_map__read_all_cpu_map(); - if (!isdigit(*cpu_list)) + /* + * must handle the case of empty cpumap to cover + * TOPOLOGY header for NUMA nodes with no CPU + * ( e.g., because of CPU hotplug) + */ + if (!isdigit(*cpu_list) && *cpu_list != '\0') goto out; while (isdigit(*cpu_list)) { @@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else + else if (*cpu_list != '\0') cpus = cpu_map__default_new(); + else + cpus = cpu_map__dummy_new(); invalid: free(tmp_cpus); out: From 3162d8af3f5d89141843a4f6bfd6a749ddbbe08d Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Thu, 17 Jan 2019 12:41:32 -0800 Subject: [PATCH 2152/2608] IB/{hfi1, qib}: Fix WC.byte_len calculation for UD_SEND_WITH_IMM [ Upstream commit 904bba211acc2112fdf866e5a2bc6cd9ecd0de1b ] The work completion length for a receiving a UD send with immediate is short by 4 bytes causing application using this opcode to fail. The UD receive logic incorrectly subtracts 4 bytes for immediate value. These bytes are already included in header length and are used to calculate header/payload split, so the result is these 4 bytes are subtracted twice, once when the header length subtracted from the overall length and once again in the UD opcode specific path. Remove the extra subtraction when handling the opcode. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Michael J. Ruhl Signed-off-by: Brian Welty Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/ud.c | 1 - drivers/infiniband/hw/qib/qib_ud.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 37abd150fad3..74aff88c593d 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -954,7 +954,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index be4907453ac4..5ef144e4a4cb 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -515,7 +515,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; From 1508187f922aa965c4365216aa81d44359fcb1a6 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Thu, 17 Jan 2019 12:29:02 -0700 Subject: [PATCH 2153/2608] iommu/amd: Call free_iova_fast with pfn in map_sg [ Upstream commit 51d8838d66d3249508940d8f59b07701f2129723 ] In the error path of map_sg, free_iova_fast is being called with address instead of the pfn. This results in a bad value getting into the rcache, and can result in hitting a BUG_ON when iova_magazine_free_pfns is called. Cc: Joerg Roedel Cc: Suravee Suthikulpanit Signed-off-by: Jerry Snitselaar Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 766103ea237e..ded13a6afa66 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2566,7 +2566,7 @@ out_unmap: } out_free_iova: - free_iova_fast(&dma_dom->iovad, address, npages); + free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages); out_err: return 0; From 878a9d34e4e815518c0deea3e0e53ae5b4356373 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Sat, 19 Jan 2019 10:38:05 -0700 Subject: [PATCH 2154/2608] iommu/amd: Unmap all mapped pages in error path of map_sg [ Upstream commit f1724c0883bb0ce93b8dcb94b53dcca3b75ac9a7 ] In the error path of map_sg there is an incorrect if condition for breaking out of the loop that searches the scatterlist for mapped pages to unmap. Instead of breaking out of the loop once all the pages that were mapped have been unmapped, it will break out of the loop after it has unmapped 1 page. Fix the condition, so it breaks out of the loop only after all the mapped pages have been unmapped. Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Cc: Joerg Roedel Signed-off-by: Jerry Snitselaar Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ded13a6afa66..0c0acf6fda87 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2560,7 +2560,7 @@ out_unmap: bus_addr = address + s->dma_address + (j << PAGE_SHIFT); iommu_unmap_page(domain, bus_addr, PAGE_SIZE); - if (--mapped_pages) + if (--mapped_pages == 0) goto out_free_iova; } } From 34f02c1dd2919dca9e1c90fb497ff0e61e0fc248 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Thu, 10 Jan 2019 16:39:06 +0800 Subject: [PATCH 2155/2608] ipvs: Fix signed integer overflow when setsockopt timeout [ Upstream commit 53ab60baa1ac4f20b080a22c13b77b6373922fd7 ] There is a UBSAN bug report as below: UBSAN: Undefined behaviour in net/netfilter/ipvs/ip_vs_ctl.c:2227:21 signed integer overflow: -2147483647 * 1000 cannot be represented in type 'int' Reproduce program: #include #include #include #define IPPROTO_IP 0 #define IPPROTO_RAW 255 #define IP_VS_BASE_CTL (64+1024+64) #define IP_VS_SO_SET_TIMEOUT (IP_VS_BASE_CTL+10) /* The argument to IP_VS_SO_GET_TIMEOUT */ struct ipvs_timeout_t { int tcp_timeout; int tcp_fin_timeout; int udp_timeout; }; int main() { int ret = -1; int sockfd = -1; struct ipvs_timeout_t to; sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); if (sockfd == -1) { printf("socket init error\n"); return -1; } to.tcp_timeout = -2147483647; to.tcp_fin_timeout = -2147483647; to.udp_timeout = -2147483647; ret = setsockopt(sockfd, IPPROTO_IP, IP_VS_SO_SET_TIMEOUT, (char *)(&to), sizeof(to)); printf("setsockopt return %d\n", ret); return ret; } Return -EINVAL if the timeout value is negative or max than 'INT_MAX / HZ'. Signed-off-by: ZhangXiaoxu Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_ctl.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2f45c3ce77ef..dff4ead3d117 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2252,6 +2252,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user u->tcp_fin_timeout, u->udp_timeout); +#ifdef CONFIG_IP_VS_PROTO_TCP + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { + return -EINVAL; + } +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) + return -EINVAL; +#endif + #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); From ef41aa7ee085d6cedb14fde1c8c0e533150198d3 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 24 Jan 2019 04:16:45 +0000 Subject: [PATCH 2156/2608] iommu/amd: Fix IOMMU page flush when detach device from a domain [ Upstream commit 9825bd94e3a2baae1f4874767ae3a7d4c049720e ] When a VM is terminated, the VFIO driver detaches all pass-through devices from VFIO domain by clearing domain id and page table root pointer from each device table entry (DTE), and then invalidates the DTE. Then, the VFIO driver unmap pages and invalidate IOMMU pages. Currently, the IOMMU driver keeps track of which IOMMU and how many devices are attached to the domain. When invalidate IOMMU pages, the driver checks if the IOMMU is still attached to the domain before issuing the invalidate page command. However, since VFIO has already detached all devices from the domain, the subsequent INVALIDATE_IOMMU_PAGES commands are being skipped as there is no IOMMU attached to the domain. This results in data corruption and could cause the PCI device to end up in indeterministic state. Fix this by invalidate IOMMU pages when detach a device, and before decrementing the per-domain device reference counts. Cc: Boris Ostrovsky Suggested-by: Joerg Roedel Co-developed-by: Brijesh Singh Signed-off-by: Brijesh Singh Signed-off-by: Suravee Suthikulpanit Fixes: 6de8ad9b9ee0 ('x86/amd-iommu: Make iommu_flush_pages aware of multiple IOMMUs') Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 0c0acf6fda87..78b97f31a1f2 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1919,6 +1919,7 @@ static void do_attach(struct iommu_dev_data *dev_data, static void do_detach(struct iommu_dev_data *dev_data) { + struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; u16 alias; @@ -1934,10 +1935,6 @@ static void do_detach(struct iommu_dev_data *dev_data) iommu = amd_iommu_rlookup_table[dev_data->devid]; alias = dev_data->alias; - /* decrease reference counters */ - dev_data->domain->dev_iommu[iommu->index] -= 1; - dev_data->domain->dev_cnt -= 1; - /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); @@ -1947,6 +1944,16 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Flush the DTE entry */ device_flush_dte(dev_data); + + /* Flush IOTLB */ + domain_flush_tlb_pde(domain); + + /* Wait for the flushes to finish */ + domain_flush_complete(domain); + + /* decrease reference counters - needs to happen after the flushes */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; } /* From 51cbb781d215eed7ee9146eef96fdce7cbe98132 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 29 Jan 2018 09:09:41 -0800 Subject: [PATCH 2157/2608] xtensa: SMP: fix ccount_timer_shutdown [ Upstream commit 4fe8713b873fc881284722ce4ac47995de7cf62c ] ccount_timer_shutdown is called from the atomic context in the secondary_start_kernel, resulting in the following BUG: BUG: sleeping function called from invalid context in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1 Preemption disabled at: secondary_start_kernel+0xa1/0x130 Call Trace: ___might_sleep+0xe7/0xfc __might_sleep+0x41/0x44 synchronize_irq+0x24/0x64 disable_irq+0x11/0x14 ccount_timer_shutdown+0x12/0x20 clockevents_switch_state+0x82/0xb4 clockevents_exchange_device+0x54/0x60 tick_check_new_device+0x46/0x70 clockevents_register_device+0x8c/0xc8 clockevents_config_and_register+0x1d/0x2c local_timer_setup+0x75/0x7c secondary_start_kernel+0xb4/0x130 should_never_return+0x32/0x35 Use disable_irq_nosync instead of disable_irq to avoid it. This is safe because the ccount timer IRQ is per-CPU, and once IRQ is masked the ISR will not be called. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index fd524a54d2ab..378186b5eb40 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -89,7 +89,7 @@ static int ccount_timer_shutdown(struct clock_event_device *evt) container_of(evt, struct ccount_timer, evt); if (timer->irq_enabled) { - disable_irq(evt->irq); + disable_irq_nosync(evt->irq); timer->irq_enabled = 0; } return 0; From 33c6313f1e173a739db170addc2049fcf36de307 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Jan 2019 12:38:02 +0000 Subject: [PATCH 2158/2608] selftests: cpu-hotplug: fix case where CPUs offline > CPUs present [ Upstream commit 2b531b6137834a55857a337ac17510d6436b6fbb ] The cpu-hotplug test assumes that we can offline the maximum CPU as described by /sys/devices/system/cpu/offline. However, in the case where the number of CPUs exceeds like kernel configuration then the offline count can be greater than the present count and we end up trying to test the offlining of a CPU that is not available to offline. Fix this by testing the maximum present CPU instead. Also, the test currently offlines the CPU and does not online it, so fix this by onlining the CPU after the test. Fixes: d89dffa976bc ("fault-injection: add selftests for cpu and memory hotplug") Signed-off-by: Colin Ian King Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- .../selftests/cpu-hotplug/cpu-on-off-test.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index f3a8933c1275..49ccd2293343 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -35,6 +35,10 @@ prerequisite() exit 0 fi + present_cpus=`cat $SYSFS/devices/system/cpu/present` + present_max=${present_cpus##*-} + echo "present_cpus = $present_cpus present_max = $present_max" + echo -e "\t Cpus in online state: $online_cpus" offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -149,6 +153,8 @@ online_cpus=0 online_max=0 offline_cpus=0 offline_max=0 +present_cpus=0 +present_max=0 while getopts e:ahp: opt; do case $opt in @@ -188,9 +194,10 @@ if [ $allcpus -eq 0 ]; then online_cpu_expect_success $online_max if [[ $offline_cpus -gt 0 ]]; then - echo -e "\t offline to online to offline: cpu $offline_max" - online_cpu_expect_success $offline_max - offline_cpu_expect_success $offline_max + echo -e "\t offline to online to offline: cpu $present_max" + online_cpu_expect_success $present_max + offline_cpu_expect_success $present_max + online_cpu $present_max fi exit 0 else From 19b7005bfaf93740710424694c65ad396d321577 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 21 Dec 2018 08:26:20 -0800 Subject: [PATCH 2159/2608] xtensa: SMP: fix secondary CPU initialization [ Upstream commit 32a7726c4f4aadfabdb82440d84f88a5a2c8fe13 ] - add missing memory barriers to the secondary CPU synchronization spin loops; add comment to the matching memory barrier in the boot_secondary and __cpu_die functions; - use READ_ONCE/WRITE_ONCE to access cpu_start_id/cpu_start_ccount instead of reading/writing them directly; - re-initialize cpu_running every time before starting secondary CPU to flush possible previous CPU startup results. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/kernel/head.S | 5 ++++- arch/xtensa/kernel/smp.c | 34 +++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 27c8e07ace43..29f445b410b3 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -281,12 +281,13 @@ should_never_return: movi a2, cpu_start_ccount 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b movi a3, 0 s32i a3, a2, 0 - memw 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b wsr a3, ccount @@ -323,11 +324,13 @@ ENTRY(cpu_restart) rsr a0, prid neg a2, a0 movi a3, cpu_start_id + memw s32i a2, a3, 0 #if XCHAL_DCACHE_IS_WRITEBACK dhwbi a3, 0 #endif 1: + memw l32i a2, a3, 0 dhi a3, 0 bne a2, a0, 1b diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 932d64689bac..c9fc2c4f71b3 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -195,9 +195,11 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) int i; #ifdef CONFIG_HOTPLUG_CPU - cpu_start_id = cpu; - system_flush_invalidate_dcache_range( - (unsigned long)&cpu_start_id, sizeof(cpu_start_id)); + WRITE_ONCE(cpu_start_id, cpu); + /* Pairs with the third memw in the cpu_restart */ + mb(); + system_flush_invalidate_dcache_range((unsigned long)&cpu_start_id, + sizeof(cpu_start_id)); #endif smp_call_function_single(0, mx_cpu_start, (void *)cpu, 1); @@ -206,18 +208,21 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) ccount = get_ccount(); while (!ccount); - cpu_start_ccount = ccount; + WRITE_ONCE(cpu_start_ccount, ccount); - while (time_before(jiffies, timeout)) { + do { + /* + * Pairs with the first two memws in the + * .Lboot_secondary. + */ mb(); - if (!cpu_start_ccount) - break; - } + ccount = READ_ONCE(cpu_start_ccount); + } while (ccount && time_before(jiffies, timeout)); - if (cpu_start_ccount) { + if (ccount) { smp_call_function_single(0, mx_cpu_stop, - (void *)cpu, 1); - cpu_start_ccount = 0; + (void *)cpu, 1); + WRITE_ONCE(cpu_start_ccount, 0); return -EIO; } } @@ -237,6 +242,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_debug("%s: Calling wakeup_secondary(cpu:%d, idle:%p, sp: %08lx)\n", __func__, cpu, idle, start_info.stack); + init_completion(&cpu_running); ret = boot_secondary(cpu, idle); if (ret == 0) { wait_for_completion_timeout(&cpu_running, @@ -298,8 +304,10 @@ void __cpu_die(unsigned int cpu) unsigned long timeout = jiffies + msecs_to_jiffies(1000); while (time_before(jiffies, timeout)) { system_invalidate_dcache_range((unsigned long)&cpu_start_id, - sizeof(cpu_start_id)); - if (cpu_start_id == -cpu) { + sizeof(cpu_start_id)); + /* Pairs with the second memw in the cpu_restart */ + mb(); + if (READ_ONCE(cpu_start_id) == -cpu) { platform_cpu_kill(cpu); return; } From 7b72724baf4639445f25f63a2447bafc64fa0035 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 24 Jan 2019 17:16:11 -0800 Subject: [PATCH 2160/2608] xtensa: smp_lx200_defconfig: fix vectors clash [ Upstream commit 306b38305c0f86de7f17c5b091a95451dcc93d7d ] Secondary CPU reset vector overlaps part of the double exception handler code, resulting in weird crashes and hangups when running user code. Move exception vectors one page up so that they don't clash with the secondary CPU reset vector. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/configs/smp_lx200_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 14e3ca353ac8..5035b86a2e49 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -34,6 +34,7 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set # CONFIG_PCI is not set +CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=96M@0" From c754598b5d09b2e938da24f19981a2700fb08a61 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 19 Jan 2019 00:26:48 -0800 Subject: [PATCH 2161/2608] xtensa: SMP: mark each possible CPU as present [ Upstream commit 8b1c42cdd7181200dc1fff39dcb6ac1a3fac2c25 ] Otherwise it is impossible to enable CPUs after booting with 'maxcpus' parameter. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index c9fc2c4f71b3..80be6449c497 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -83,7 +83,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned i; - for (i = 0; i < max_cpus; ++i) + for_each_possible_cpu(i) set_cpu_present(i, true); } From 4c2ceb151db6dc2255581c6c5d4b2f280af94e03 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 26 Jan 2019 20:35:18 -0800 Subject: [PATCH 2162/2608] xtensa: SMP: limit number of possible CPUs by NR_CPUS [ Upstream commit 25384ce5f9530def39421597b1457d9462df6455 ] This fixes the following warning at boot when the kernel is booted on a board with more CPU cores than was configured in NR_CPUS: smp_init_cpus: Core Count = 8 smp_init_cpus: Core Id = 0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at include/linux/cpumask.h:121 smp_init_cpus+0x54/0x74 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.0.0-rc3-00015-g1459333f88a0 #124 Call Trace: __warn$part$3+0x6a/0x7c warn_slowpath_null+0x35/0x3c smp_init_cpus+0x54/0x74 setup_arch+0x1c0/0x1d0 start_kernel+0x44/0x310 _startup+0x107/0x107 Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 80be6449c497..be1f280c322c 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -96,6 +96,11 @@ void __init smp_init_cpus(void) pr_info("%s: Core Count = %d\n", __func__, ncpus); pr_info("%s: Core Id = %d\n", __func__, core_id); + if (ncpus > NR_CPUS) { + ncpus = NR_CPUS; + pr_info("%s: limiting core count by %d\n", __func__, ncpus); + } + for (i = 0; i < ncpus; ++i) set_cpu_possible(i, true); } From 140e51241934f5da66a590f00dd147a72094e236 Mon Sep 17 00:00:00 2001 From: Tomonori Sakita Date: Fri, 25 Jan 2019 11:02:22 +0900 Subject: [PATCH 2163/2608] net: altera_tse: fix msgdma_tx_completion on non-zero fill_level case [ Upstream commit 6571ebce112a21ec9be68ef2f53b96fcd41fd81b ] If fill_level was not zero and status was not BUSY, result of "tx_prod - tx_cons - inuse" might be zero. Subtracting 1 unconditionally results invalid negative return value on this case. Make sure not to return an negative value. Signed-off-by: Tomonori Sakita Signed-off-by: Atsushi Nemoto Reviewed-by: Dalon L Westergreen Acked-by: Thor Thayer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/altera/altera_msgdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c index 0fb986ba3290..0ae723f75341 100644 --- a/drivers/net/ethernet/altera/altera_msgdma.c +++ b/drivers/net/ethernet/altera/altera_msgdma.c @@ -145,7 +145,8 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) & 0xffff; if (inuse) { /* Tx FIFO is not empty */ - ready = priv->tx_prod - priv->tx_cons - inuse - 1; + ready = max_t(int, + priv->tx_prod - priv->tx_cons - inuse - 1, 0); } else { /* Check for buffered last packet */ status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status)); From d70329476999adf562b40af15aacec10b496a6ea Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 26 Jan 2019 17:18:25 +0800 Subject: [PATCH 2164/2608] net: hns: Fix for missing of_node_put() after of_parse_phandle() [ Upstream commit 263c6d75f9a544a3c2f8f6a26de4f4808d8f59cf ] In hns enet driver, we use of_parse_handle() to get hold of the device node related to "ae-handle" but we have missed to put the node reference using of_node_put() after we are done using the node. This patch fixes it. Note: This problem is stated in Link: https://lkml.org/lkml/2018/12/22/217 Fixes: 48189d6aaf1e ("net: hns: enet specifies a reference to dsaf") Reported-by: Alexey Khoroshilov Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 86662a14208e..d30c28fba249 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2532,6 +2532,8 @@ static int hns_nic_dev_probe(struct platform_device *pdev) out_notify_fail: (void)cancel_work_sync(&priv->service_task); out_read_prop_fail: + /* safe for ACPI FW */ + of_node_put(to_of_node(priv->fwnode)); free_netdev(ndev); return ret; } @@ -2561,6 +2563,9 @@ static int hns_nic_dev_remove(struct platform_device *pdev) set_bit(NIC_STATE_REMOVING, &priv->state); (void)cancel_work_sync(&priv->service_task); + /* safe for ACPI FW */ + of_node_put(to_of_node(priv->fwnode)); + free_netdev(ndev); return 0; } From a60323e6b9661e007b094a9d8327e7917b79b8af Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 26 Jan 2019 17:18:26 +0800 Subject: [PATCH 2165/2608] net: hns: Restart autoneg need return failed when autoneg off [ Upstream commit ed29ca8b9592562559c64d027fb5eb126e463e2c ] The hns driver of earlier devices, when autoneg off, restart autoneg will return -EINVAL, so make the hns driver for the latest devices do the same. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index c1e947bb852f..14df03f60e05 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1154,16 +1154,18 @@ static int hns_get_regs_len(struct net_device *net_dev) */ static int hns_nic_nway_reset(struct net_device *netdev) { - int ret = 0; struct phy_device *phy = netdev->phydev; - if (netif_running(netdev)) { - /* if autoneg is disabled, don't restart auto-negotiation */ - if (phy && phy->autoneg == AUTONEG_ENABLE) - ret = genphy_restart_aneg(phy); - } + if (!netif_running(netdev)) + return 0; - return ret; + if (!phy) + return -EOPNOTSUPP; + + if (phy->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + return genphy_restart_aneg(phy); } static u32 From 04731b3c702b6008786542707259f1bbe21caae5 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 26 Jan 2019 17:18:27 +0800 Subject: [PATCH 2166/2608] net: hns: Fix wrong read accesses via Clause 45 MDIO protocol [ Upstream commit cec8abba13e6a26729dfed41019720068eeeff2b ] When reading phy registers via Clause 45 MDIO protocol, after write address operation, the driver use another write address operation, so can not read the right value of any phy registers. This patch fixes it. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 017e08452d8c..baf5cc251f32 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -321,7 +321,7 @@ static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum) } hns_mdio_cmd_write(mdio_dev, is_c45, - MDIO_C45_WRITE_ADDR, phy_id, devad); + MDIO_C45_READ, phy_id, devad); } /* Step 5: waitting for MDIO_COMMAND_REG 's mdio_start==0,*/ From 366abec996a44f273c5556f2d132ad45afe4f39f Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 26 Jan 2019 22:48:57 +0300 Subject: [PATCH 2167/2608] net: stmmac: dwmac-rk: fix error handling in rk_gmac_powerup() [ Upstream commit c69c29a1a0a8f68cd87e98ba4a5a79fb8ef2a58c ] If phy_power_on() fails in rk_gmac_powerup(), clocks are left enabled. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 13133b30b575..01787344f6e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1284,8 +1284,10 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) } ret = phy_power_on(bsp_priv, true); - if (ret) + if (ret) { + gmac_clk_enable(bsp_priv, false); return ret; + } pm_runtime_enable(dev); pm_runtime_get_sync(dev); From f3731c960cbd9e0a13c26266397cd0568524f4fb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Jan 2019 21:54:36 +0100 Subject: [PATCH 2168/2608] netfilter: ebtables: compat: un-break 32bit setsockopt when no rules are present [ Upstream commit 2035f3ff8eaa29cfb5c8e2160b0f6e85eeb21a95 ] Unlike ip(6)tables ebtables only counts user-defined chains. The effect is that a 32bit ebtables binary on a 64bit kernel can do 'ebtables -N FOO' only after adding at least one rule, else the request fails with -EINVAL. This is a similar fix as done in 3f1e53abff84 ("netfilter: ebtables: don't attempt to allocate 0-sized compat array"). Fixes: 7d7d7e02111e9 ("netfilter: compat: reject huge allocation requests") Reported-by: Francesco Ruggeri Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/netfilter/ebtables.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 22e4c15a1fc3..53392ac58b38 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2292,9 +2292,12 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); - if (ret < 0) - goto out_unlock; + if (tmp.nentries) { + ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); + if (ret < 0) + goto out_unlock; + } + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; From 50d8083d733e2e656d15973eb28630f983b33784 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 27 Jan 2019 22:58:00 +0100 Subject: [PATCH 2169/2608] gpio: vf610: Mask all GPIO interrupts [ Upstream commit 7ae710f9f8b2cf95297e7bbfe1c09789a7dc43d4 ] On SoC reset all GPIO interrupts are disable. However, if kexec is used to boot into a new kernel, the SoC does not experience a reset. Hence GPIO interrupts can be left enabled from the previous kernel. It is then possible for the interrupt to fire before an interrupt handler is registered, resulting in the kernel complaining of an "unexpected IRQ trap", the interrupt is never cleared, and so fires again, resulting in an interrupt storm. Disable all GPIO interrupts before registering the GPIO IRQ chip. Fixes: 7f2691a19627 ("gpio: vf610: add gpiolib/IRQ chip driver for Vybrid") Signed-off-by: Andrew Lunn Acked-by: Stefan Agner Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-vf610.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index cbe9e06861de..1309b444720e 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -261,6 +261,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct resource *iores; struct gpio_chip *gc; + int i; int ret; port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); @@ -300,6 +301,10 @@ static int vf610_gpio_probe(struct platform_device *pdev) if (ret < 0) return ret; + /* Mask all GPIO interrupts */ + for (i = 0; i < gc->ngpio; i++) + vf610_gpio_writel(0, port->base + PORT_PCR(i)); + /* Clear the interrupt status register for all GPIO's */ vf610_gpio_writel(~0, port->base + PORT_ISFR); From d2a1e0c3858470f5d9d9347a9358b2f123459f50 Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Wed, 16 Jan 2019 11:43:20 -0600 Subject: [PATCH 2170/2608] selftests: timers: use LDLIBS instead of LDFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d4e591bc051d3382c45caaa2530969fb42ed23d ] posix_timers fails to build due to undefined reference errors: aarch64-linaro-linux-gcc --sysroot=/build/tmp-rpb-glibc/sysroots/hikey -O2 -pipe -g -feliminate-unused-debug-types -O3 -Wl,-no-as-needed -Wall -DKTEST -Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed -lrt -lpthread posix_timers.c -o /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers /tmp/cc1FTZzT.o: In function `check_timer_create': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers.c:157: undefined reference to `timer_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers.c:170: undefined reference to `timer_settime' collect2: error: ld returned 1 exit status It's GNU Make and linker specific. The default Makefile rule looks like: $(CC) $(CFLAGS) $(LDFLAGS) $@ $^ $(LDLIBS) When linking is done by gcc itself, no issue, but when it needs to be passed to proper ld, only LDLIBS follows and then ld cannot know what libs to link with. More detail: https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html LDFLAGS Extra flags to give to compilers when they are supposed to invoke the linker, ‘ld’, such as -L. Libraries (-lfoo) should be added to the LDLIBS variable instead. LDLIBS Library flags or names given to compilers when they are supposed to invoke the linker, ‘ld’. LOADLIBES is a deprecated (but still supported) alternative to LDLIBS. Non-library linker flags, such as -L, should go in the LDFLAGS variable. https://lkml.org/lkml/2010/2/10/362 tools/perf: libraries must come after objects Link order matters, use LDLIBS instead of LDFLAGS to properly link against libpthread. Signed-off-by: Denys Dmytriyenko Signed-off-by: Fathi Boudra Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/timers/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 3496680981f2..d937e45532d8 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O3 -Wl,-no-as-needed -Wall -LDFLAGS += -lrt -lpthread -lm +LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges From e5f77942d9533aef9f5cb1c962eb712134da6938 Mon Sep 17 00:00:00 2001 From: Yao Liu Date: Mon, 28 Jan 2019 19:44:14 +0800 Subject: [PATCH 2171/2608] nfs: Fix NULL pointer dereference of dev_name [ Upstream commit 80ff00172407e0aad4b10b94ef0816fc3e7813cb ] There is a NULL pointer dereference of dev_name in nfs_parse_devname() The oops looks something like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... RIP: 0010:nfs_fs_mount+0x3b6/0xc20 [nfs] ... Call Trace: ? ida_alloc_range+0x34b/0x3d0 ? nfs_clone_super+0x80/0x80 [nfs] ? nfs_free_parsed_mount_data+0x60/0x60 [nfs] mount_fs+0x52/0x170 ? __init_waitqueue_head+0x3b/0x50 vfs_kern_mount+0x6b/0x170 do_mount+0x216/0xdc0 ksys_mount+0x83/0xd0 __x64_sys_mount+0x25/0x30 do_syscall_64+0x65/0x220 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fix this by adding a NULL check on dev_name Signed-off-by: Yao Liu Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 3c4aeb83e1c4..77d8d03344c8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1901,6 +1901,11 @@ static int nfs_parse_devname(const char *dev_name, size_t len; char *end; + if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']'); From 88b5593c24dde42946e5c245ba88201fd77f9a33 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:04 -0800 Subject: [PATCH 2172/2608] qed: Fix bug in tx promiscuous mode settings [ Upstream commit 9e71a15d8b5bbce25c637f7f8833cd3f45b65646 ] When running tx switched traffic between VNICs created via a bridge(to which VFs are added), adapter drops the unicast packets in tx flow due to VNIC's ucast mac being unknown to it. But VF interfaces being in promiscuous mode should have caused adapter to accept all the unknown ucast packets. Later, it was found that driver doesn't really configure tx promiscuous mode settings to accept all unknown unicast macs. This patch fixes tx promiscuous mode settings to accept all unknown/unmatched unicast macs and works out the scenario. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 83c1c4fa102b..5191b575d57b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -607,6 +607,10 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn, (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) && !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED))); + SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL, + (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) && + !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); + SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL, !!(accept_filter & QED_ACCEPT_BCAST)); @@ -2640,7 +2644,8 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev, if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) { accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; - accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; + accept_flags.tx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | + QED_ACCEPT_MCAST_UNMATCHED; } else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) { accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; From e66622771e8379e69693e041b6f088a39673ecf5 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:05 -0800 Subject: [PATCH 2173/2608] qed: Fix LACP pdu drops for VFs [ Upstream commit ff9296966e5e00b0d0d00477b2365a178f0f06a3 ] VF is always configured to drop control frames (with reserved mac addresses) but to work LACP on the VFs, it would require LACP control frames to be forwarded or transmitted successfully. This patch fixes this in such a way that trusted VFs (marked through ndo_set_vf_trust) would be allowed to pass the control frames such as LACP pdus. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 5 +++++ drivers/net/ethernet/qlogic/qed/qed_l2.h | 3 +++ drivers/net/ethernet/qlogic/qed/qed_sriov.c | 10 ++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 5191b575d57b..4ffdde755db7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -747,6 +747,11 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, return rc; } + if (p_params->update_ctl_frame_check) { + p_cmn->ctl_frame_mac_check_en = p_params->mac_chk_en; + p_cmn->ctl_frame_ethtype_check_en = p_params->ethtype_chk_en; + } + /* Update mcast bins for VFs, PF doesn't use this functionality */ qed_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index 91d383f3a661..7c41142452a3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -218,6 +218,9 @@ struct qed_sp_vport_update_params { struct qed_rss_params *rss_params; struct qed_filter_accept_flags accept_flags; struct qed_sge_tpa_params *sge_tpa_params; + u8 update_ctl_frame_check; + u8 mac_chk_en; + u8 ethtype_chk_en; }; int qed_sp_vport_update(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index c6411158afd7..65a53d409e77 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1963,7 +1963,9 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, params.vport_id = vf->vport_id; params.max_buffers_per_cqe = start->max_buffers_per_cqe; params.mtu = vf->mtu; - params.check_mac = true; + + /* Non trusted VFs should enable control frame filtering */ + params.check_mac = !vf->p_vf_info.is_trusted_configured; rc = qed_sp_eth_vport_start(p_hwfn, ¶ms); if (rc) { @@ -4910,6 +4912,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.opaque_fid = vf->opaque_fid; params.vport_id = vf->vport_id; + params.update_ctl_frame_check = 1; + params.mac_chk_en = !vf_info->is_trusted_configured; + if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; flags->rx_accept_filter = vf_info->rx_accept_mode; @@ -4927,7 +4932,8 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) } if (flags->update_rx_mode_config || - flags->update_tx_mode_config) + flags->update_tx_mode_config || + params.update_ctl_frame_check) qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); } From dd155c87ca08b17ffebce151efda853080d67f96 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:06 -0800 Subject: [PATCH 2174/2608] qed: Fix VF probe failure while FLR [ Upstream commit 327852ec64205bb651be391a069784872098a3b2 ] VFs may hit VF-PF channel timeout while probing, as in some cases it was observed that VF FLR and VF "acquire" message transaction (i.e first message from VF to PF in VF's probe flow) could occur simultaneously which could lead VF to fail sending "acquire" message to PF as VF is marked disabled from HW perspective due to FLR, which will result into channel timeout and VF probe failure. In such cases, try retrying VF "acquire" message so that in later attempts it could be successful to pass message to PF after the VF FLR is completed and can be probed successfully. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index dd8ebf6d380f..3220086f99de 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -261,6 +261,7 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp; struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info; struct vf_pf_resc_request *p_resc; + u8 retry_cnt = VF_ACQUIRE_THRESH; bool resources_acquired = false; struct vfpf_acquire_tlv *req; int rc = 0, attempts = 0; @@ -314,6 +315,15 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) /* send acquire request */ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); + + /* Re-try acquire in case of vf-pf hw channel timeout */ + if (retry_cnt && rc == -EBUSY) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF retrying to acquire due to VPC timeout\n"); + retry_cnt--; + continue; + } + if (rc) goto exit; From b757bcc92f8bfd24b239e2dc09b19cc6a1245f79 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:07 -0800 Subject: [PATCH 2175/2608] qed: Fix system crash in ll2 xmit [ Upstream commit 7c81626a3c37e4ac320b8ad785694ba498f24794 ] Cache number of fragments in the skb locally as in case of linear skb (with zero fragments), tx completion (or freeing of skb) may happen before driver tries to get number of frgaments from the skb which could lead to stale access to an already freed skb. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index cef619f0ce10..e82adea55ce9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -2299,19 +2299,24 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) { struct qed_ll2_tx_pkt_info pkt; const skb_frag_t *frag; + u8 flags = 0, nr_frags; int rc = -EINVAL, i; dma_addr_t mapping; u16 vlan = 0; - u8 flags = 0; if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { DP_INFO(cdev, "Cannot transmit a checksumed packet\n"); return -EINVAL; } - if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { + /* Cache number of fragments from SKB since SKB may be freed by + * the completion routine after calling qed_ll2_prepare_tx_packet() + */ + nr_frags = skb_shinfo(skb)->nr_frags; + + if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", - 1 + skb_shinfo(skb)->nr_frags); + 1 + nr_frags); return -EINVAL; } @@ -2333,7 +2338,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) } memset(&pkt, 0, sizeof(pkt)); - pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags; + pkt.num_of_bds = 1 + nr_frags; pkt.vlan = vlan; pkt.bd_flags = flags; pkt.tx_dest = QED_LL2_TX_DEST_NW; @@ -2341,12 +2346,17 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) pkt.first_frag_len = skb->len; pkt.cookie = skb; + /* qed_ll2_prepare_tx_packet() may actually send the packet if + * there are no fragments in the skb and subsequently the completion + * routine may run and free the SKB, so no dereferencing the SKB + * beyond this point unless skb has any fragments. + */ rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle, &pkt, 1); if (rc) goto err; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + for (i = 0; i < nr_frags; i++) { frag = &skb_shinfo(skb)->frags[i]; mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0, From fa46a54c8c8d3103886354e2433e445196499bf3 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:08 -0800 Subject: [PATCH 2176/2608] qed: Fix stack out of bounds bug [ Upstream commit ffb057f98928aa099b08e419bbe5afc26ec9f448 ] KASAN reported following bug in qed_init_qm_get_idx_from_flags due to inappropriate casting of "pq_flags". Fix the type of "pq_flags". [ 196.624707] BUG: KASAN: stack-out-of-bounds in qed_init_qm_get_idx_from_flags+0x1a4/0x1b8 [qed] [ 196.624712] Read of size 8 at addr ffff809b00bc7360 by task kworker/0:9/1712 [ 196.624714] [ 196.624720] CPU: 0 PID: 1712 Comm: kworker/0:9 Not tainted 4.18.0-60.el8.aarch64+debug #1 [ 196.624723] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL024 09/26/2018 [ 196.624733] Workqueue: events work_for_cpu_fn [ 196.624738] Call trace: [ 196.624742] dump_backtrace+0x0/0x2f8 [ 196.624745] show_stack+0x24/0x30 [ 196.624749] dump_stack+0xe0/0x11c [ 196.624755] print_address_description+0x68/0x260 [ 196.624759] kasan_report+0x178/0x340 [ 196.624762] __asan_report_load_n_noabort+0x38/0x48 [ 196.624786] qed_init_qm_get_idx_from_flags+0x1a4/0x1b8 [qed] [ 196.624808] qed_init_qm_info+0xec0/0x2200 [qed] [ 196.624830] qed_resc_alloc+0x284/0x7e8 [qed] [ 196.624853] qed_slowpath_start+0x6cc/0x1ae8 [qed] [ 196.624864] __qede_probe.isra.10+0x1cc/0x12c0 [qede] [ 196.624874] qede_probe+0x78/0xf0 [qede] [ 196.624879] local_pci_probe+0xc4/0x180 [ 196.624882] work_for_cpu_fn+0x54/0x98 [ 196.624885] process_one_work+0x758/0x1900 [ 196.624888] worker_thread+0x4e0/0xd18 [ 196.624892] kthread+0x2c8/0x350 [ 196.624897] ret_from_fork+0x10/0x18 [ 196.624899] [ 196.624902] Allocated by task 2: [ 196.624906] kasan_kmalloc.part.1+0x40/0x108 [ 196.624909] kasan_kmalloc+0xb4/0xc8 [ 196.624913] kasan_slab_alloc+0x14/0x20 [ 196.624916] kmem_cache_alloc_node+0x1dc/0x480 [ 196.624921] copy_process.isra.1.part.2+0x1d8/0x4a98 [ 196.624924] _do_fork+0x150/0xfa0 [ 196.624926] kernel_thread+0x48/0x58 [ 196.624930] kthreadd+0x3a4/0x5a0 [ 196.624932] ret_from_fork+0x10/0x18 [ 196.624934] [ 196.624937] Freed by task 0: [ 196.624938] (stack is not available) [ 196.624940] [ 196.624943] The buggy address belongs to the object at ffff809b00bc0000 [ 196.624943] which belongs to the cache thread_stack of size 32768 [ 196.624946] The buggy address is located 29536 bytes inside of [ 196.624946] 32768-byte region [ffff809b00bc0000, ffff809b00bc8000) [ 196.624948] The buggy address belongs to the page: [ 196.624952] page:ffff7fe026c02e00 count:1 mapcount:0 mapping:ffff809b4001c000 index:0x0 compound_mapcount: 0 [ 196.624960] flags: 0xfffff8000008100(slab|head) [ 196.624967] raw: 0fffff8000008100 dead000000000100 dead000000000200 ffff809b4001c000 [ 196.624970] raw: 0000000000000000 0000000000080008 00000001ffffffff 0000000000000000 [ 196.624973] page dumped because: kasan: bad access detected [ 196.624974] [ 196.624976] Memory state around the buggy address: [ 196.624980] ffff809b00bc7200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624983] ffff809b00bc7280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624985] >ffff809b00bc7300: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 04 f2 f2 f2 [ 196.624988] ^ [ 196.624990] ffff809b00bc7380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624993] ffff809b00bc7400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624995] ================================================================== Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 16953c4ebd71..410528e7d927 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -435,19 +435,19 @@ static void qed_init_qm_pq(struct qed_hwfn *p_hwfn, /* get pq index according to PQ_FLAGS */ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, - u32 pq_flags) + unsigned long pq_flags) { struct qed_qm_info *qm_info = &p_hwfn->qm_info; /* Can't have multiple flags set here */ - if (bitmap_weight((unsigned long *)&pq_flags, + if (bitmap_weight(&pq_flags, sizeof(pq_flags) * BITS_PER_BYTE) > 1) { - DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags); + DP_ERR(p_hwfn, "requested multiple pq flags 0x%lx\n", pq_flags); goto err; } if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) { - DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags); + DP_ERR(p_hwfn, "pq flag 0x%lx is not set\n", pq_flags); goto err; } From 6487e6b6d494175e1c729d13dc27e23276d8bd2c Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Thu, 24 Jan 2019 13:25:42 +0800 Subject: [PATCH 2177/2608] scsi: libfc: free skb when receiving invalid flogi resp [ Upstream commit 5d8fc4a9f0eec20b6c07895022a6bea3fb6dfb38 ] The issue to be fixed in this commit is when libfc found it received a invalid FLOGI response from FC switch, it would return without freeing the fc frame, which is just the skb data. This would cause memory leak if FC switch keeps sending invalid FLOGI responses. This fix is just to make it execute `fc_frame_free(fp)` before returning from function `fc_lport_flogi_resp`. Signed-off-by: Ming Lu Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libfc/fc_lport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 2fd0ec651170..ca7967e390f1 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1739,14 +1739,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, fc_frame_payload_op(fp) != ELS_LS_ACC) { FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } flp = fc_frame_payload_get(fp, sizeof(*flp)); if (!flp) { FC_LPORT_DBG(lport, "FLOGI bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } mfs = ntohs(flp->fl_csp.sp_bb_data) & @@ -1756,7 +1756,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, " "lport->mfs:%hu\n", mfs, lport->mfs); fc_lport_error(lport, fp); - goto err; + goto out; } if (mfs <= lport->mfs) { From 84a78676bba9472d3e81fad5d74127c7ebb0c2ed Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Jan 2019 13:33:27 +0300 Subject: [PATCH 2178/2608] scsi: 53c700: pass correct "dev" to dma_alloc_attrs() [ Upstream commit 8437fcf14deed67e5ad90b5e8abf62fb20f30881 ] The "hostdata->dev" pointer is NULL here. We set "hostdata->dev = dev;" later in the function and we also use "hostdata->dev" when we call dma_free_attrs() in NCR_700_release(). This bug predates git version control. Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/53c700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 6be77b3aa8a5..ac79f2088b31 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -295,7 +295,7 @@ NCR_700_detect(struct scsi_host_template *tpnt, if(tpnt->sdev_attrs == NULL) tpnt->sdev_attrs = NCR_700_dev_attrs; - memory = dma_alloc_attrs(hostdata->dev, TOTAL_MEM_SIZE, &pScript, + memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); if(memory == NULL) { printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n"); From b9f6c4ea3b097078c2e285fb69c67eca9061e43e Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 24 Jan 2019 19:31:01 +0000 Subject: [PATCH 2179/2608] platform/x86: Fix unmet dependency warning for SAMSUNG_Q10 [ Upstream commit 0ee4b5f801b73b83a9fb3921d725f2162fd4a2e5 ] Add BACKLIGHT_LCD_SUPPORT for SAMSUNG_Q10 to fix the warning: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE. SAMSUNG_Q10 selects BACKLIGHT_CLASS_DEVICE but BACKLIGHT_CLASS_DEVICE depends on BACKLIGHT_LCD_SUPPORT. Copy BACKLIGHT_LCD_SUPPORT dependency into SAMSUNG_Q10 to fix: WARNING: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE Depends on [n]: HAS_IOMEM [=y] && BACKLIGHT_LCD_SUPPORT [=n] Selected by [y]: - SAMSUNG_Q10 [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] Signed-off-by: Sinan Kaya Acked-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 80b87954f6dd..09035705d0a0 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -999,6 +999,7 @@ config INTEL_OAKTRAIL config SAMSUNG_Q10 tristate "Samsung Q10 Extras" depends on ACPI + depends on BACKLIGHT_LCD_SUPPORT select BACKLIGHT_CLASS_DEVICE ---help--- This driver provides support for backlight control on Samsung Q10 From 3c7de41bd199883c3cc04e532827fc272e99c0b0 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 29 Jan 2019 12:46:16 +1000 Subject: [PATCH 2180/2608] cifs: fix computation for MAX_SMB2_HDR_SIZE [ Upstream commit 58d15ed1203f4d858c339ea4d7dafa94bd2a56d3 ] The size of the fixed part of the create response is 88 bytes not 56. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Sasha Levin --- fs/cifs/smb2pdu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e52454059725..bad458a2b579 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -84,8 +84,8 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 -/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ -#define MAX_SMB2_HDR_SIZE 0x00b0 +/* 52 transform hdr + 64 hdr + 88 create rsp */ +#define MAX_SMB2_HDR_SIZE 204 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) From 2f3317ddcb55b59e66c1629f39063473949ebf99 Mon Sep 17 00:00:00 2001 From: Thomas Lendacky Date: Thu, 31 Jan 2019 14:33:06 +0000 Subject: [PATCH 2181/2608] x86/microcode/amd: Don't falsely trick the late loading mechanism [ Upstream commit 912139cfbfa6a2bc1da052314d2c29338dae1f6a ] The load_microcode_amd() function searches for microcode patches and attempts to apply a microcode patch if it is of different level than the currently installed level. While the processor won't actually load a level that is less than what is already installed, the logic wrongly returns UCODE_NEW thus signaling to its caller reload_store() that a late loading should be attempted. If the file-system contains an older microcode revision than what is currently running, such a late microcode reload can result in these misleading messages: x86/CPU: CPU features have changed after loading microcode, but might not take effect. x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update. These messages were issued on a system where SME/SEV are not enabled by the BIOS (MSR C001_0010[23] = 0b) because during boot, early_detect_mem_encrypt() is called and cleared the SME and SEV features in this case. However, after the wrong late load attempt, get_cpu_cap() is called and reloads the SME and SEV feature bits, resulting in the messages. Update the microcode level check to not attempt microcode loading if the current level is greater than(!) and not only equal to the current patch level. [ bp: massage commit message. ] Fixes: 2613f36ed965 ("x86/microcode: Attempt late loading only when new microcode is present") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/154894518427.9406.8246222496874202773.stgit@tlendack-t1.amdoffice.net Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9d33dbf2489e..d0a61d3e2fb9 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -707,7 +707,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) if (!p) { return ret; } else { - if (boot_cpu_data.microcode == p->patch_id) + if (boot_cpu_data.microcode >= p->patch_id) return ret; ret = UCODE_NEW; From c72b4dcd26795d6941db0cedd2c2212bb9b32d2f Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Jan 2019 16:32:55 +0000 Subject: [PATCH 2182/2608] arm64: kprobe: Always blacklist the KVM world-switch code [ Upstream commit f2b3d8566d81deaca31f4e3163def0bea7746e11 ] On systems with VHE the kernel and KVM's world-switch code run at the same exception level. Code that is only used on a VHE system does not need to be annotated as __hyp_text as it can reside anywhere in the kernel text. __hyp_text was also used to prevent kprobes from patching breakpoint instructions into this region, as this code runs at a different exception level. While this is no longer true with VHE, KVM still switches VBAR_EL1, meaning a kprobe's breakpoint executed in the world-switch code will cause a hyp-panic. Move the __hyp_text check in the kprobes blacklist so it applies on VHE systems too, to cover the common code and guest enter/exit assembly. Fixes: 888b3c8720e0 ("arm64: Treat all entry code as non-kprobe-able") Reviewed-by: Christoffer Dall Signed-off-by: James Morse Acked-by: Masami Hiramatsu Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/probes/kprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 0417c929d21a..7d8c33279e9f 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -554,13 +554,13 @@ bool arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__entry_text_end) || (addr >= (unsigned long)__idmap_text_start && addr < (unsigned long)__idmap_text_end) || + (addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || !!search_exception_tables(addr)) return true; if (!is_kernel_in_hyp_mode()) { - if ((addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end) || - (addr >= (unsigned long)__hyp_idmap_text_start && + if ((addr >= (unsigned long)__hyp_idmap_text_start && addr < (unsigned long)__hyp_idmap_text_end)) return true; } From 865c798a37bfacafd35e6c0f2d5d5e5bb572a07d Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 24 Jan 2019 13:53:05 -0800 Subject: [PATCH 2183/2608] apparmor: Fix aa_label_build() error handling for failed merges [ Upstream commit d6d478aee003e19ef90321176552a8ad2929a47f ] aa_label_merge() can return NULL for memory allocations failures make sure to handle and set the correct error in this case. Reported-by: Peng Hao Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/domain.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index dd754b7850a8..67bf8b7ee8a2 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -1260,7 +1260,10 @@ check: aa_get_label(&profile->label)); if (IS_ERR_OR_NULL(new)) { info = "failed to build target label"; - error = PTR_ERR(new); + if (!new) + error = -ENOMEM; + else + error = PTR_ERR(new); new = NULL; perms.allow = 0; goto audit; From 21b202b7ca599edb5fb39afcfcffae6e6dbb6ec3 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 18 Jan 2019 19:13:08 +0800 Subject: [PATCH 2184/2608] x86/kexec: Don't setup EFI info if EFI runtime is not enabled [ Upstream commit 2aa958c99c7fd3162b089a1a56a34a0cdb778de1 ] Kexec-ing a kernel with "efi=noruntime" on the first kernel's command line causes the following null pointer dereference: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] Call Trace: efi_runtime_map_copy+0x28/0x30 bzImage64_load+0x688/0x872 arch_kexec_kernel_image_load+0x6d/0x70 kimage_file_alloc_init+0x13e/0x220 __x64_sys_kexec_file_load+0x144/0x290 do_syscall_64+0x55/0x1a0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Just skip the EFI info setup if EFI runtime services are not enabled. [ bp: Massage commit message. ] Suggested-by: Dave Young Signed-off-by: Kairui Song Signed-off-by: Borislav Petkov Acked-by: Dave Young Cc: AKASHI Takahiro Cc: Andrew Morton Cc: Ard Biesheuvel Cc: bhe@redhat.com Cc: David Howells Cc: erik.schmauss@intel.com Cc: fanc.fnst@cn.fujitsu.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kexec@lists.infradead.org Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: Philipp Rudo Cc: rafael.j.wysocki@intel.com Cc: robert.moore@intel.com Cc: Thomas Gleixner Cc: x86-ml Cc: Yannik Sembritzki Link: https://lkml.kernel.org/r/20190118111310.29589-2-kasong@redhat.com Signed-off-by: Sasha Levin --- arch/x86/kernel/kexec-bzimage64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 928b0c6083c9..4d948d87f01c 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -167,6 +167,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, struct efi_info *current_ei = &boot_params.efi_info; struct efi_info *ei = ¶ms->efi_info; + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + if (!current_ei->efi_memmap_size) return 0; From 6b6cd12509424e4e6c25569f55482a83e6cf0b5b Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 1 Feb 2019 14:20:20 -0800 Subject: [PATCH 2185/2608] x86_64: increase stack size for KASAN_EXTRA [ Upstream commit a8e911d13540487942d53137c156bd7707f66e5d ] If the kernel is configured with KASAN_EXTRA, the stack size is increasted significantly because this option sets "-fstack-reuse" to "none" in GCC [1]. As a result, it triggers stack overrun quite often with 32k stack size compiled using GCC 8. For example, this reproducer https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/madvise/madvise06.c triggers a "corrupted stack end detected inside scheduler" very reliably with CONFIG_SCHED_STACK_END_CHECK enabled. There are just too many functions that could have a large stack with KASAN_EXTRA due to large local variables that have been called over and over again without being able to reuse the stacks. Some noticiable ones are size 7648 shrink_page_list 3584 xfs_rmap_convert 3312 migrate_page_move_mapping 3312 dev_ethtool 3200 migrate_misplaced_transhuge_page 3168 copy_process There are other 49 functions are over 2k in size while compiling kernel with "-Wframe-larger-than=" even with a related minimal config on this machine. Hence, it is too much work to change Makefiles for each object to compile without "-fsanitize-address-use-after-scope" individually. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715#c23 Although there is a patch in GCC 9 to help the situation, GCC 9 probably won't be released in a few months and then it probably take another 6-month to 1-year for all major distros to include it as a default. Hence, the stack usage with KASAN_EXTRA can be revisited again in 2020 when GCC 9 is everywhere. Until then, this patch will help users avoid stack overrun. This has already been fixed for arm64 for the same reason via 6e8830674ea ("arm64: kasan: Increase stack size for KASAN_EXTRA"). Link: http://lkml.kernel.org/r/20190109215209.2903-1-cai@lca.pw Signed-off-by: Qian Cai Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/x86/include/asm/page_64_types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 74d531f6d518..50c8baaca4b0 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -7,7 +7,11 @@ #endif #ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_EXTRA +#define KASAN_STACK_ORDER 2 +#else #define KASAN_STACK_ORDER 1 +#endif #else #define KASAN_STACK_ORDER 0 #endif From 659052d1f16e0c3af60a371fa0147c007eba5a97 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 1 Feb 2019 14:20:34 -0800 Subject: [PATCH 2186/2608] mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone [ Upstream commit efad4e475c312456edb3c789d0996d12ed744c13 ] Patch series "mm, memory_hotplug: fix uninitialized pages fallouts", v2. Mikhail Zaslonko has posted fixes for the two bugs quite some time ago [1]. I have pushed back on those fixes because I believed that it is much better to plug the problem at the initialization time rather than play whack-a-mole all over the hotplug code and find all the places which expect the full memory section to be initialized. We have ended up with commit 2830bf6f05fb ("mm, memory_hotplug: initialize struct pages for the full memory section") merged and cause a regression [2][3]. The reason is that there might be memory layouts when two NUMA nodes share the same memory section so the merged fix is simply incorrect. In order to plug this hole we really have to be zone range aware in those handlers. I have split up the original patch into two. One is unchanged (patch 2) and I took a different approach for `removable' crash. [1] http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com [2] https://bugzilla.redhat.com/show_bug.cgi?id=1666948 [3] http://lkml.kernel.org/r/20190125163938.GA20411@dhcp22.suse.cz This patch (of 2): Mikhail has reported the following VM_BUG_ON triggered when reading sysfs removable state of a memory block: page:000003d08300c000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: is_mem_section_removable+0xb4/0x190 show_mem_removable+0x9a/0xd8 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: is_mem_section_removable+0xb4/0x190 Kernel panic - not syncing: Fatal exception: panic_on_oops The reason is that the memory block spans the zone boundary and we are stumbling over an unitialized struct page. Fix this by enforcing zone range in is_mem_section_removable so that we never run away from a zone. Link: http://lkml.kernel.org/r/20190128144506.15603-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Mikhail Zaslonko Debugged-by: Mikhail Zaslonko Tested-by: Gerald Schaefer Tested-by: Mikhail Gavrilov Reviewed-by: Oscar Salvador Cc: Pavel Tatashin Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory_hotplug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c7c74a927d6f..39db89f3df65 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1256,7 +1256,8 @@ static struct page *next_active_pageblock(struct page *page) bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { struct page *page = pfn_to_page(start_pfn); - struct page *end_page = page + nr_pages; + unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); + struct page *end_page = pfn_to_page(end_pfn); /* Check the starting page of each pageblock within the range */ for (; page < end_page; page = next_active_pageblock(page)) { From d83382c6c3c751867dbbadd0d4a2740ba028dd85 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Fri, 1 Feb 2019 14:20:38 -0800 Subject: [PATCH 2187/2608] mm, memory_hotplug: test_pages_in_a_zone do not pass the end of zone [ Upstream commit 24feb47c5fa5b825efb0151f28906dfdad027e61 ] If memory end is not aligned with the sparse memory section boundary, the mapping of such a section is only partly initialized. This may lead to VM_BUG_ON due to uninitialized struct pages access from test_pages_in_a_zone() function triggered by memory_hotplug sysfs handlers. Here are the the panic examples: CONFIG_DEBUG_VM_PGFLAGS=y kernel parameter mem=2050M -------------------------- page:000003d082008000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: test_pages_in_a_zone+0xde/0x160 show_valid_zones+0x5c/0x190 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: test_pages_in_a_zone+0xde/0x160 Kernel panic - not syncing: Fatal exception: panic_on_oops Fix this by checking whether the pfn to check is within the zone. [mhocko@suse.com: separated this change from http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com] Link: http://lkml.kernel.org/r/20190128144506.15603-3-mhocko@kernel.org [mhocko@suse.com: separated this change from http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com] Signed-off-by: Michal Hocko Signed-off-by: Mikhail Zaslonko Tested-by: Mikhail Gavrilov Reviewed-by: Oscar Salvador Tested-by: Gerald Schaefer Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Mikhail Gavrilov Cc: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory_hotplug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 39db89f3df65..c9d3a49bd4e2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1297,6 +1297,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, i++; if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) continue; + /* Check if we got outside of the zone */ + if (zone && !zone_spans_pfn(zone, pfn + i)) + return 0; page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; From 8d5b25551442ea58b129fb9a0a1c439894ceec0f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Feb 2019 14:20:58 -0800 Subject: [PATCH 2188/2608] lib/test_kmod.c: potential double free in error handling [ Upstream commit db7ddeab3ce5d64c9696e70d61f45ea9909cd196 ] There is a copy and paste bug so we set "config->test_driver" to NULL twice instead of setting "config->test_fs". Smatch complains that it leads to a double free: lib/test_kmod.c:840 __kmod_config_init() warn: 'config->test_fs' double freed Link: http://lkml.kernel.org/r/20190121140011.GA14283@kadam Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: Dan Carpenter Acked-by: Luis Chamberlain Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 7abb59ce6613..cf619795a182 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config) config->test_driver = NULL; kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; } static void kmod_config_free(struct kmod_test_device *test_dev) From 9c72acbe4940e1a542336daf9be2ca6493788e67 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 1 Feb 2019 14:21:23 -0800 Subject: [PATCH 2189/2608] fs/drop_caches.c: avoid softlockups in drop_pagecache_sb() [ Upstream commit c27d82f52f75fc9d8d9d40d120d2a96fdeeada5e ] When superblock has lots of inodes without any pagecache (like is the case for /proc), drop_pagecache_sb() will iterate through all of them without dropping sb->s_inode_list_lock which can lead to softlockups (one of our customers hit this). Fix the problem by going to the slow path and doing cond_resched() in case the process needs rescheduling. Link: http://lkml.kernel.org/r/20190114085343.15011-1-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/drop_caches.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 82377017130f..d31b6c72b476 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); + /* + * We must skip inodes in unusual state. We may also skip + * inodes without pages but we deliberately won't in case + * we need to reschedule to avoid softlockups. + */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || - (inode->i_mapping->nrpages == 0)) { + (inode->i_mapping->nrpages == 0 && !need_resched())) { spin_unlock(&inode->i_lock); continue; } @@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); + cond_resched(); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; From 327e34dfa9945f61db8f340e5a67907df1770c0d Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 1 Feb 2019 14:21:26 -0800 Subject: [PATCH 2190/2608] autofs: drop dentry reference only when it is never used [ Upstream commit 63ce5f552beb9bdb41546b3a26c4374758b21815 ] autofs_expire_run() calls dput(dentry) to drop the reference count of dentry. However, dentry is read via autofs_dentry_ino(dentry) after that. This may result in a use-free-bug. The patch drops the reference count of dentry only when it is never used. Link: http://lkml.kernel.org/r/154725122396.11260.16053424107144453867.stgit@pluto-themaw-net Signed-off-by: Pan Bian Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/autofs4/expire.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 57725d4a8c59..141f9bc213a3 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -567,7 +567,6 @@ int autofs4_expire_run(struct super_block *sb, pkt.len = dentry->d_name.len; memcpy(pkt.name, dentry->d_name.name, pkt.len); pkt.name[pkt.len] = '\0'; - dput(dentry); if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) ret = -EFAULT; @@ -580,6 +579,8 @@ int autofs4_expire_run(struct super_block *sb, complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); + dput(dentry); + return ret; } From 70e6e8e8ee2ac2846af14676d3cffaa78e40631a Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 1 Feb 2019 14:21:29 -0800 Subject: [PATCH 2191/2608] autofs: fix error return in autofs_fill_super() [ Upstream commit f585b283e3f025754c45bbe7533fc6e5c4643700 ] In autofs_fill_super() on error of get inode/make root dentry the return should be ENOMEM as this is the only failure case of the called functions. Link: http://lkml.kernel.org/r/154725123240.11260.796773942606871359.stgit@pluto-themaw-net Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/autofs4/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 3c7e727612fa..e455388a939c 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -259,8 +259,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } root_inode = autofs4_get_inode(s, S_IFDIR | 0755); root = d_make_root(root_inode); - if (!root) + if (!root) { + ret = -ENOMEM; goto fail_ino; + } pipe = NULL; root->d_fsdata = ino; From 9cb9f1d9d9f76860e8d570f5e8527ccc6920fa7d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Jan 2019 09:52:43 -0800 Subject: [PATCH 2192/2608] ARM: dts: omap4-droid4: Fix typo in cpcap IRQ flags [ Upstream commit ef4a55b9197a8f844ea0663138e902dcce3e2f36 ] We're now getting the following error: genirq: Setting trigger mode 1 for irq 230 failed (regmap_irq_set_type+0x0/0x15c) cpcap-usb-phy cpcap-usb-phy.0: could not get irq dp: -524 Cc: Sebastian Reichel Reported-by: Pavel Machek Tested-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index 4d61e5b1334a..bcced922b280 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -92,7 +92,7 @@ interrupts-extended = < &cpcap 15 0 &cpcap 14 0 &cpcap 28 0 &cpcap 19 0 &cpcap 18 0 &cpcap 17 0 &cpcap 16 0 &cpcap 49 0 - &cpcap 48 1 + &cpcap 48 0 >; interrupt-names = "id_ground", "id_float", "se0conn", "vbusvld", From 703db5d1b1759fd2aec0ce3c3b6f6c4262a1433d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Jan 2019 14:39:15 +0100 Subject: [PATCH 2193/2608] arm64: dts: renesas: r8a7796: Enable DMA for SCIF2 [ Upstream commit 97f26702bc95b5c3a72671d5c6675e4d6ee0a2f4 ] SCIF2 on R-Car M3-W can be used with both DMAC1 and DMAC2. Fixes: dbcae5ea4bd27409 ("arm64: dts: r8a7796: Enable SCIF DMA") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a7796.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi index 369092e17e34..016b84552a62 100644 --- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi @@ -937,6 +937,9 @@ <&cpg CPG_CORE R8A7796_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A7796_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; From 27a0e499a313796fbc56b0ba7d2d2171cc3d4ac3 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Fri, 21 Dec 2018 16:41:42 +0200 Subject: [PATCH 2194/2608] soc: fsl: qbman: avoid race in clearing QMan interrupt [ Upstream commit 89857a8a5c89a406b967ab2be7bd2ccdbe75e73d ] By clearing all interrupt sources, not only those that already occurred, the existing code may acknowledge by mistake interrupts that occurred after the code checks for them. Signed-off-by: Madalin Bucur Signed-off-by: Roy Pledge Signed-off-by: Li Yang Signed-off-by: Sasha Levin --- drivers/soc/fsl/qbman/qman.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 4f27e95efcdd..90892a360c61 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1048,18 +1048,19 @@ static void qm_mr_process_task(struct work_struct *work); static irqreturn_t portal_isr(int irq, void *ptr) { struct qman_portal *p = ptr; - - u32 clear = QM_DQAVAIL_MASK | p->irq_sources; u32 is = qm_in(&p->p, QM_REG_ISR) & p->irq_sources; + u32 clear = 0; if (unlikely(!is)) return IRQ_NONE; /* DQRR-handling if it's interrupt-driven */ - if (is & QM_PIRQ_DQRI) + if (is & QM_PIRQ_DQRI) { __poll_portal_fast(p, QMAN_POLL_LIMIT); + clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI; + } /* Handling of anything else that's interrupt-driven */ - clear |= __poll_portal_slow(p, is); + clear |= __poll_portal_slow(p, is) & QM_PIRQ_SLOW; qm_out(&p->p, QM_REG_ISR, clear); return IRQ_HANDLED; } From d74f30ef8972099e2bcc1c8a212f9418c6eebd7c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 23 Jan 2019 12:37:19 +0800 Subject: [PATCH 2195/2608] bpf: sock recvbuff must be limited by rmem_max in bpf_setsockopt() [ Upstream commit c9e4576743eeda8d24dedc164d65b78877f9a98c ] When sock recvbuff is set by bpf_setsockopt(), the value must by limited by rmem_max. It is the same with sendbuff. Fixes: 8c4b4c7e9ff0 ("bpf: Add setsockopt helper function to bpf") Signed-off-by: Yafang Shao Acked-by: Martin KaFai Lau Acked-by: Lawrence Brakmo Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 41ede90fc28f..61396648381e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3081,10 +3081,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; From 7031b8ca7fde45b8d7d53d12b9454c8dd2d795ed Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Sat, 29 Dec 2018 13:10:06 +0800 Subject: [PATCH 2196/2608] ARM: pxa: ssp: unneeded to free devm_ allocated data [ Upstream commit ba16adeb346387eb2d1ada69003588be96f098fa ] devm_ allocated data will be automatically freed. The free of devm_ allocated data is invalid. Fixes: 1c459de1e645 ("ARM: pxa: ssp: use devm_ functions") Signed-off-by: Peng Hao [title's prefix changed] Signed-off-by: Robert Jarzmik Signed-off-by: Sasha Levin --- arch/arm/plat-pxa/ssp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index ba13f793fbce..b92673efffff 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -237,8 +237,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) if (ssp == NULL) return -ENODEV; - iounmap(ssp->mmio_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); @@ -248,7 +246,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) list_del(&ssp->node); mutex_unlock(&ssp_lock); - kfree(ssp); return 0; } From 5a6e25e1e9b84b790ec013c13c56e8de0d1e0843 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 10 Dec 2018 13:56:33 +0000 Subject: [PATCH 2197/2608] arm64: dts: add msm8996 compatible to gicv3 [ Upstream commit 2a81efb0de0e33f2d2c83154af0bd3ce389b3269 ] Add compatible to gicv3 node to enable quirk required to restrict writing to GICR_WAKER register which is restricted on msm8996 SoC in Hypervisor. With this quirk MSM8996 can at least boot out of mainline, which can help community to work with boards based on MSM8996. Without this patch Qualcomm DB820c board reboots on mainline. Signed-off-by: Srinivas Kandagatla Signed-off-by: Andy Gross Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index ab00be277c6f..6f372ec055dd 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -359,7 +359,7 @@ }; intc: interrupt-controller@9bc0000 { - compatible = "arm,gic-v3"; + compatible = "qcom,msm8996-gic-v3", "arm,gic-v3"; #interrupt-cells = <3>; interrupt-controller; #redistributor-regions = <1>; From f01ebdb120b292335ffdd51be04e630fae5dfdfb Mon Sep 17 00:00:00 2001 From: Zhou Yanjie Date: Fri, 25 Jan 2019 02:22:15 +0800 Subject: [PATCH 2198/2608] DTS: CI20: Fix bugs in ci20's device tree. [ Upstream commit 1ca1c87f91d9dc50d6a38e2177b2032996e7901c ] According to the Schematic, the hardware of ci20 leads to uart3, but not to uart2. Uart2 is miswritten in the original code. Signed-off-by: Zhou Yanjie Signed-off-by: Paul Burton Cc: linux-mips Cc: linux-kernel Cc: devicetree@vger.kernel.org Cc: robh+dt@kernel.org Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: mark.rutland@arm.com Cc: malat@debian.org Cc: ezequiel@collabora.co.uk Cc: ulf.hansson@linaro.org Cc: syq Cc: jiaxun.yang Signed-off-by: Sasha Levin --- arch/mips/boot/dts/ingenic/ci20.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index a4cc52214dbd..dad4aa0ebdd8 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -54,7 +54,7 @@ status = "okay"; pinctrl-names = "default"; - pinctrl-0 = <&pins_uart2>; + pinctrl-0 = <&pins_uart3>; }; &uart4 { @@ -174,9 +174,9 @@ bias-disable; }; - pins_uart2: uart2 { - function = "uart2"; - groups = "uart2-data", "uart2-hwflow"; + pins_uart3: uart3 { + function = "uart3"; + groups = "uart3-data", "uart3-hwflow"; bias-disable; }; From ecfcdee1e9d57fd0d3e38e9491510307358dc9d3 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 22 Jan 2019 11:36:02 +0100 Subject: [PATCH 2199/2608] usb: phy: fix link errors [ Upstream commit f2105d42597f4d10e431b195d69e96dccaf9b012 ] Fix link errors when CONFIG_FSL_USB2_OTG is enabled and USB_OTG_FSM is set to module then the following link error occurs. aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_ioctl': drivers/usb/phy/phy-fsl-usb.c:1083: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:1083:(.text+0x574): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_srp': drivers/usb/phy/phy-fsl-usb.c:674: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:674:(.text+0x61c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_host': drivers/usb/phy/phy-fsl-usb.c:593: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:593:(.text+0x7a4): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_hnp': drivers/usb/phy/phy-fsl-usb.c:695: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:695:(.text+0x858): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `a_wait_enum': drivers/usb/phy/phy-fsl-usb.c:274: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:274:(.text+0x16f0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o:drivers/usb/phy/phy-fsl-usb.c:619: more undefined references to `otg_statemachine' follow aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_peripheral': drivers/usb/phy/phy-fsl-usb.c:619:(.text+0x1fa0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' make[1]: *** [Makefile:1020: vmlinux] Error 1 make[1]: Target 'Image' not remade because of errors. make: *** [Makefile:152: sub-make] Error 2 make: Target 'Image' not remade because of errors. Rework so that FSL_USB2_OTG depends on that the USB_OTG_FSM is builtin. Signed-off-by: Anders Roxell Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index aff702c0eb9f..85a92d0813dd 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -21,7 +21,7 @@ config AB8500_USB config FSL_USB2_OTG bool "Freescale USB OTG Transceiver Driver" - depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM + depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' select USB_PHY help From 18579f6965137f4c134e22b8dd906eba8a712dfc Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 28 Jan 2019 16:59:35 +0100 Subject: [PATCH 2200/2608] irqchip/mmp: Only touch the PJ4 IRQ & FIQ bits on enable/disable [ Upstream commit 2380a22b60ce6f995eac806e69c66e397b59d045 ] Resetting bit 4 disables the interrupt delivery to the "secure processor" core. This breaks the keyboard on a OLPC XO 1.75 laptop, where the firmware running on the "secure processor" bit-bangs the PS/2 protocol over the GPIO lines. It is not clear what the rest of the bits are and Marvell was unhelpful when asked for documentation. Aside from the SP bit, there are probably priority bits. Leaving the unknown bits as the firmware set them up seems to be a wiser course of action compared to just turning them off. Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek [maz: fixed-up subject and commit message] Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-mmp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 25f32e1d7764..3496b61a312a 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -34,6 +34,9 @@ #define SEL_INT_PENDING (1 << 6) #define SEL_INT_NUM_MASK 0x3f +#define MMP2_ICU_INT_ROUTE_PJ4_IRQ (1 << 5) +#define MMP2_ICU_INT_ROUTE_PJ4_FIQ (1 << 6) + struct icu_chip_data { int nr_irqs; unsigned int virq_base; @@ -190,7 +193,8 @@ static const struct mmp_intc_conf mmp_conf = { static const struct mmp_intc_conf mmp2_conf = { .conf_enable = 0x20, .conf_disable = 0x0, - .conf_mask = 0x7f, + .conf_mask = MMP2_ICU_INT_ROUTE_PJ4_IRQ | + MMP2_ICU_INT_ROUTE_PJ4_FIQ, }; static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs) From 13e429213f160a92780d9144bfdfe286e9ff6a84 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:19 +0100 Subject: [PATCH 2201/2608] net: stmmac: Fallback to Platform Data clock in Watchdog conversion [ Upstream commit 4ec5302fa906ec9d86597b236f62315bacdb9622 ] If we don't have DT then stmmac_clk will not be available. Let's add a new Platform Data field so that we can specify the refclk by this mean. This way we can still use the coalesce command in PCI based setups. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 14 ++++++++++---- include/linux/stmmac.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index c3c6335cbe9a..ecddd9948788 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -702,8 +702,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (usec * (clk / 1000000)) / 256; } @@ -712,8 +715,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (riwt * 256) / (clk / 1000000); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 32feac5bbd75..5844105a482b 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -183,6 +183,7 @@ struct plat_stmmacenet_data { struct clk *pclk; struct clk *clk_ptp_ref; unsigned int clk_ptp_rate; + unsigned int clk_ref_rate; struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4; From 405bff8e8799005ac56c901c78f222e3c7e0375c Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:20 +0100 Subject: [PATCH 2202/2608] net: stmmac: Send TSO packets always from Queue 0 [ Upstream commit c5acdbee22a1b200dde07effd26fd1f649e9ab8a ] The number of TSO enabled channels in HW can be different than the number of total channels. There is no way to determined, at runtime, the number of TSO capable channels and its safe to assume that if TSO is enabled then at least channel 0 will be TSO capable. Lets always send TSO packets from Queue 0. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0e66a5082140..b1454c63e675 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3019,8 +3019,17 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + skb_set_queue_mapping(skb, 0); + return stmmac_tso_xmit(skb, dev); + } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { From a63215cb5311d48bf7c5d9c26b6f4e08f33ac74e Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:21 +0100 Subject: [PATCH 2203/2608] net: stmmac: Disable EEE mode earlier in XMIT callback [ Upstream commit e2cd682deb231ba6f80524bb84e57e7138261149 ] In stmmac xmit callback we use a different flow for TSO packets but TSO xmit callback is not disabling the EEE mode. Fix this by disabling earlier the EEE mode, i.e. before calling the TSO xmit callback. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b1454c63e675..0cc83e8417ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3017,6 +3017,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q = &priv->tx_queue[queue]; + if (priv->tx_path_in_lpi_mode) + stmmac_disable_eee_mode(priv); + /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { @@ -3044,9 +3047,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - if (priv->tx_path_in_lpi_mode) - stmmac_disable_eee_mode(priv); - entry = tx_q->cur_tx; first_entry = entry; From 8939e8cdd004ba5d7dd3bbf9c105990f9ebe96f8 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 31 Jan 2019 11:19:43 +0000 Subject: [PATCH 2204/2608] irqchip/gic-v3-its: Fix ITT_entry_size accessor [ Upstream commit 56841070ccc87b463ac037d2d1f2beb8e5e35f0c ] According to ARM IHI 0069C (ID070116), we should use GITS_TYPER's bits [7:4] as ITT_entry_size instead of [8:4]. Although this is pretty annoying, it only results in a potential over-allocation of memory, and nothing bad happens. Fixes: 3dfa576bfb45 ("irqchip/gic-v3-its: Add probing for VLPI properties") Signed-off-by: Zenghui Yu [maz: massaged subject and commit message] Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index bacb499c512c..845ff8c51564 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -306,7 +306,7 @@ #define GITS_TYPER_PLPIS (1UL << 0) #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) From 167a0989ec15a0b738dc8fd25922b1fae00c0dba Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 13:57:58 +0100 Subject: [PATCH 2205/2608] relay: check return of create_buf_file() properly [ Upstream commit 2c1cf00eeacb784781cf1c9896b8af001246d339 ] If create_buf_file() returns an error, don't try to reference it later as a valid dentry pointer. This problem was exposed when debugfs started to return errors instead of just NULL for some calls when they do not succeed properly. Also, the check for WARN_ON(dentry) was just wrong :) Reported-by: Kees Cook Reported-and-tested-by: syzbot+16c3a70e1e9b29346c43@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Cc: Andrew Morton Cc: David Rientjes Fixes: ff9fb72bc077 ("debugfs: return error values, not NULL") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- kernel/relay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index 1537158c67b3..61d37e6da22d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -427,6 +427,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan, dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, buf, &chan->is_global); + if (IS_ERR(dentry)) + dentry = NULL; kfree(tmpname); @@ -460,7 +462,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) dentry = chan->cb->create_buf_file(NULL, NULL, S_IRUSR, buf, &chan->is_global); - if (WARN_ON(dentry)) + if (IS_ERR_OR_NULL(dentry)) goto free_buf; } From 3084058a72e297e7dbce277b8161fc323d087d1d Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Thu, 31 Jan 2019 10:19:33 +0100 Subject: [PATCH 2206/2608] bpf, selftests: fix handling of sparse CPU allocations [ Upstream commit 1bb54c4071f585ebef56ce8fdfe6026fa2cbcddd ] Previously, bpf_num_possible_cpus() had a bug when calculating a number of possible CPUs in the case of sparse CPU allocations, as it was considering only the first range or element of /sys/devices/system/cpu/possible. E.g. in the case of "0,2-3" (CPU 1 is not available), the function returned 1 instead of 3. This patch fixes the function by making it parse all CPU ranges and elements. Signed-off-by: Martynas Pumputis Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/bpf_util.h | 32 +++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index d0811b3d6a6f..4bf720364934 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void) unsigned int start, end, possible_cpus = 0; char buff[128]; FILE *fp; - int n; + int len, n, i, j = 0; fp = fopen(fcpu, "r"); if (!fp) { @@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void) exit(1); } - while (fgets(buff, sizeof(buff), fp)) { - n = sscanf(buff, "%u-%u", &start, &end); - if (n == 0) { - printf("Failed to retrieve # possible CPUs!\n"); - exit(1); - } else if (n == 1) { - end = start; - } - possible_cpus = start == 0 ? end + 1 : 0; - break; + if (!fgets(buff, sizeof(buff), fp)) { + printf("Failed to read %s!\n", fcpu); + exit(1); } + + len = strlen(buff); + for (i = 0; i <= len; i++) { + if (buff[i] == ',' || buff[i] == '\0') { + buff[i] = '\0'; + n = sscanf(&buff[j], "%u-%u", &start, &end); + if (n <= 0) { + printf("Failed to retrieve # possible CPUs!\n"); + exit(1); + } else if (n == 1) { + end = start; + } + possible_cpus += end - start + 1; + j = i + 1; + } + } + fclose(fp); return possible_cpus; From dcab2483a1a3eff37411377202837d8a473d885f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jan 2019 18:12:43 -0800 Subject: [PATCH 2207/2608] bpf: fix lockdep false positive in percpu_freelist [ Upstream commit a89fac57b5d080771efd4d71feaae19877cf68f0 ] Lockdep warns about false positive: [ 12.492084] 00000000e6b28347 (&head->lock){+...}, at: pcpu_freelist_push+0x2a/0x40 [ 12.492696] but this lock was taken by another, HARDIRQ-safe lock in the past: [ 12.493275] (&rq->lock){-.-.} [ 12.493276] [ 12.493276] [ 12.493276] and interrupts could create inverse lock ordering between them. [ 12.493276] [ 12.494435] [ 12.494435] other info that might help us debug this: [ 12.494979] Possible interrupt unsafe locking scenario: [ 12.494979] [ 12.495518] CPU0 CPU1 [ 12.495879] ---- ---- [ 12.496243] lock(&head->lock); [ 12.496502] local_irq_disable(); [ 12.496969] lock(&rq->lock); [ 12.497431] lock(&head->lock); [ 12.497890] [ 12.498104] lock(&rq->lock); [ 12.498368] [ 12.498368] *** DEADLOCK *** [ 12.498368] [ 12.498837] 1 lock held by dd/276: [ 12.499110] #0: 00000000c58cb2ee (rcu_read_lock){....}, at: trace_call_bpf+0x5e/0x240 [ 12.499747] [ 12.499747] the shortest dependencies between 2nd lock and 1st lock: [ 12.500389] -> (&rq->lock){-.-.} { [ 12.500669] IN-HARDIRQ-W at: [ 12.500934] _raw_spin_lock+0x2f/0x40 [ 12.501373] scheduler_tick+0x4c/0xf0 [ 12.501812] update_process_times+0x40/0x50 [ 12.502294] tick_periodic+0x27/0xb0 [ 12.502723] tick_handle_periodic+0x1f/0x60 [ 12.503203] timer_interrupt+0x11/0x20 [ 12.503651] __handle_irq_event_percpu+0x43/0x2c0 [ 12.504167] handle_irq_event_percpu+0x20/0x50 [ 12.504674] handle_irq_event+0x37/0x60 [ 12.505139] handle_level_irq+0xa7/0x120 [ 12.505601] handle_irq+0xa1/0x150 [ 12.506018] do_IRQ+0x77/0x140 [ 12.506411] ret_from_intr+0x0/0x1d [ 12.506834] _raw_spin_unlock_irqrestore+0x53/0x60 [ 12.507362] __setup_irq+0x481/0x730 [ 12.507789] setup_irq+0x49/0x80 [ 12.508195] hpet_time_init+0x21/0x32 [ 12.508644] x86_late_time_init+0xb/0x16 [ 12.509106] start_kernel+0x390/0x42a [ 12.509554] secondary_startup_64+0xa4/0xb0 [ 12.510034] IN-SOFTIRQ-W at: [ 12.510305] _raw_spin_lock+0x2f/0x40 [ 12.510772] try_to_wake_up+0x1c7/0x4e0 [ 12.511220] swake_up_locked+0x20/0x40 [ 12.511657] swake_up_one+0x1a/0x30 [ 12.512070] rcu_process_callbacks+0xc5/0x650 [ 12.512553] __do_softirq+0xe6/0x47b [ 12.512978] irq_exit+0xc3/0xd0 [ 12.513372] smp_apic_timer_interrupt+0xa9/0x250 [ 12.513876] apic_timer_interrupt+0xf/0x20 [ 12.514343] default_idle+0x1c/0x170 [ 12.514765] do_idle+0x199/0x240 [ 12.515159] cpu_startup_entry+0x19/0x20 [ 12.515614] start_kernel+0x422/0x42a [ 12.516045] secondary_startup_64+0xa4/0xb0 [ 12.516521] INITIAL USE at: [ 12.516774] _raw_spin_lock_irqsave+0x38/0x50 [ 12.517258] rq_attach_root+0x16/0xd0 [ 12.517685] sched_init+0x2f2/0x3eb [ 12.518096] start_kernel+0x1fb/0x42a [ 12.518525] secondary_startup_64+0xa4/0xb0 [ 12.518986] } [ 12.519132] ... key at: [] __key.71384+0x0/0x8 [ 12.519649] ... acquired at: [ 12.519892] pcpu_freelist_pop+0x7b/0xd0 [ 12.520221] bpf_get_stackid+0x1d2/0x4d0 [ 12.520563] ___bpf_prog_run+0x8b4/0x11a0 [ 12.520887] [ 12.521008] -> (&head->lock){+...} { [ 12.521292] HARDIRQ-ON-W at: [ 12.521539] _raw_spin_lock+0x2f/0x40 [ 12.521950] pcpu_freelist_push+0x2a/0x40 [ 12.522396] bpf_get_stackid+0x494/0x4d0 [ 12.522828] ___bpf_prog_run+0x8b4/0x11a0 [ 12.523296] INITIAL USE at: [ 12.523537] _raw_spin_lock+0x2f/0x40 [ 12.523944] pcpu_freelist_populate+0xc0/0x120 [ 12.524417] htab_map_alloc+0x405/0x500 [ 12.524835] __do_sys_bpf+0x1a3/0x1a90 [ 12.525253] do_syscall_64+0x4a/0x180 [ 12.525659] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 12.526167] } [ 12.526311] ... key at: [] __key.13130+0x0/0x8 [ 12.526812] ... acquired at: [ 12.527047] __lock_acquire+0x521/0x1350 [ 12.527371] lock_acquire+0x98/0x190 [ 12.527680] _raw_spin_lock+0x2f/0x40 [ 12.527994] pcpu_freelist_push+0x2a/0x40 [ 12.528325] bpf_get_stackid+0x494/0x4d0 [ 12.528645] ___bpf_prog_run+0x8b4/0x11a0 [ 12.528970] [ 12.529092] [ 12.529092] stack backtrace: [ 12.529444] CPU: 0 PID: 276 Comm: dd Not tainted 5.0.0-rc3-00018-g2fa53f892422 #475 [ 12.530043] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 [ 12.530750] Call Trace: [ 12.530948] dump_stack+0x5f/0x8b [ 12.531248] check_usage_backwards+0x10c/0x120 [ 12.531598] ? ___bpf_prog_run+0x8b4/0x11a0 [ 12.531935] ? mark_lock+0x382/0x560 [ 12.532229] mark_lock+0x382/0x560 [ 12.532496] ? print_shortest_lock_dependencies+0x180/0x180 [ 12.532928] __lock_acquire+0x521/0x1350 [ 12.533271] ? find_get_entry+0x17f/0x2e0 [ 12.533586] ? find_get_entry+0x19c/0x2e0 [ 12.533902] ? lock_acquire+0x98/0x190 [ 12.534196] lock_acquire+0x98/0x190 [ 12.534482] ? pcpu_freelist_push+0x2a/0x40 [ 12.534810] _raw_spin_lock+0x2f/0x40 [ 12.535099] ? pcpu_freelist_push+0x2a/0x40 [ 12.535432] pcpu_freelist_push+0x2a/0x40 [ 12.535750] bpf_get_stackid+0x494/0x4d0 [ 12.536062] ___bpf_prog_run+0x8b4/0x11a0 It has been explained that is a false positive here: https://lkml.org/lkml/2018/7/25/756 Recap: - stackmap uses pcpu_freelist - The lock in pcpu_freelist is a percpu lock - stackmap is only used by tracing bpf_prog - A tracing bpf_prog cannot be run if another bpf_prog has already been running (ensured by the percpu bpf_prog_active counter). Eric pointed out that this lockdep splats stops other legit lockdep splats in selftests/bpf/test_progs.c. Fix this by calling local_irq_save/restore for stackmap. Another false positive had also been worked around by calling local_irq_save in commit 89ad2fa3f043 ("bpf: fix lockdep splat"). That commit added unnecessary irq_save/restore to fast path of bpf hash map. irqs are already disabled at that point, since htab is holding per bucket spin_lock with irqsave. Let's reduce overhead for htab by introducing __pcpu_freelist_push/pop function w/o irqsave and convert pcpu_freelist_push/pop to irqsave to be used elsewhere (right now only in stackmap). It stops lockdep false positive in stackmap with a bit of acceptable overhead. Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Reported-by: Naresh Kamboju Reported-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/hashtab.c | 4 ++-- kernel/bpf/percpu_freelist.c | 41 +++++++++++++++++++++++++----------- kernel/bpf/percpu_freelist.h | 4 ++++ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3d0ecc273cc6..84237f640789 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -655,7 +655,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) } if (htab_is_prealloc(htab)) { - pcpu_freelist_push(&htab->freelist, &l->fnode); + __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); l->htab = htab; @@ -717,7 +717,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } else { struct pcpu_freelist_node *l; - l = pcpu_freelist_pop(&htab->freelist); + l = __pcpu_freelist_pop(&htab->freelist); if (!l) return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 673fa6fe2d73..0c1b4ba9e90e 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s) free_percpu(s->freelist); } -static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, - struct pcpu_freelist_node *node) +static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, + struct pcpu_freelist_node *node) { raw_spin_lock(&head->lock); node->next = head->first; @@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, raw_spin_unlock(&head->lock); } -void pcpu_freelist_push(struct pcpu_freelist *s, +void __pcpu_freelist_push(struct pcpu_freelist *s, struct pcpu_freelist_node *node) { struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); - __pcpu_freelist_push(head, node); + ___pcpu_freelist_push(head, node); +} + +void pcpu_freelist_push(struct pcpu_freelist *s, + struct pcpu_freelist_node *node) +{ + unsigned long flags; + + local_irq_save(flags); + __pcpu_freelist_push(s, node); + local_irq_restore(flags); } void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, @@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, for_each_possible_cpu(cpu) { again: head = per_cpu_ptr(s->freelist, cpu); - __pcpu_freelist_push(head, buf); + ___pcpu_freelist_push(head, buf); i++; buf += elem_size; if (i == nr_elems) @@ -74,14 +84,12 @@ again: local_irq_restore(flags); } -struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; - unsigned long flags; int orig_cpu, cpu; - local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) { - local_irq_restore(flags); + if (cpu == orig_cpu) return NULL; - } } } + +struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +{ + struct pcpu_freelist_node *ret; + unsigned long flags; + + local_irq_save(flags); + ret = __pcpu_freelist_pop(s); + local_irq_restore(flags); + return ret; +} diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h index 3049aae8ea1e..c3960118e617 100644 --- a/kernel/bpf/percpu_freelist.h +++ b/kernel/bpf/percpu_freelist.h @@ -22,8 +22,12 @@ struct pcpu_freelist_node { struct pcpu_freelist_node *next; }; +/* pcpu_freelist_* do spin_lock_irqsave. */ void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); +/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ +void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems); int pcpu_freelist_init(struct pcpu_freelist *); From 2b34bef88a2ee48936997c77bc0b04149cac39ca Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 31 Jan 2019 14:25:50 +0100 Subject: [PATCH 2208/2608] drm/sun4i: tcon: Prepare and enable TCON channel 0 clock at init [ Upstream commit b14e945bda8ae227d1bf2b1837c0c4a61721cd1a ] When initializing clocks, a reference to the TCON channel 0 clock is obtained. However, the clock is never prepared and enabled later. Switching from simplefb to DRM actually disables the clock (that was usually configured by U-Boot) because of that. On the V3s, this results in a hang when writing to some mixer registers when switching over to DRM from simplefb. Fix this by preparing and enabling the clock when initializing other clocks. Waiting for sun4i_tcon_channel_enable to enable the clock is apparently too late and results in the same mixer register access hang. Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190131132550.26355-1-paul.kocialkowski@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 7b909d814d38..095bd6b4ae80 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -371,6 +371,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, dev_err(dev, "Couldn't get the TCON channel 0 clock\n"); return PTR_ERR(tcon->sclk0); } + clk_prepare_enable(tcon->sclk0); if (tcon->quirks->has_channel_1) { tcon->sclk1 = devm_clk_get(dev, "tcon-ch1"); @@ -385,6 +386,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon) { + clk_disable_unprepare(tcon->sclk0); clk_disable_unprepare(tcon->clk); } From 253543f745286b00d637a12034657444d2db02c5 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Wed, 23 Jan 2019 16:33:47 +0000 Subject: [PATCH 2209/2608] dmaengine: at_xdmac: Fix wrongfull report of a channel as in use [ Upstream commit dc3f595b6617ebc0307e0ce151e8f2f2b2489b95 ] atchan->status variable is used to store two different information: - pass channel interrupts status from interrupt handler to tasklet; - channel information like whether it is cyclic or paused; This causes a bug when device_terminate_all() is called, (AT_XDMAC_CHAN_IS_CYCLIC cleared on atchan->status) and then a late End of Block interrupt arrives (AT_XDMAC_CIS_BIS), which sets bit 0 of atchan->status. Bit 0 is also used for AT_XDMAC_CHAN_IS_CYCLIC, so when a new descriptor for a cyclic transfer is created, the driver reports the channel as in use: if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) { dev_err(chan2dev(chan), "channel currently used\n"); return NULL; } This patch fixes the bug by adding a different struct member to keep the interrupts status separated from the channel status bits. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Codrin Ciubotariu Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/at_xdmac.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 94236ec9d410..4db2cd1c611d 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -203,6 +203,7 @@ struct at_xdmac_chan { u32 save_cim; u32 save_cnda; u32 save_cndc; + u32 irq_status; unsigned long status; struct tasklet_struct tasklet; struct dma_slave_config sconfig; @@ -1580,8 +1581,8 @@ static void at_xdmac_tasklet(unsigned long data) struct at_xdmac_desc *desc; u32 error_mask; - dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n", - __func__, atchan->status); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS @@ -1589,15 +1590,15 @@ static void at_xdmac_tasklet(unsigned long data) if (at_xdmac_chan_is_cyclic(atchan)) { at_xdmac_handle_cyclic(atchan); - } else if ((atchan->status & AT_XDMAC_CIS_LIS) - || (atchan->status & error_mask)) { + } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) + || (atchan->irq_status & error_mask)) { struct dma_async_tx_descriptor *txd; - if (atchan->status & AT_XDMAC_CIS_RBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_RBEIS) dev_err(chan2dev(&atchan->chan), "read bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_WBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_WBEIS) dev_err(chan2dev(&atchan->chan), "write bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_ROIS) + if (atchan->irq_status & AT_XDMAC_CIS_ROIS) dev_err(chan2dev(&atchan->chan), "request overflow error!!!"); spin_lock_bh(&atchan->lock); @@ -1652,7 +1653,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) atchan = &atxdmac->chan[i]; chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS); - atchan->status = chan_status & chan_imr; + atchan->irq_status = chan_status & chan_imr; dev_vdbg(atxdmac->dma.dev, "%s: chan%d: imr=0x%x, status=0x%x\n", __func__, i, chan_imr, chan_status); @@ -1666,7 +1667,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) at_xdmac_chan_read(atchan, AT_XDMAC_CDA), at_xdmac_chan_read(atchan, AT_XDMAC_CUBC)); - if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) + if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); tasklet_schedule(&atchan->tasklet); From 6a78f5dccfff5c0b64b6956e6f6beadd669e0108 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 1 Feb 2019 12:42:06 +0100 Subject: [PATCH 2210/2608] vsock/virtio: fix kernel panic after device hot-unplug [ Upstream commit 22b5c0b63f32568e130fa2df4ba23efce3eb495b ] virtio_vsock_remove() invokes the vsock_core_exit() also if there are opened sockets for the AF_VSOCK protocol family. In this way the vsock "transport" pointer is set to NULL, triggering the kernel panic at the first socket activity. This patch move the vsock_core_init()/vsock_core_exit() in the virtio_vsock respectively in module_init and module_exit functions, that cannot be invoked until there are open sockets. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1609699 Reported-by: Yan Fu Signed-off-by: Stefano Garzarella Acked-by: Stefan Hajnoczi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/vmw_vsock/virtio_transport.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index fdb294441682..8cdf6ddec9b7 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get(); + if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; } @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) virtio_vsock_update_guest_cid(vsock); - ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0; -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -669,7 +666,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev) mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex); vdev->config->del_vqs(vdev); @@ -702,14 +698,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + } static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); } From 46a96f8729b4ef232d70ed893d50da1c7cb434a4 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 1 Feb 2019 12:42:07 +0100 Subject: [PATCH 2211/2608] vsock/virtio: reset connected sockets on device removal [ Upstream commit 85965487abc540368393a15491e6e7fcd230039d ] When the virtio transport device disappear, we should reset all connected sockets in order to inform the users. Signed-off-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/vmw_vsock/virtio_transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 8cdf6ddec9b7..2ff751eba037 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -634,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); + /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev); mutex_lock(&vsock->rx_lock); From f2dbb2b5d97143761de075856596ec8e704fe02d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Jan 2019 21:48:44 +0200 Subject: [PATCH 2212/2608] dmaengine: dmatest: Abort test in case of mapping error [ Upstream commit 6454368a804c4955ccd116236037536f81e5b1f1 ] In case of mapping error the DMA addresses are invalid and continuing will screw system memory or potentially something else. [ 222.480310] dmatest: dma0chan7-copy0: summary 1 tests, 3 failures 6 iops 349 KB/s (0) ... [ 240.912725] check: Corrupted low memory at 00000000c7c75ac9 (2940 phys) = 5656000000000000 [ 240.921998] check: Corrupted low memory at 000000005715a1cd (2948 phys) = 279f2aca5595ab2b [ 240.931280] check: Corrupted low memory at 000000002f4024c0 (2950 phys) = 5e5624f349e793cf ... Abort any test if mapping failed. Fixes: 4076e755dbec ("dmatest: convert to dmaengine_unmap_data") Cc: Dan Williams Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dmatest.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 80cc2be6483c..e39336127741 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -626,11 +626,9 @@ static int dmatest_func(void *data) srcs[i] = um->addr[i] + src_off; ret = dma_mapping_error(dev->dev, um->addr[i]); if (ret) { - dmaengine_unmap_put(um); result("src mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->to_cnt++; } @@ -645,11 +643,9 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); ret = dma_mapping_error(dev->dev, dsts[i]); if (ret) { - dmaengine_unmap_put(um); result("dst mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->bidi_cnt++; } @@ -679,12 +675,10 @@ static int dmatest_func(void *data) } if (!tx) { - dmaengine_unmap_put(um); result("prep error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } done->done = false; @@ -693,12 +687,10 @@ static int dmatest_func(void *data) cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { - dmaengine_unmap_put(um); result("submit error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } dma_async_issue_pending(chan); @@ -711,16 +703,14 @@ static int dmatest_func(void *data) dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); - failed_tests++; - continue; + goto error_unmap_continue; } else if (status != DMA_COMPLETE) { dmaengine_unmap_put(um); result(status == DMA_ERROR ? "completion error status" : "completion busy status", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } dmaengine_unmap_put(um); @@ -765,6 +755,12 @@ static int dmatest_func(void *data) verbose_result("test passed", total_tests, src_off, dst_off, len, 0); } + + continue; + +error_unmap_continue: + dmaengine_unmap_put(um); + failed_tests++; } ktime = ktime_sub(ktime_get(), ktime); ktime = ktime_sub(ktime, comparetime); From 25825f52ba4138e09dfbeaaa104f363b8256e5cf Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Tue, 29 Jan 2019 06:28:35 +0000 Subject: [PATCH 2213/2608] selftests: netfilter: fix config fragment CONFIG_NF_TABLES_INET [ Upstream commit 952b72f89ae23b316da8c1021b18d0c388ad6cc4 ] In selftests the config fragment for netfilter was added as NF_TABLES_INET=y and this patch correct it as CONFIG_NF_TABLES_INET=y Signed-off-by: Naresh Kamboju Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/netfilter/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 1017313e41a8..59caa8f71cd8 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,2 @@ CONFIG_NET_NS=y -NF_TABLES_INET=y +CONFIG_NF_TABLES_INET=y From 0ea42d0813665ec60682eeac3ab83adbfef556c0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Jan 2019 15:16:23 +0100 Subject: [PATCH 2214/2608] selftests: netfilter: add simple masq/redirect test cases [ Upstream commit 98bfc3414bda335dbd7fec58bde6266f991801d7 ] Check basic nat/redirect/masquerade for ipv4 and ipv6. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/nft_nat.sh | 762 +++++++++++++++++++ 2 files changed, 763 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/nft_nat.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 47ed6cef93fb..c9ff2b47bd1c 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 000000000000..8ec76681605c --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in "packets 1 bytes 84" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out "packets 1 bytes 84" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in6 "$expect" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out6 "$expect" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in6 "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out "packets 0 bytes 0" + lret=1 + fi + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out6 "packets 0 bytes 0" + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir "$expect" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir6 "$expect" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in 0 1 2;do + ip netns exec ns$i nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local lret=0 +ip netns exec ns0 nft -f - < /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + ip netns exec ns0 nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local lret=0 +ip netns exec ns0 nft -f - < /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + + ip netns exec ns0 nft flush chain ip nat output + + reset_counters + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns1$dir "$expect" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0$dir "$expect" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns2$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + + return $lret +} + + +test_masquerade6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 via ipv6" + return 1 + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip6 nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + + return $lret +} + +test_masquerade() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: canot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + + return $lret +} + +test_redirect6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip6 nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete ip6 nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + + return $lret +} + +test_redirect() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP redirection for ns2" + + return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - < /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi + check_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + + check_ns0_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + reset_counters +done + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret From c1c60bac48457bc88392a0953799606c5f5d57d9 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Tue, 29 Jan 2019 15:51:42 +0100 Subject: [PATCH 2215/2608] netfilter: nf_nat: skip nat clash resolution for same-origin entries [ Upstream commit 4e35c1cb9460240e983a01745b5f29fe3a4d8e39 ] It is possible that two concurrent packets originating from the same socket of a connection-less protocol (e.g. UDP) can end up having different IP_CT_DIR_REPLY tuples which results in one of the packets being dropped. To illustrate this, consider the following simplified scenario: 1. Packet A and B are sent at the same time from two different threads by same UDP socket. No matching conntrack entry exists yet. Both packets cause allocation of a new conntrack entry. 2. get_unique_tuple gets called for A. No clashing entry found. conntrack entry for A is added to main conntrack table. 3. get_unique_tuple is called for B and will find that the reply tuple of B is already taken by A. It will allocate a new UDP source port for B to resolve the clash. 4. conntrack entry for B cannot be added to main conntrack table because its ORIGINAL direction is clashing with A and the REPLY directions of A and B are not the same anymore due to UDP source port reallocation done in step 3. This patch modifies nf_conntrack_tuple_taken so it doesn't consider colliding reply tuples if the IP_CT_DIR_ORIGINAL tuples are equal. [ Florian: simplify patch to not use .allow_clash setting and always ignore identical flows ] Signed-off-by: Martynas Pumputis Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b793b55d1488..f07357ba9629 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -869,6 +869,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; From d4802260ed30ac142da8739402bb03e28d22f9de Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 4 Feb 2019 17:40:07 +0100 Subject: [PATCH 2216/2608] s390/qeth: fix use-after-free in error path [ Upstream commit afa0c5904ba16d59b0454f7ee4c807dae350f432 ] The error path in qeth_alloc_qdio_buffers() that takes care of cleaning up the Output Queues is buggy. It first frees the queue, but then calls qeth_clear_outq_buffers() with that very queue struct. Make the call to qeth_clear_outq_buffers() part of the free action (in the correct order), and while at it fix the naming of the helper. Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 69ef5f4060ed..6566fceef38d 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2472,11 +2472,12 @@ out: return rc; } -static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +static void qeth_free_output_queue(struct qeth_qdio_out_q *q) { if (!q) return; + qeth_clear_outq_buffers(q, 1); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@ -2549,10 +2550,8 @@ out_freeoutqbufs: card->qdio.out_qs[i]->bufs[j] = NULL; } out_freeoutq: - while (i > 0) { - qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - } + while (i > 0) + qeth_free_output_queue(card->qdio.out_qs[--i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; out_freepool: @@ -2585,10 +2584,8 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) qeth_free_buffer_pool(card); /* free outbound qdio_qs */ if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - qeth_free_qdio_out_buf(card->qdio.out_qs[i]); - } + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_free_output_queue(card->qdio.out_qs[i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; } From bb4bf9df6d439c2c33cd452533c5f182faadc9da Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 28 Jan 2019 14:35:26 +0100 Subject: [PATCH 2217/2608] perf symbols: Filter out hidden symbols from labels [ Upstream commit 59a17706915fe5ea6f711e1f92d4fb706bce07fe ] When perf is built with the annobin plugin (RHEL8 build) extra symbols are added to its binary: # nm perf | grep annobin | head -10 0000000000241100 t .annobin_annotate.c 0000000000326490 t .annobin_annotate.c 0000000000249255 t .annobin_annotate.c_end 00000000003283a8 t .annobin_annotate.c_end 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bc3e2 t .annobin_annotate.c_end.unlikely 00000000001bc400 t .annobin_annotate.c_end.unlikely 00000000001bce18 t .annobin_annotate.c.hot 00000000001bce18 t .annobin_annotate.c.hot ... Those symbols have no use for report or annotation and should be skipped. Moreover they interfere with the DWARF unwind test on the PPC arch, where they are mixed with checked symbols and then the test fails: # perf test dwarf -v 59: Test dwarf unwind : --- start --- test child forked, pid 8515 unwind: .annobin_dwarf_unwind.c:ip = 0x10dba40dc (0x2740dc) ... got: .annobin_dwarf_unwind.c 0x10dba40dc, expecting test__arch_unwind_sample unwind: failed with 'no error' The annobin symbols are defined as NOTYPE/LOCAL/HIDDEN: # readelf -s ./perf | grep annobin | head -1 40: 00000000001bce4f 0 NOTYPE LOCAL HIDDEN 13 .annobin_init.c They can still pass the check for the label symbol. Adding check for HIDDEN and INTERNAL (as suggested by Nick below) visibility and filter out such symbols. > Just to be awkward, if you are going to ignore STV_HIDDEN > symbols then you should probably also ignore STV_INTERNAL ones > as well... Annobin does not generate them, but you never know, > one day some other tool might create some. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Masami Hiramatsu Cc: Michael Petlan Cc: Namhyung Kim Cc: Nick Clifton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190128133526.GD15461@krava Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol-elf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8ad4296de98b..3d39332b3a06 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -87,6 +87,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +static inline uint8_t elf_sym__visibility(const GElf_Sym *sym) +{ + return GELF_ST_VISIBILITY(sym->st_other); +} + #ifndef STT_GNU_IFUNC #define STT_GNU_IFUNC 10 #endif @@ -111,7 +116,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) return elf_sym__type(sym) == STT_NOTYPE && sym->st_name != 0 && sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; + sym->st_shndx != SHN_ABS && + elf_sym__visibility(sym) != STV_HIDDEN && + elf_sym__visibility(sym) != STV_INTERNAL; } static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) From a4fc33cf212a244e138720c5237da6ad2cfebc2f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Jan 2019 15:12:34 +0100 Subject: [PATCH 2218/2608] perf trace: Support multiple "vfs_getname" probes [ Upstream commit 6ab3bc240ade47a0f52bc16d97edd9accbe0024e ] With a suitably defined "probe:vfs_getname" probe, 'perf trace' can "beautify" its output, so syscalls like open() or openat() can print the "filename" argument instead of just its hex address, like: $ perf trace -e open -- touch /dev/null [...] 0.590 ( 0.014 ms): touch/18063 open(filename: /dev/null, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 [...] The output without such beautifier looks like: 0.529 ( 0.011 ms): touch/18075 open(filename: 0xc78cf288, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 However, when the vfs_getname probe expands to multiple probes and it is not the first one that is hit, the beautifier fails, as following: 0.326 ( 0.010 ms): touch/18072 open(filename: , flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 Fix it by hooking into all the expanded probes (inlines), now, for instance: [root@quaco ~]# perf probe -l probe:vfs_getname (on getname_flags:73@fs/namei.c with pathname) probe:vfs_getname_1 (on getname_flags:73@fs/namei.c with pathname) [root@quaco ~]# perf trace -e open* sleep 1 0.010 ( 0.005 ms): sleep/5588 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: RDONLY|CLOEXEC) = 3 0.029 ( 0.006 ms): sleep/5588 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: RDONLY|CLOEXEC) = 3 0.194 ( 0.008 ms): sleep/5588 openat(dfd: CWD, filename: /usr/lib/locale/locale-archive, flags: RDONLY|CLOEXEC) = 3 [root@quaco ~]# Works, further verified with: [root@quaco ~]# perf test vfs 65: Use vfs_getname probe to get syscall args filenames : Ok 66: Add vfs_getname probe to get syscall args filenames : Ok 67: Check open filename arg using perf trace + vfs_getname: Ok [root@quaco ~]# Reported-by: Michael Petlan Tested-by: Michael Petlan Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-mv8kolk17xla1smvmp3qabv1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-trace.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8e3c4ec00017..b224bf3f2b99 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2109,19 +2109,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { - struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); + bool found = false; + struct perf_evsel *evsel, *tmp; + struct parse_events_error err = { .idx = 0, }; + int ret = parse_events(evlist, "probe:vfs_getname*", &err); - if (IS_ERR(evsel)) + if (ret) return false; - if (perf_evsel__field(evsel, "pathname") == NULL) { + evlist__for_each_entry_safe(evlist, evsel, tmp) { + if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + continue; + + if (perf_evsel__field(evsel, "pathname")) { + evsel->handler = trace__vfs_getname; + found = true; + continue; + } + + list_del_init(&evsel->node); + evsel->evlist = NULL; perf_evsel__delete(evsel); - return false; } - evsel->handler = trace__vfs_getname; - perf_evlist__add(evlist, evsel); - return true; + return found; } static struct perf_evsel *perf_evsel__new_pgfault(u64 config) From a09faaf5d2de312ec77029933fc80f5a79aadb90 Mon Sep 17 00:00:00 2001 From: Jun-Ru Chang Date: Tue, 29 Jan 2019 11:56:07 +0800 Subject: [PATCH 2219/2608] MIPS: Remove function size check in get_frame_info() [ Upstream commit 2b424cfc69728224fcb5fad138ea7260728e0901 ] Patch (b6c7a324df37b "MIPS: Fix get_frame_info() handling of microMIPS function size.") introduces additional function size check for microMIPS by only checking insn between ip and ip + func_size. However, func_size in get_frame_info() is always 0 if KALLSYMS is not enabled. This causes get_frame_info() to return immediately without calculating correct frame_size, which in turn causes "Can't analyze schedule() prologue" warning messages at boot time. This patch removes func_size check, and let the frame_size check run up to 128 insns for both MIPS and microMIPS. Signed-off-by: Jun-Ru Chang Signed-off-by: Tony Wu Signed-off-by: Paul Burton Fixes: b6c7a324df37b ("MIPS: Fix get_frame_info() handling of microMIPS function size.") Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Signed-off-by: Sasha Levin --- arch/mips/kernel/process.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index e8b166e9146a..ea563bfea0e1 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -370,7 +370,7 @@ static inline int is_sp_move_ins(union mips_instruction *ip, int *frame_size) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip, *ip_end; + union mips_instruction insn, *ip; const unsigned int max_insns = 128; unsigned int last_insn_size = 0; unsigned int i; @@ -383,10 +383,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err; - ip_end = (void *)ip + info->func_size; - - for (i = 0; i < max_insns && ip < ip_end; i++) { + for (i = 0; i < max_insns; i++) { ip = (void *)ip + last_insn_size; + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.word = ip->halfword[0] << 16; last_insn_size = 2; From e18d0dad0c43b8879434f7f92d39b40b3f563289 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 10 Jan 2019 07:59:16 -0800 Subject: [PATCH 2220/2608] i2c: omap: Use noirq system sleep pm ops to idle device for suspend [ Upstream commit c6e2bd956936d925748581e4d0294f10f1d92f2c ] We currently get the following error with pixcir_ts driver during a suspend resume cycle: omap_i2c 4802a000.i2c: controller timed out pixcir_ts 1-005c: pixcir_int_enable: can't read reg 0x34 : -110 pixcir_ts 1-005c: Failed to disable interrupt generation: -110 pixcir_ts 1-005c: Failed to stop dpm_run_callback(): pixcir_i2c_ts_resume+0x0/0x98 [pixcir_i2c_ts] returns -110 PM: Device 1-005c failed to resume: error -110 And at least am437x based devices with pixcir_ts will fail to resume to a touchscreen that is configured as the wakeup-source in device tree for these devices. This is because pixcir_ts tries to reconfigure it's registers for noirq suspend which fails. This also leaves i2c-omap in enabled state for suspend. Let's fix the pixcir_ts issue and make sure i2c-omap is suspended by adding SET_NOIRQ_SYSTEM_SLEEP_PM_OPS. Let's also get rid of some ifdefs while at it and replace them with __maybe_unused as SET_RUNTIME_PM_OPS and SET_NOIRQ_SYSTEM_SLEEP_PM_OPS already deal with the various PM Kconfig options. Reported-by: Keerthy Signed-off-by: Tony Lindgren Acked-by: Vignesh R Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-omap.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 23c2ea2baedc..12ba183693d6 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1477,8 +1477,7 @@ static int omap_i2c_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int omap_i2c_runtime_suspend(struct device *dev) +static int __maybe_unused omap_i2c_runtime_suspend(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev); @@ -1504,7 +1503,7 @@ static int omap_i2c_runtime_suspend(struct device *dev) return 0; } -static int omap_i2c_runtime_resume(struct device *dev) +static int __maybe_unused omap_i2c_runtime_resume(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev); @@ -1519,20 +1518,18 @@ static int omap_i2c_runtime_resume(struct device *dev) } static const struct dev_pm_ops omap_i2c_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(omap_i2c_runtime_suspend, omap_i2c_runtime_resume, NULL) }; -#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops) -#else -#define OMAP_I2C_PM_OPS NULL -#endif /* CONFIG_PM */ static struct platform_driver omap_i2c_driver = { .probe = omap_i2c_probe, .remove = omap_i2c_remove, .driver = { .name = "omap_i2c", - .pm = OMAP_I2C_PM_OPS, + .pm = &omap_i2c_pm_ops, .of_match_table = of_match_ptr(omap_i2c_of_match), }, }; From ef1c919ec66b5cd8c8f19dd3683f90064813f1f5 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 21 Jan 2019 22:49:37 +0900 Subject: [PATCH 2221/2608] fs: ratelimit __find_get_block_slow() failure message. [ Upstream commit 43636c804df0126da669c261fc820fb22f62bfc2 ] When something let __find_get_block_slow() hit all_mapped path, it calls printk() for 100+ times per a second. But there is no need to print same message with such high frequency; it is just asking for stall warning, or at least bloating log files. [ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.873324][T15342] b_state=0x00000029, b_size=512 [ 399.878403][T15342] device loop0 blocksize: 4096 [ 399.883296][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.890400][T15342] b_state=0x00000029, b_size=512 [ 399.895595][T15342] device loop0 blocksize: 4096 [ 399.900556][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.907471][T15342] b_state=0x00000029, b_size=512 [ 399.912506][T15342] device loop0 blocksize: 4096 This patch reduces frequency to up to once per a second, in addition to concatenating three lines into one. [ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8, b_state=0x00000029, b_size=512, device loop0 blocksize: 4096 Signed-off-by: Tetsuo Handa Reviewed-by: Jan Kara Cc: Dmitry Vyukov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- fs/buffer.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index b96f3b98a6ef..8086cc8ff0bc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -208,6 +208,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) struct buffer_head *head; struct page *page; int all_mapped = 1; + static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); @@ -235,15 +236,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ - if (all_mapped) { - printk("__find_get_block_slow() failed. " - "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, - (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%zu\n", - bh->b_state, bh->b_size); - printk("device %pg blocksize: %d\n", bdev, - 1 << bd_inode->i_blkbits); + ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); + if (all_mapped && __ratelimit(&last_warned)) { + printk("__find_get_block_slow() failed. block=%llu, " + "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " + "device %pg blocksize: %d\n", + (unsigned long long)block, + (unsigned long long)bh->b_blocknr, + bh->b_state, bh->b_size, bdev, + 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->private_lock); From 14b718fd0bdb2a8f1afd9dedfce8c9b46bdf6d00 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 6 Feb 2019 14:43:42 -0800 Subject: [PATCH 2222/2608] qed: Fix EQ full firmware assert. [ Upstream commit 660492bcf4a7561b5fdc13be0ae0b0c0a8c120be ] When slowpath messages are sent with high rate, the resulting events can lead to a FW assert in case they are not handled fast enough (Event Queue Full assert). Attempt to send queued slowpath messages only after the newly evacuated entries in the EQ ring are indicated to FW. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_sp.h | 1 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 01a213d4ee9c..e7192f3babc2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -380,6 +380,7 @@ void qed_consq_setup(struct qed_hwfn *p_hwfn); * @param p_hwfn */ void qed_consq_free(struct qed_hwfn *p_hwfn); +int qed_spq_pend_post(struct qed_hwfn *p_hwfn); /** * @file diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 467755b6dd0b..01f8e2b5cb6c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -404,6 +404,11 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie) qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain)); + /* Attempt to post pending requests */ + spin_lock_bh(&p_hwfn->p_spq->lock); + rc = qed_spq_pend_post(p_hwfn); + spin_unlock_bh(&p_hwfn->p_spq->lock); + return rc; } @@ -747,7 +752,7 @@ static int qed_spq_post_list(struct qed_hwfn *p_hwfn, return 0; } -static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) +int qed_spq_pend_post(struct qed_hwfn *p_hwfn) { struct qed_spq *p_spq = p_hwfn->p_spq; struct qed_spq_entry *p_ent = NULL; @@ -879,7 +884,6 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent = NULL; struct qed_spq_entry *tmp; struct qed_spq_entry *found = NULL; - int rc; if (!p_hwfn) return -EINVAL; @@ -937,12 +941,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, */ qed_spq_return_entry(p_hwfn, found); - /* Attempt to post pending requests */ - spin_lock_bh(&p_spq->lock); - rc = qed_spq_pend_post(p_hwfn); - spin_unlock_bh(&p_spq->lock); - - return rc; + return 0; } int qed_consq_alloc(struct qed_hwfn *p_hwfn) From 6fbdc61620d47ae14d17d37953a3e43e74d6b75d Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 6 Feb 2019 14:43:44 -0800 Subject: [PATCH 2223/2608] qed: Consider TX tcs while deriving the max num_queues for PF. [ Upstream commit fb1faab74ddef9ec2d841d54e5d0912a097b3abe ] Max supported queues is derived incorrectly in the case of multi-CoS. Need to consider TCs while calculating num_queues for PF. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 4ffdde755db7..62cde3854a5c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2170,7 +2170,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 num_queues = 0; /* Since the feature controls only queue-zones, - * make sure we have the contexts [rx, tx, xdp] to + * make sure we have the contexts [rx, xdp, tcs] to * match. */ for_each_hwfn(cdev, i) { @@ -2180,7 +2180,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 cids; cids = hwfn->pf_params.eth_pf_params.num_cons; - num_queues += min_t(u16, l2_queues, cids / 3); + cids /= (2 + info->num_tc); + num_queues += min_t(u16, l2_queues, cids); } /* queues might theoretically be >256, but interrupts' From e2f4d4675420401218e1ebdaa3258d81cbf17716 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Sat, 9 Mar 2019 15:32:13 -0800 Subject: [PATCH 2224/2608] Input: wacom_serial4 - add support for Wacom ArtPad II tablet commit 44fc95e218a09d7966a9d448941fdb003f6bb69f upstream. Tablet initially begins communicating at 9600 baud, so this command should be used to connect to the device: $ inputattach --daemon --baud 9600 --wacom_iv /dev/ttyS0 https://github.com/linuxwacom/xf86-input-wacom/issues/40 Signed-off-by: Jason Gerecke Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/tablet/wacom_serial4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/tablet/wacom_serial4.c b/drivers/input/tablet/wacom_serial4.c index 38bfaca48eab..150f9eecaca7 100644 --- a/drivers/input/tablet/wacom_serial4.c +++ b/drivers/input/tablet/wacom_serial4.c @@ -187,6 +187,7 @@ enum { MODEL_DIGITIZER_II = 0x5544, /* UD */ MODEL_GRAPHIRE = 0x4554, /* ET */ MODEL_PENPARTNER = 0x4354, /* CT */ + MODEL_ARTPAD_II = 0x4B54, /* KT */ }; static void wacom_handle_model_response(struct wacom *wacom) @@ -245,6 +246,7 @@ static void wacom_handle_model_response(struct wacom *wacom) wacom->flags = F_HAS_STYLUS2 | F_HAS_SCROLLWHEEL; break; + case MODEL_ARTPAD_II: case MODEL_DIGITIZER_II: wacom->dev->name = "Wacom Digitizer II"; wacom->dev->id.version = MODEL_DIGITIZER_II; From d89a25ffd66fbbc8bcfaa881257bab5ac485a176 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Sat, 9 Mar 2019 15:48:04 -0800 Subject: [PATCH 2225/2608] Input: elan_i2c - add id for touchpad found in Lenovo s21e-20 commit e154ab69321ce2c54f19863d75c77b4e2dc9d365 upstream. Lenovo s21e-20 uses ELAN0601 in its ACPI tables for the Elan touchpad. Signed-off-by: Vincent Batts Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index fce70f4ef004..2ce805d31ed1 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1252,6 +1252,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, { "ELAN0600", 0 }, + { "ELAN0601", 0 }, { "ELAN0602", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, From 81f470ebe37ecb7822f99cc16e7a68477f8071f0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 11 Feb 2019 12:43:23 -0600 Subject: [PATCH 2226/2608] iscsi_ibft: Fix missing break in switch statement commit df997abeebadaa4824271009e2d2b526a70a11cb upstream. Add missing break statement in order to prevent the code from falling through to case ISCSI_BOOT_TGT_NAME, which is unnecessary. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: b33a84a38477 ("ibft: convert iscsi_ibft module to iscsi boot lib") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/iscsi_ibft.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 14042a64bdd5..132b9bae4b6a 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -542,6 +542,7 @@ static umode_t __init ibft_check_tgt_for(void *data, int type) case ISCSI_BOOT_TGT_NIC_ASSOC: case ISCSI_BOOT_TGT_CHAP_TYPE: rc = S_IRUGO; + break; case ISCSI_BOOT_TGT_NAME: if (tgt->tgt_name_len) rc = S_IRUGO; From 0472ddf81d02c85d480b470d749599094aaa1bde Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 15:42:42 -0600 Subject: [PATCH 2227/2608] scsi: aacraid: Fix missing break in switch statement commit 5e420fe635813e5746b296cfc8fff4853ae205a2 upstream. Add missing break statement and fix identation issue. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: 9cb62fa24e0d ("aacraid: Log firmware AIF messages") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/commsup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 3e38bae6ecde..a284527999c5 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1332,8 +1332,9 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) ADD : DELETE; break; } - case AifBuManagerEvent: - aac_handle_aif_bu(dev, aifcmd); + break; + case AifBuManagerEvent: + aac_handle_aif_bu(dev, aifcmd); break; } From c238ab2fb928b40d8af691b24628ca512e1c62e4 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 24 Jan 2019 08:52:33 +0100 Subject: [PATCH 2228/2608] arm64: dts: hikey: Give wifi some time after power-on commit 83b944174ad79825ae84a47af1a0354485b24602 upstream. Somewhere along recent changes to power control of the wl1835, power-on became very unreliable on the hikey, failing like this: wl1271_sdio: probe of mmc2:0001:1 failed with error -16 wl1271_sdio: probe of mmc2:0001:2 failed with error -16 After playing with some dt parameters and comparing to other users of this chip, it turned out we need some power-on delay to make things stable again. In contrast to those other users which define 200 ms, the hikey would already be happy with 1 ms. Still, we use the safer 10 ms, like on the Ultra96. Fixes: ea452678734e ("arm64: dts: hikey: Fix WiFi support") Cc: #4.12+ Signed-off-by: Jan Kiszka Acked-by: Ulf Hansson Signed-off-by: Wei Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 3aee6123d161..6887cc1a743d 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -118,6 +118,7 @@ reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>; clocks = <&pmic>; clock-names = "ext_clock"; + post-power-on-delay-ms = <10>; power-off-delay-us = <10>; }; From 9e4ce0485a832489da333e735324d3300f4b6cde Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 24 Jan 2019 13:22:57 +0100 Subject: [PATCH 2229/2608] ARM: dts: exynos: Fix pinctrl definition for eMMC RTSN line on Odroid X2/U3 commit ec33745bccc8f336957c751f4153421cc9ef5a54 upstream. Commit 225da7e65a03 ("ARM: dts: add eMMC reset line for exynos4412-odroid-common") added MMC power sequence for eMMC card of Odroid X2/U3. It reused generic sd1_cd pin control configuration node and only disabled pull-up. However that time the pinctrl configuration was not applied during MMC power sequence driver initialization. This has been changed later by commit d97a1e5d7cd2 ("mmc: pwrseq: convert to proper platform device"). It turned out then, that the provided pinctrl configuration is not correct, because the eMMC_RTSN line is being re-configured as 'special function/card detect function for mmc1 controller' not the simple 'output', thus the power sequence driver doesn't really set the pin value. This in effect broke the reboot of Odroid X2/U3 boards. Fix this by providing separate node with eMMC_RTSN pin configuration. Cc: Reported-by: Markus Reichl Suggested-by: Ulf Hansson Fixes: 225da7e65a03 ("ARM: dts: add eMMC reset line for exynos4412-odroid-common") Signed-off-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos4412-odroid-common.dtsi | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index 102acd78be15..0d516529bf54 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -60,7 +60,7 @@ }; emmc_pwrseq: pwrseq { - pinctrl-0 = <&sd1_cd>; + pinctrl-0 = <&emmc_rstn>; pinctrl-names = "default"; compatible = "mmc-pwrseq-emmc"; reset-gpios = <&gpk1 2 GPIO_ACTIVE_LOW>; @@ -161,12 +161,6 @@ cpu0-supply = <&buck2_reg>; }; -/* RSTN signal for eMMC */ -&sd1_cd { - samsung,pin-pud = ; - samsung,pin-drv = ; -}; - &pinctrl_1 { gpio_power_key: power_key { samsung,pins = "gpx1-3"; @@ -184,6 +178,11 @@ samsung,pins = "gpx3-7"; samsung,pin-pud = ; }; + + emmc_rstn: emmc-rstn { + samsung,pins = "gpk1-2"; + samsung,pin-pud = ; + }; }; &ehci { From 1b3cd7be3e8bd093cdb165ee3013dcb43ab7eff3 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 15 Feb 2019 11:36:50 +0100 Subject: [PATCH 2230/2608] ARM: dts: exynos: Add minimal clkout parameters to Exynos3250 PMU commit a66352e005488ecb4b534ba1af58a9f671eba9b8 upstream. Add minimal parameters needed by the Exynos CLKOUT driver to Exynos3250 PMU node. This fixes the following warning on boot: exynos_clkout_init: failed to register clkout clock Fixes: d19bb397e19e ("ARM: dts: exynos: Update PMU node with CLKOUT related data") Cc: Signed-off-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos3250.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index 3ed3d1a0fd40..aa06a02c3ff5 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -172,6 +172,9 @@ interrupt-controller; #interrupt-cells = <3>; interrupt-parent = <&gic>; + clock-names = "clkout8"; + clocks = <&cmu CLK_FIN_PLL>; + #clock-cells = <1>; }; mipi_phy: video-phy { From 0bcbfa51a77def77d079c6cd8acbd8ebd6a69c66 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 24 Jan 2019 13:06:58 +0100 Subject: [PATCH 2231/2608] drm: disable uncached DMA optimization for ARM and arm64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e02f5c1bb2283cfcee68f2f0feddcc06150f13aa ] The DRM driver stack is designed to work with cache coherent devices only, but permits an optimization to be enabled in some cases, where for some buffers, both the CPU and the GPU use uncached mappings, removing the need for DMA snooping and allocation in the CPU caches. The use of uncached GPU mappings relies on the correct implementation of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU will use cached mappings nonetheless. On x86 platforms, this does not seem to matter, as uncached CPU mappings will snoop the caches in any case. However, on ARM and arm64, enabling this optimization on a platform where NoSnoop is ignored results in loss of coherency, which breaks correct operation of the device. Since we have no way of detecting whether NoSnoop works or not, just disable this optimization entirely for ARM and arm64. Cc: Christian Koenig Cc: Alex Deucher Cc: David Zhou Cc: Huang Rui Cc: Junwei Zhang Cc: Michel Daenzer Cc: David Airlie Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sean Paul Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Will Deacon Cc: Christoph Hellwig Cc: Robin Murphy Cc: amd-gfx list Cc: dri-devel Reported-by: Carsten Haitzler Signed-off-by: Ard Biesheuvel Reviewed-by: Christian König Reviewed-by: Alex Deucher Link: https://patchwork.kernel.org/patch/10778815/ Signed-off-by: Christian König Signed-off-by: Sasha Levin --- include/drm/drm_cache.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index beab0f0d0cfb..250e2d13c61b 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -45,6 +45,24 @@ static inline bool drm_arch_can_wc_memory(void) return false; #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3) return false; +#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) + /* + * The DRM driver stack is designed to work with cache coherent devices + * only, but permits an optimization to be enabled in some cases, where + * for some buffers, both the CPU and the GPU use uncached mappings, + * removing the need for DMA snooping and allocation in the CPU caches. + * + * The use of uncached GPU mappings relies on the correct implementation + * of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU + * will use cached mappings nonetheless. On x86 platforms, this does not + * seem to matter, as uncached CPU mappings will snoop the caches in any + * case. However, on ARM and arm64, enabling this optimization on a + * platform where NoSnoop is ignored results in loss of coherency, which + * breaks correct operation of the device. Since we have no way of + * detecting whether NoSnoop works or not, just disable this + * optimization entirely for ARM and arm64. + */ + return false; #else return true; #endif From 8256eef5a6a5a288f89f4a4160cf19375a2f6603 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 13 Jul 2018 11:12:22 +0100 Subject: [PATCH 2232/2608] ARM: 8781/1: Fix Thumb-2 syscall return for binutils 2.29+ [ Upstream commit afc9f65e01cd114cb2cedf544d22239116ce0cc6 ] When building the kernel as Thumb-2 with binutils 2.29 or newer, if the assembler has seen the .type directive (via ENDPROC()) for a symbol, it automatically handles the setting of the lowest bit when the symbol is used with ADR. The badr macro on the other hand handles this lowest bit manually. This leads to a jump to a wrong address in the wrong state in the syscall return path: Internal error: Oops - undefined instruction: 0 [#2] SMP THUMB2 Modules linked in: CPU: 0 PID: 652 Comm: modprobe Tainted: G D 4.18.0-rc3+ #8 PC is at ret_fast_syscall+0x4/0x62 LR is at sys_brk+0x109/0x128 pc : [<80101004>] lr : [<801c8a35>] psr: 60000013 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 50c5387d Table: 9e82006a DAC: 00000051 Process modprobe (pid: 652, stack limit = 0x(ptrval)) 80101000 : 80101000: b672 cpsid i 80101002: f8d9 2008 ldr.w r2, [r9, #8] 80101006: f1b2 4ffe cmp.w r2, #2130706432 ; 0x7f000000 80101184 : 80101184: f8d9 a000 ldr.w sl, [r9] 80101188: e92d 0030 stmdb sp!, {r4, r5} 8010118c: f01a 0ff0 tst.w sl, #240 ; 0xf0 80101190: d117 bne.n 801011c2 <__sys_trace> 80101192: 46ba mov sl, r7 80101194: f5ba 7fc8 cmp.w sl, #400 ; 0x190 80101198: bf28 it cs 8010119a: f04f 0a00 movcs.w sl, #0 8010119e: f3af 8014 nop.w {20} 801011a2: f2af 1ea2 subw lr, pc, #418 ; 0x1a2 To fix this, add a new symbol name which doesn't have ENDPROC used on it and use that with badr. We can't remove the badr usage since that would would cause breakage with older binutils. Signed-off-by: Vincent Whitchurch Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/entry-common.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 54c10503d71f..d7dc808a3d15 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -46,6 +46,7 @@ saved_pc .req lr * features make this path too inefficient. */ ret_fast_syscall: +__ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts @@ -75,6 +76,7 @@ fast_work_pending: * r0 first to avoid needing to save registers around each C function call. */ ret_fast_syscall: +__ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 @@ -241,7 +243,7 @@ local_restart: tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? bne __sys_trace - invoke_syscall tbl, scno, r10, ret_fast_syscall + invoke_syscall tbl, scno, r10, __ret_fast_syscall add r1, sp, #S_OFF 2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) From 5a507c217779a6ef65e22bd7d337fed949f61c2b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Mar 2019 15:41:57 +0100 Subject: [PATCH 2233/2608] gfs2: Fix missed wakeups in find_insert_glock commit 605b0487f0bc1ae9963bf52ece0f5c8055186f81 upstream. Mark Syms has reported seeing tasks that are stuck waiting in find_insert_glock. It turns out that struct lm_lockname contains four padding bytes on 64-bit architectures that function glock_waitqueue doesn't skip when hashing the glock name. As a result, we can end up waking up the wrong waitqueue, and the waiting tasks may be stuck forever. Fix that by using ht_parms.key_len instead of sizeof(struct lm_lockname) for the key length. Reported-by: Mark Syms Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 11066d8647d2..d5284d0dbdb5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -107,7 +107,7 @@ static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode, static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name) { - u32 hash = jhash2((u32 *)name, sizeof(*name) / 4, 0); + u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0); return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS); } From 3944df7bae9dd13c3bfefd25fe2915e8b04220b3 Mon Sep 17 00:00:00 2001 From: "Daniel F. Dickinson" Date: Sat, 22 Dec 2018 01:09:13 -0500 Subject: [PATCH 2234/2608] ath9k: Avoid OF no-EEPROM quirks without qca,no-eeprom commit ce938231bd3b1d7af3cbd8836f084801090470e1 upstream. ath9k_of_init() function[0] was initially written on the assumption that if someone had an explicit ath9k OF node that "there must be something wrong, why would someone add an OF node if everything is fine"[1] (Quoting Martin Blumenstingl ) "it turns out it's not that simple. with your requirements I'm now aware of two use-cases where the current code in ath9k_of_init() doesn't work without modifications"[1] The "your requirements" Martin speaks of is the result of the fact that I have a device (PowerCloud Systems CR5000) has some kind of default - not unique mac address - set and requires to set the correct MAC address via mac-address devicetree property, however: "some cards come with a physical EEPROM chip [or OTP] so "qca,no-eeprom" should not be set (your use-case). in this case AH_USE_EEPROM should be set (which is the default when there is no OF node)"[1] The other use case is: the firmware on some PowerMac G5 seems to add a OF node for the ath9k card automatically. depending on the EEPROM on the card AH_NO_EEP_SWAP should be unset (which is the default when there is no OF node). see [3] After this patch to ath9k_of_init() the new behavior will be: if there's no OF node then everything is the same as before if there's an empty OF node then ath9k will use the hardware EEPROM (before ath9k would fail to initialize because no EEPROM data was provided by userspace) if there's an OF node with only a MAC address then ath9k will use the MAC address and the hardware EEPROM (see the case above) with "qca,no-eeprom" EEPROM data from userspace will be requested. the behavior here will not change [1] Martin provides additional background on EEPROM swapping[1]. Thanks to Christian Lamparter for all his help on troubleshooting this issue and the basis for this patch. [0]https://elixir.bootlin.com/linux/v4.20-rc7/source/drivers/net/wireless/ath/ath9k/init.c#L615 [1]https://github.com/openwrt/openwrt/pull/1645#issuecomment-448027058 [2]https://github.com/openwrt/openwrt/pull/1613 [3]https://patchwork.kernel.org/patch/10241731/ Fixes: 138b41253d9c ("ath9k: parse the device configuration from an OF node") Reviewed-by: Martin Blumenstingl Tested-by: Martin Blumenstingl Signed-off-by: Daniel F. Dickinson Signed-off-by: Kalle Valo Cc: Christian Lamparter Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index bb7936090b91..b8e520fc2870 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -580,15 +580,15 @@ static int ath9k_of_init(struct ath_softc *sc) ret = ath9k_eeprom_request(sc, eeprom_name); if (ret) return ret; + + ah->ah_flags &= ~AH_USE_EEPROM; + ah->ah_flags |= AH_NO_EEP_SWAP; } mac = of_get_mac_address(np); if (mac) ether_addr_copy(common->macaddr, mac); - ah->ah_flags &= ~AH_USE_EEPROM; - ah->ah_flags |= AH_NO_EEP_SWAP; - return 0; } From 6f166975a72c85094ca5364f85efd46b36f0f86a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 Feb 2019 20:36:53 +0100 Subject: [PATCH 2235/2608] driver core: Postpone DMA tear-down until after devres release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 376991db4b6464e906d699ef07681e2ffa8ab08c upstream. When unbinding the (IOMMU-enabled) R-Car SATA device on Salvator-XS (R-Car H3 ES2.0), in preparation of rebinding against vfio-platform for device pass-through for virtualization:     echo ee300000.sata > /sys/bus/platform/drivers/sata_rcar/unbind the kernel crashes with:     Unable to handle kernel paging request at virtual address ffffffbf029ffffc     Mem abort info:       ESR = 0x96000006       Exception class = DABT (current EL), IL = 32 bits       SET = 0, FnV = 0       EA = 0, S1PTW = 0     Data abort info:       ISV = 0, ISS = 0x00000006       CM = 0, WnR = 0     swapper pgtable: 4k pages, 39-bit VAs, pgdp = 000000007e8c586c     [ffffffbf029ffffc] pgd=000000073bfc6003, pud=000000073bfc6003, pmd=0000000000000000     Internal error: Oops: 96000006 [#1] SMP     Modules linked in:     CPU: 0 PID: 1098 Comm: bash Not tainted 5.0.0-rc5-salvator-x-00452-g37596f884f4318ef #287     Hardware name: Renesas Salvator-X 2nd version board based on r8a7795 ES2.0+ (DT)     pstate: 60400005 (nZCv daif +PAN -UAO)     pc : __free_pages+0x8/0x58     lr : __dma_direct_free_pages+0x50/0x5c     sp : ffffff801268baa0     x29: ffffff801268baa0 x28: 0000000000000000     x27: ffffffc6f9c60bf0 x26: ffffffc6f9c60bf0     x25: ffffffc6f9c60810 x24: 0000000000000000     x23: 00000000fffff000 x22: ffffff8012145000     x21: 0000000000000800 x20: ffffffbf029fffc8     x19: 0000000000000000 x18: ffffffc6f86c42c8     x17: 0000000000000000 x16: 0000000000000070     x15: 0000000000000003 x14: 0000000000000000     x13: ffffff801103d7f8 x12: 0000000000000028     x11: ffffff8011117604 x10: 0000000000009ad8     x9 : ffffff80110126d0 x8 : ffffffc6f7563000     x7 : 6b6b6b6b6b6b6b6b x6 : 0000000000000018     x5 : ffffff8011cf3cc8 x4 : 0000000000004000     x3 : 0000000000080000 x2 : 0000000000000001     x1 : 0000000000000000 x0 : ffffffbf029fffc8     Process bash (pid: 1098, stack limit = 0x00000000c38e3e32)     Call trace:      __free_pages+0x8/0x58      __dma_direct_free_pages+0x50/0x5c      arch_dma_free+0x1c/0x98      dma_direct_free+0x14/0x24      dma_free_attrs+0x9c/0xdc      dmam_release+0x18/0x20      release_nodes+0x25c/0x28c      devres_release_all+0x48/0x4c      device_release_driver_internal+0x184/0x1f0      device_release_driver+0x14/0x1c      unbind_store+0x70/0xb8      drv_attr_store+0x24/0x34      sysfs_kf_write+0x4c/0x64      kernfs_fop_write+0x154/0x1c4      __vfs_write+0x34/0x164      vfs_write+0xb4/0x16c      ksys_write+0x5c/0xbc      __arm64_sys_write+0x14/0x1c      el0_svc_common+0x98/0x114      el0_svc_handler+0x1c/0x24      el0_svc+0x8/0xc     Code: d51b4234 17fffffa a9bf7bfd 910003fd (b9403404)     ---[ end trace 8c564cdd3a1a840f ]--- While I've bisected this to commit e8e683ae9a736407 ("iommu/of: Fix probe-deferral"), and reverting that commit on post-v5.0-rc4 kernels does fix the problem, this turned out to be a red herring. On arm64, arch_teardown_dma_ops() resets dev->dma_ops to NULL. Hence if a driver has used a managed DMA allocation API, the allocated DMA memory will be freed using the direct DMA ops, while it may have been allocated using a custom DMA ops (iommu_dma_ops in this case). Fix this by reversing the order of the calls to devres_release_all() and arch_teardown_dma_ops(). Signed-off-by: Geert Uytterhoeven Acked-by: Christoph Hellwig Reviewed-by: Rafael J. Wysocki Cc: stable Reviewed-by: Robin Murphy [rm: backport for 4.12-4.19 - kernels before 5.0 will not see the crash above, but may get silent memory corruption instead] Signed-off-by: Robin Murphy Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index d928cc6d0638..cb3672cfdaaa 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -850,9 +850,9 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv->remove(dev); device_links_driver_cleanup(dev); - dma_deconfigure(dev); devres_release_all(dev); + dma_deconfigure(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) From 3abe75e37347254f6d5401b4da267c20e220b8f5 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 5 Mar 2019 22:23:15 +0100 Subject: [PATCH 2236/2608] perf/x86/intel: Make cpuc allocations consistent commit d01b1f96a82e5dd7841a1d39db3abfdaf95f70ab upstream The cpuc data structure allocation is different between fake and real cpuc's; use the same code to init/free both. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 13 +++++-------- arch/x86/events/intel/core.c | 29 ++++++++++++++++++----------- arch/x86/events/perf_event.h | 11 ++++++++--- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e14a39598e8a..65e44f0588e2 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1968,7 +1968,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) */ static void free_fake_cpuc(struct cpu_hw_events *cpuc) { - kfree(cpuc->shared_regs); + intel_cpuc_finish(cpuc); kfree(cpuc); } @@ -1980,14 +1980,11 @@ static struct cpu_hw_events *allocate_fake_cpuc(void) cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); if (!cpuc) return ERR_PTR(-ENOMEM); - - /* only needed, if we have extra_regs */ - if (x86_pmu.extra_regs) { - cpuc->shared_regs = allocate_shared_regs(cpu); - if (!cpuc->shared_regs) - goto error; - } cpuc->is_fake = 1; + + if (intel_cpuc_prepare(cpuc, cpu)) + goto error; + return cpuc; error: free_fake_cpuc(cpuc); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9f556c94a0b8..b1bec46698cc 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3262,7 +3262,7 @@ ssize_t intel_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } -struct intel_shared_regs *allocate_shared_regs(int cpu) +static struct intel_shared_regs *allocate_shared_regs(int cpu) { struct intel_shared_regs *regs; int i; @@ -3294,10 +3294,9 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) return c; } -static int intel_pmu_cpu_prepare(int cpu) -{ - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); +int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) +{ if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) @@ -3307,7 +3306,7 @@ static int intel_pmu_cpu_prepare(int cpu) if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); - cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); + cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); if (!cpuc->constraint_list) goto err_shared_regs; @@ -3332,6 +3331,11 @@ err: return -ENOMEM; } +static int intel_pmu_cpu_prepare(int cpu) +{ + return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu); +} + static void flip_smm_bit(void *data) { unsigned long set = *(unsigned long *)data; @@ -3403,9 +3407,8 @@ static void intel_pmu_cpu_starting(int cpu) } } -static void free_excl_cntrs(int cpu) +static void free_excl_cntrs(struct cpu_hw_events *cpuc) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct intel_excl_cntrs *c; c = cpuc->excl_cntrs; @@ -3423,9 +3426,8 @@ static void intel_pmu_cpu_dying(int cpu) fini_debug_store_on_cpu(cpu); } -static void intel_pmu_cpu_dead(int cpu) +void intel_cpuc_finish(struct cpu_hw_events *cpuc) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct intel_shared_regs *pc; pc = cpuc->shared_regs; @@ -3435,7 +3437,12 @@ static void intel_pmu_cpu_dead(int cpu) cpuc->shared_regs = NULL; } - free_excl_cntrs(cpu); + free_excl_cntrs(cpuc); +} + +static void intel_pmu_cpu_dead(int cpu) +{ + intel_cpuc_finish(&per_cpu(cpu_hw_events, cpu)); } static void intel_pmu_sched_task(struct perf_event_context *ctx, @@ -4494,7 +4501,7 @@ static __init int fixup_ht_bug(void) hardlockup_detector_perf_restart(); for_each_online_cpu(c) - free_excl_cntrs(c); + free_excl_cntrs(&per_cpu(cpu_hw_events, c)); cpus_read_unlock(); pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n"); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index fbbc10338987..ee96c4883188 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -880,7 +880,8 @@ struct event_constraint * x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event); -struct intel_shared_regs *allocate_shared_regs(int cpu); +extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu); +extern void intel_cpuc_finish(struct cpu_hw_events *cpuc); int intel_pmu_init(void); @@ -1014,9 +1015,13 @@ static inline int intel_pmu_init(void) return 0; } -static inline struct intel_shared_regs *allocate_shared_regs(int cpu) +static inline int intel_cpuc_prepare(struct cpu_hw_event *cpuc, int cpu) +{ + return 0; +} + +static inline void intel_cpuc_finish(struct cpu_hw_event *cpuc) { - return NULL; } static inline int is_ht_workaround_enabled(void) From 2a78ea141f9088dd4e6745c07dd3971162a73930 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 5 Mar 2019 22:23:16 +0100 Subject: [PATCH 2237/2608] perf/x86/intel: Generalize dynamic constraint creation commit 11f8b2d65ca9029591c8df26bb6bd063c312b7fe upstream Such that we can re-use it. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 51 +++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index b1bec46698cc..3c29e7fe53f1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2639,6 +2639,35 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc) raw_spin_unlock(&excl_cntrs->lock); } +static struct event_constraint * +dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx) +{ + WARN_ON_ONCE(!cpuc->constraint_list); + + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { + struct event_constraint *cx; + + /* + * grab pre-allocated constraint entry + */ + cx = &cpuc->constraint_list[idx]; + + /* + * initialize dynamic constraint + * with static constraint + */ + *cx = *c; + + /* + * mark constraint as dynamic + */ + cx->flags |= PERF_X86_EVENT_DYNAMIC; + c = cx; + } + + return c; +} + static struct event_constraint * intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, int idx, struct event_constraint *c) @@ -2669,27 +2698,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * only needed when constraint has not yet * been cloned (marked dynamic) */ - if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { - struct event_constraint *cx; - - /* - * grab pre-allocated constraint entry - */ - cx = &cpuc->constraint_list[idx]; - - /* - * initialize dynamic constraint - * with static constraint - */ - *cx = *c; - - /* - * mark constraint as dynamic, so we - * can free it later on - */ - cx->flags |= PERF_X86_EVENT_DYNAMIC; - c = cx; - } + c = dyn_constraint(cpuc, c, idx); /* * From here on, the constraint is dynamic. From 0e6487a0c7a8099231e0c14312a31efcf5f38e4e Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 5 Mar 2019 22:23:17 +0100 Subject: [PATCH 2238/2608] x86: Add TSX Force Abort CPUID/MSR commit 52f64909409c17adf54fcf5f9751e0544ca3a6b4 upstream Skylake systems will receive a microcode update to address a TSX errata. This microcode will (by default) clobber PMC3 when TSX instructions are (speculatively or not) executed. It also provides an MSR to cause all TSX transaction to abort and preserve PMC3. Add the CPUID enumeration and MSR definition. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 7d910827126b..e90940ecb436 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -339,6 +339,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fed3636dce9a..b0df002c60df 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -610,6 +610,12 @@ #define MSR_IA32_TSC_DEADLINE 0x000006E0 + +#define MSR_TSX_FORCE_ABORT 0x0000010F + +#define MSR_TFA_RTM_FORCE_ABORT_BIT 0 +#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) + /* P4/Xeon+ specific */ #define MSR_IA32_MCG_EAX 0x00000180 #define MSR_IA32_MCG_EBX 0x00000181 From d7ec8d8cc95f2899e7047fb5eda53d7f48553ffe Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 5 Mar 2019 22:23:18 +0100 Subject: [PATCH 2239/2608] perf/x86/intel: Implement support for TSX Force Abort commit 400816f60c543153656ac74eaf7f36f6b7202378 upstream Skylake (and later) will receive a microcode update to address a TSX errata. This microcode will, on execution of a TSX instruction (speculative or not) use (clobber) PMC3. This update will also provide a new MSR to change this behaviour along with a CPUID bit to enumerate the presence of this new MSR. When the MSR gets set; the microcode will no longer use PMC3 but will Force Abort every TSX transaction (upon executing COMMIT). When TSX Force Abort (TFA) is allowed (default); the MSR gets set when PMC3 gets scheduled and cleared when, after scheduling, PMC3 is unused. When TFA is not allowed; clear PMC3 from all constraints such that it will not get used. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 74 ++++++++++++++++++++++++++++++++++-- arch/x86/events/perf_event.h | 6 +++ 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3c29e7fe53f1..65a369a42338 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1995,6 +1995,39 @@ static void intel_pmu_nhm_enable_all(int added) intel_pmu_enable_all(added); } +static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on) +{ + u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0; + + if (cpuc->tfa_shadow != val) { + cpuc->tfa_shadow = val; + wrmsrl(MSR_TSX_FORCE_ABORT, val); + } +} + +static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) +{ + /* + * We're going to use PMC3, make sure TFA is set before we touch it. + */ + if (cntr == 3 && !cpuc->is_fake) + intel_set_tfa(cpuc, true); +} + +static void intel_tfa_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * If we find PMC3 is no longer used when we enable the PMU, we can + * clear TFA. + */ + if (!test_bit(3, cpuc->active_mask)) + intel_set_tfa(cpuc, false); + + intel_pmu_enable_all(added); +} + static inline u64 intel_pmu_get_status(void) { u64 status; @@ -3218,6 +3251,26 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static bool allow_tsx_force_abort = true; + +static struct event_constraint * +tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event); + + /* + * Without TFA we must not use PMC3. + */ + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) { + c = dyn_constraint(cpuc, c, idx); + c->idxmsk64 &= ~(1ULL << 3); + c->weight--; + } + + return c; +} + /* * Broadwell: * @@ -3312,13 +3365,15 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) goto err; } - if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { + if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) { size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); if (!cpuc->constraint_list) goto err_shared_regs; + } + if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { cpuc->excl_cntrs = allocate_excl_cntrs(cpu); if (!cpuc->excl_cntrs) goto err_constraint_list; @@ -3425,9 +3480,10 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc) if (c->core_id == -1 || --c->refcnt == 0) kfree(c); cpuc->excl_cntrs = NULL; - kfree(cpuc->constraint_list); - cpuc->constraint_list = NULL; } + + kfree(cpuc->constraint_list); + cpuc->constraint_list = NULL; } static void intel_pmu_cpu_dying(int cpu) @@ -3912,8 +3968,11 @@ static struct attribute *intel_pmu_caps_attrs[] = { NULL }; +DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort); + static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, + NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */ NULL, }; @@ -4369,6 +4428,15 @@ __init int intel_pmu_init(void) x86_pmu.cpu_events = get_hsw_events_attrs(); intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); + + if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + x86_pmu.flags |= PMU_FL_TFA; + x86_pmu.get_event_constraints = tfa_get_event_constraints; + x86_pmu.enable_all = intel_tfa_pmu_enable_all; + x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; + intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr; + } + pr_cont("Skylake events, "); name = "skylake"; break; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ee96c4883188..9702f4ed4748 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -238,6 +238,11 @@ struct cpu_hw_events { struct intel_excl_cntrs *excl_cntrs; int excl_thread_id; /* 0 or 1 */ + /* + * SKL TSX_FORCE_ABORT shadow + */ + u64 tfa_shadow; + /* * AMD specific bits */ @@ -672,6 +677,7 @@ do { \ #define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */ #define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ +#define PMU_FL_TFA 0x20 /* deal with TSX force abort */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr From d9896164529697fade02aafc65a06722f7191d68 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Mar 2019 14:03:24 -0700 Subject: [PATCH 2240/2608] Linux 4.14.106 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d5375891a7eb..ecc3a2a82a49 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 105 +SUBLEVEL = 106 EXTRAVERSION = NAME = Petit Gorille From 785eb09ceb61a9c7d2a6c72e033bef04a89f1778 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Fri, 10 Aug 2018 14:43:02 -0700 Subject: [PATCH 2241/2608] ACPICA: Reference Counts: increase max to 0x4000 for large servers commit 8b23570ab001c1982c8a068cde468ff067255314 upstream. Increase the reference count limit to 0x4000 as the current one is not sufficient for some large server systems. Reviewed-by: Dimitri Sivanich Tested-by: Russ Anderson Reported-by: Mike Travis Signed-off-by: Mike Travis Signed-off-by: Erik Schmauss [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki Cc: Frank van der Linden Signed-off-by: Greg Kroah-Hartman --- include/acpi/acconfig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index 6db3b4668b1a..707ce8aca824 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -123,7 +123,7 @@ /* Maximum object reference count (detects object deletion issues) */ -#define ACPI_MAX_REFERENCE_COUNT 0x1000 +#define ACPI_MAX_REFERENCE_COUNT 0x4000 /* Default page size for use in mapping memory for operation regions */ From 1ce1eb5fb1daa64eba1dc1d58b13fb3b327be8ed Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Dec 2017 01:50:40 +0000 Subject: [PATCH 2242/2608] perf tools: Fix compile error with libunwind x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 44df1afdb174fd6038e419f80efd914c0b5f2f85 upstream. Fix a compile error: ... CC util/libunwind/x86_32.o In file included from util/libunwind/x86_32.c:33:0: util/libunwind/../../arch/x86/util/unwind-libunwind.c: In function 'libunwind__x86_reg_id': util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: error: 'EINVAL' undeclared (first use in this function) return -EINVAL; ^ util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: note: each undeclared identifier is reported only once for each function it appears in mv: cannot stat 'util/libunwind/.x86_32.o.tmp': No such file or directory make[4]: *** [util/libunwind/x86_32.o] Error 1 make[3]: *** [util] Error 2 make[2]: *** [libperf-in.o] Error 2 make[1]: *** [sub-make] Error 2 make: *** [all] Error 2 It happens when libunwind-x86 feature is detected. Signed-off-by: Wang Nan Link: http://lkml.kernel.org/r/20171206015040.114574-1-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Cc: Daniel Díaz Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/x86/util/unwind-libunwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index 9c917f80c906..05920e3edf7a 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#ifndef REMOTE_UNWIND_LIBUNWIND #include +#ifndef REMOTE_UNWIND_LIBUNWIND #include #include "perf_regs.h" #include "../../util/unwind.h" From 136e109797f3e9e61076a401eb1353362be592b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Mar 2019 10:39:37 -0700 Subject: [PATCH 2243/2608] gro_cells: make sure device is up in gro_cells_receive() [ Upstream commit 2a5ff07a0eb945f291e361aa6f6becca8340ba46 ] We keep receiving syzbot reports [1] that show that tunnels do not play the rcu/IFF_UP rules properly. At device dismantle phase, gro_cells_destroy() will be called only after a full rcu grace period is observed after IFF_UP has been cleared. This means that IFF_UP needs to be tested before queueing packets into netif_rx() or gro_cells. This patch implements the test in gro_cells_receive() because too many callers do not seem to bother enough. [1] BUG: unable to handle kernel paging request at fffff4ca0b9ffffe PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline] RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline] RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline] RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline] RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78 Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03 RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02 RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0 RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645 R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000 R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75 kobject: 'loop2' (000000004bd7d84a): kobject_uevent_env FS: 0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0 Call Trace: kobject: 'loop2' (000000004bd7d84a): fill_kobj_path: path = '/devices/virtual/block/loop2' ip_tunnel_dev_free+0x19/0x60 net/ipv4/ip_tunnel.c:1010 netdev_run_todo+0x51c/0x7d0 net/core/dev.c:8970 rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:116 ip_tunnel_delete_nets+0x423/0x5f0 net/ipv4/ip_tunnel.c:1124 vti_exit_batch_net+0x23/0x30 net/ipv4/ip_vti.c:495 ops_exit_list.isra.0+0x105/0x160 net/core/net_namespace.c:156 cleanup_net+0x3fb/0x960 net/core/net_namespace.c:551 process_one_work+0x98e/0x1790 kernel/workqueue.c:2173 worker_thread+0x98/0xe40 kernel/workqueue.c:2319 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Modules linked in: CR2: fffff4ca0b9ffffe [ end trace 513fc9c1338d1cb3 ] RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline] RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline] RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline] RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline] RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78 Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03 RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02 RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0 RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645 R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000 kobject: 'loop3' (00000000e4ee57a6): kobject_uevent_env R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75 FS: 0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0 Fixes: c9e6bc644e55 ("net: add gro_cells infrastructure") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/gro_cells.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index acf45ddbe924..e095fb871d91 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -13,22 +13,36 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct gro_cell *cell; + int res; - if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) - return netif_rx(skb); + rcu_read_lock(); + if (unlikely(!(dev->flags & IFF_UP))) + goto drop; + + if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) { + res = netif_rx(skb); + goto unlock; + } cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { +drop: atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); - return NET_RX_DROP; + res = NET_RX_DROP; + goto unlock; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); - return NET_RX_SUCCESS; + + res = NET_RX_SUCCESS; + +unlock: + rcu_read_unlock(); + return res; } EXPORT_SYMBOL(gro_cells_receive); From c8c6b846249b8cfe825d86e71df9cbbea0d9c01c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 6 Mar 2019 10:42:53 +0100 Subject: [PATCH 2244/2608] ipv4/route: fail early when inet dev is missing [ Upstream commit 22c74764aa2943ecdf9f07c900d8a9c8ba6c9265 ] If a non local multicast packet reaches ip_route_input_rcu() while the ingress device IPv4 private data (in_dev) is NULL, we end up doing a NULL pointer dereference in IN_DEV_MFORWARD(). Since the later call to ip_route_input_mc() is going to fail if !in_dev, we can fail early in such scenario and avoid the dangerous code path. v1 -> v2: - clarified the commit message, no code changes Reported-by: Tianhao Zhao Fixes: e58e41596811 ("net: Enable support for VRF with ipv4 multicast") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb30f4e4e553..0a35d294abec 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2123,12 +2123,13 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, int our = 0; int err = -EINVAL; - if (in_dev) - our = ip_check_mc_rcu(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + if (!in_dev) + return err; + our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); /* check l3 master if no match yet */ - if ((!in_dev || !our) && netif_is_l3_slave(dev)) { + if (!our && netif_is_l3_slave(dev)) { struct in_device *l3_in_dev; l3_in_dev = __in_dev_get_rcu(skb->dev); From af6822a7915acf7eacd84cabf4a1c9013345aed0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Mar 2019 06:50:11 -0700 Subject: [PATCH 2245/2608] l2tp: fix infoleak in l2tp_ip6_recvmsg() [ Upstream commit 163d1c3d6f17556ed3c340d3789ea93be95d6c28 ] Back in 2013 Hannes took care of most of such leaks in commit bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") But the bug in l2tp_ip6_recvmsg() has not been fixed. syzbot report : BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 CPU: 1 PID: 10996 Comm: syz-executor362 Not tainted 5.0.0+ #11 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600 kmsan_internal_check_memory+0x9f4/0xb10 mm/kmsan/kmsan.c:694 kmsan_copy_to_user+0xab/0xc0 mm/kmsan/kmsan_hooks.c:601 _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 copy_to_user include/linux/uaccess.h:174 [inline] move_addr_to_user+0x311/0x570 net/socket.c:227 ___sys_recvmsg+0xb65/0x1310 net/socket.c:2283 do_recvmmsg+0x646/0x10c0 net/socket.c:2390 __sys_recvmmsg net/socket.c:2469 [inline] __do_sys_recvmmsg net/socket.c:2492 [inline] __se_sys_recvmmsg+0x1d1/0x350 net/socket.c:2485 __x64_sys_recvmmsg+0x62/0x80 net/socket.c:2485 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x445819 Code: e8 6c b6 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 12 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f64453eddb8 EFLAGS: 00000246 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 00000000006dac28 RCX: 0000000000445819 RDX: 0000000000000005 RSI: 0000000020002f80 RDI: 0000000000000003 RBP: 00000000006dac20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dac2c R13: 00007ffeba8f87af R14: 00007f64453ee9c0 R15: 20c49ba5e353f7cf Local variable description: ----addr@___sys_recvmsg Variable was created at: ___sys_recvmsg+0xf6/0x1310 net/socket.c:2244 do_recvmmsg+0x646/0x10c0 net/socket.c:2390 Bytes 0-31 of 32 are uninitialized Memory access of size 32 starts at ffff8880ae62fbb0 Data copied to user address 0000000020000000 Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip6.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 3c77507601c7..bec13226ce4f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -684,9 +684,6 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*lsa); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); @@ -716,6 +713,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = inet6_iif(skb); + *addr_len = sizeof(*lsa); } if (np->rxopt.all) From 1aa1e0a3f6dde5c37bb5d6a9121847b95b255c1f Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Wed, 6 Mar 2019 22:45:01 +0800 Subject: [PATCH 2246/2608] net: hsr: fix memory leak in hsr_dev_finalize() [ Upstream commit 6caabe7f197d3466d238f70915d65301f1716626 ] If hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER) failed to add port, it directly returns res and forgets to free the node that allocated in hsr_create_self_node(), and forgets to delete the node->mac_list linked in hsr->self_node_db. BUG: memory leak unreferenced object 0xffff8881cfa0c780 (size 64): comm "syz-executor.0", pid 2077, jiffies 4294717969 (age 2415.377s) hex dump (first 32 bytes): e0 c7 a0 cf 81 88 ff ff 00 02 00 00 00 00 ad de ................ 00 e6 49 cd 81 88 ff ff c0 9b 87 d0 81 88 ff ff ..I............. backtrace: [<00000000e2ff5070>] hsr_dev_finalize+0x736/0x960 [hsr] [<000000003ed2e597>] hsr_newlink+0x2b2/0x3e0 [hsr] [<000000003fa8c6b6>] __rtnl_newlink+0xf1f/0x1600 net/core/rtnetlink.c:3182 [<000000001247a7ad>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3240 [<00000000e7d1b61d>] rtnetlink_rcv_msg+0x54e/0xb90 net/core/rtnetlink.c:5130 [<000000005556bd3a>] netlink_rcv_skb+0x129/0x340 net/netlink/af_netlink.c:2477 [<00000000741d5ee6>] netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] [<00000000741d5ee6>] netlink_unicast+0x49a/0x650 net/netlink/af_netlink.c:1336 [<000000009d56f9b7>] netlink_sendmsg+0x88b/0xdf0 net/netlink/af_netlink.c:1917 [<0000000046b35c59>] sock_sendmsg_nosec net/socket.c:621 [inline] [<0000000046b35c59>] sock_sendmsg+0xc3/0x100 net/socket.c:631 [<00000000d208adc9>] __sys_sendto+0x33e/0x560 net/socket.c:1786 [<00000000b582837a>] __do_sys_sendto net/socket.c:1798 [inline] [<00000000b582837a>] __se_sys_sendto net/socket.c:1794 [inline] [<00000000b582837a>] __x64_sys_sendto+0xdd/0x1b0 net/socket.c:1794 [<00000000c866801d>] do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 [<00000000fea382d9>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000e01dacb3>] 0xffffffffffffffff Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Reported-by: Hulk Robot Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_device.c | 4 +++- net/hsr/hsr_framereg.c | 12 ++++++++++++ net/hsr/hsr_framereg.h | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 172d8309f89e..0e0b2a09c453 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -487,7 +487,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); if (res) - return res; + goto err_add_port; res = register_netdevice(hsr_dev); if (res) @@ -507,6 +507,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], fail: hsr_for_each_port(hsr, port) hsr_del_port(port); +err_add_port: + hsr_del_node(&hsr->self_node_db); return res; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 284a9b820df8..6705420b3111 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db, return 0; } +void hsr_del_node(struct list_head *self_node_db) +{ + struct hsr_node *node; + + rcu_read_lock(); + node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); + rcu_read_unlock(); + if (node) { + list_del_rcu(&node->mac_list); + kfree(node); + } +} /* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA; * seq_out is used to initialize filtering of outgoing duplicate frames diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 4e04f0e868e9..43958a338095 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -16,6 +16,7 @@ struct hsr_node; +void hsr_del_node(struct list_head *self_node_db); struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], u16 seq_out); struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, From 60001460c89d2c92fe53e8b1865a1ddec7fb01eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2019 09:36:33 -0800 Subject: [PATCH 2247/2608] net/hsr: fix possible crash in add_timer() [ Upstream commit 1e027960edfaa6a43f9ca31081729b716598112b ] syzbot found another add_timer() issue, this time in net/hsr [1] Let's use mod_timer() which is safe. [1] kernel BUG at kernel/time/timer.c:1136! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 15909 Comm: syz-executor.3 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 kobject: 'loop2' (00000000f5629718): kobject_uevent_env RIP: 0010:add_timer kernel/time/timer.c:1136 [inline] RIP: 0010:add_timer+0x654/0xbe0 kernel/time/timer.c:1134 Code: 0f 94 c5 31 ff 44 89 ee e8 09 61 0f 00 45 84 ed 0f 84 77 fd ff ff e8 bb 5f 0f 00 e8 07 10 a0 ff e9 68 fd ff ff e8 ac 5f 0f 00 <0f> 0b e8 a5 5f 0f 00 0f 0b e8 9e 5f 0f 00 4c 89 b5 58 ff ff ff e9 RSP: 0018:ffff8880656eeca0 EFLAGS: 00010246 kobject: 'loop2' (00000000f5629718): fill_kobj_path: path = '/devices/virtual/block/loop2' RAX: 0000000000040000 RBX: 1ffff1100caddd9a RCX: ffffc9000c436000 RDX: 0000000000040000 RSI: ffffffff816056c4 RDI: ffff88806a2f6cc8 RBP: ffff8880656eed58 R08: ffff888067f4a300 R09: ffff888067f4abc8 R10: 0000000000000000 R11: 0000000000000000 R12: ffff88806a2f6cc0 R13: dffffc0000000000 R14: 0000000000000001 R15: ffff8880656eed30 FS: 00007fc2019bf700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000738000 CR3: 0000000067e8e000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_check_announce net/hsr/hsr_device.c:99 [inline] hsr_check_carrier_and_operstate+0x567/0x6f0 net/hsr/hsr_device.c:120 hsr_netdev_notify+0x297/0xa00 net/hsr/hsr_main.c:51 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline] call_netdevice_notifiers net/core/dev.c:1765 [inline] dev_open net/core/dev.c:1436 [inline] dev_open+0x143/0x160 net/core/dev.c:1424 team_port_add drivers/net/team/team.c:1203 [inline] team_add_slave+0xa07/0x15d0 drivers/net/team/team.c:1933 do_set_master net/core/rtnetlink.c:2358 [inline] do_set_master+0x1d4/0x230 net/core/rtnetlink.c:2332 do_setlink+0x966/0x3510 net/core/rtnetlink.c:2493 rtnl_setlink+0x271/0x3b0 net/core/rtnetlink.c:2747 rtnetlink_rcv_msg+0x465/0xb00 net/core/rtnetlink.c:5192 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5210 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:632 sock_write_iter+0x27c/0x3e0 net/socket.c:923 call_write_iter include/linux/fs.h:1869 [inline] do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680 do_iter_write fs/read_write.c:956 [inline] do_iter_write+0x184/0x610 fs/read_write.c:937 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001 do_writev+0xf6/0x290 fs/read_write.c:1036 __do_sys_writev fs/read_write.c:1109 [inline] __se_sys_writev fs/read_write.c:1106 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457f29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc2019bec78 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457f29 RDX: 0000000000000001 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc2019bf6d4 R13: 00000000004c4a60 R14: 00000000004dd218 R15: 00000000ffffffff Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Arvid Brodin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_device.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 0e0b2a09c453..cfe20f15f618 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -94,9 +94,8 @@ static void hsr_check_announce(struct net_device *hsr_dev, && (old_operstate != IF_OPER_UP)) { /* Went up */ hsr->announce_count = 0; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, + jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) @@ -332,6 +331,7 @@ static void hsr_announce(unsigned long data) { struct hsr_priv *hsr; struct hsr_port *master; + unsigned long interval; hsr = (struct hsr_priv *) data; @@ -343,18 +343,16 @@ static void hsr_announce(unsigned long data) hsr->protVersion); hsr->announce_count++; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); } else { send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, hsr->protVersion); - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); } if (is_admin_up(master->dev)) - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, jiffies + interval); rcu_read_unlock(); } From a68ac22fc4366b37d9271a5300da59bfeb448cd3 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 11 Mar 2019 16:29:32 +0800 Subject: [PATCH 2248/2608] net: sit: fix UBSAN Undefined behaviour in check_6rd [ Upstream commit a843dc4ebaecd15fca1f4d35a97210f72ea1473b ] In func check_6rd,tunnel->ip6rd.relay_prefixlen may equal to 32,so UBSAN complain about it. UBSAN: Undefined behaviour in net/ipv6/sit.c:781:47 shift exponent 32 is too large for 32-bit type 'unsigned int' CPU: 6 PID: 20036 Comm: syz-executor.0 Not tainted 4.19.27 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xca/0x13e lib/dump_stack.c:113 ubsan_epilogue+0xe/0x81 lib/ubsan.c:159 __ubsan_handle_shift_out_of_bounds+0x293/0x2e8 lib/ubsan.c:425 check_6rd.constprop.9+0x433/0x4e0 net/ipv6/sit.c:781 try_6rd net/ipv6/sit.c:806 [inline] ipip6_tunnel_xmit net/ipv6/sit.c:866 [inline] sit_tunnel_xmit+0x141c/0x2720 net/ipv6/sit.c:1033 __netdev_start_xmit include/linux/netdevice.h:4300 [inline] netdev_start_xmit include/linux/netdevice.h:4309 [inline] xmit_one net/core/dev.c:3243 [inline] dev_hard_start_xmit+0x17c/0x780 net/core/dev.c:3259 __dev_queue_xmit+0x1656/0x2500 net/core/dev.c:3829 neigh_output include/net/neighbour.h:501 [inline] ip6_finish_output2+0xa36/0x2290 net/ipv6/ip6_output.c:120 ip6_finish_output+0x3e7/0xa20 net/ipv6/ip6_output.c:154 NF_HOOK_COND include/linux/netfilter.h:278 [inline] ip6_output+0x1e2/0x720 net/ipv6/ip6_output.c:171 dst_output include/net/dst.h:444 [inline] ip6_local_out+0x99/0x170 net/ipv6/output_core.c:176 ip6_send_skb+0x9d/0x2f0 net/ipv6/ip6_output.c:1697 ip6_push_pending_frames+0xc0/0x100 net/ipv6/ip6_output.c:1717 rawv6_push_pending_frames net/ipv6/raw.c:616 [inline] rawv6_sendmsg+0x2435/0x3530 net/ipv6/raw.c:946 inet_sendmsg+0xf8/0x5c0 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xc8/0x110 net/socket.c:631 ___sys_sendmsg+0x6cf/0x890 net/socket.c:2114 __sys_sendmsg+0xf0/0x1b0 net/socket.c:2152 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: linmiaohe Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c5b60190b1db..e23190725244 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -767,8 +767,9 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, pbw0 = tunnel->ip6rd.prefixlen >> 5; pbi0 = tunnel->ip6rd.prefixlen & 0x1f; - d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> - tunnel->ip6rd.relay_prefixlen; + d = tunnel->ip6rd.relay_prefixlen < 32 ? + (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> + tunnel->ip6rd.relay_prefixlen : 0; pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; if (pbi1 > 0) From ffd4228bdf861b3a206738854f171eef85093c54 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Mar 2019 09:07:14 -0700 Subject: [PATCH 2249/2608] net/x25: fix use-after-free in x25_device_event() [ Upstream commit 95d6ebd53c79522bf9502dbc7e89e0d63f94dae4 ] In case of failure x25_connect() does a x25_neigh_put(x25->neighbour) but forgets to clear x25->neighbour pointer, thus triggering use-after-free. Since the socket is visible in x25_list, we need to hold x25_list_lock to protect the operation. syzbot report : BUG: KASAN: use-after-free in x25_kill_by_device net/x25/af_x25.c:217 [inline] BUG: KASAN: use-after-free in x25_device_event+0x296/0x2b0 net/x25/af_x25.c:252 Read of size 8 at addr ffff8880a030edd0 by task syz-executor003/7854 CPU: 0 PID: 7854 Comm: syz-executor003 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 x25_kill_by_device net/x25/af_x25.c:217 [inline] x25_device_event+0x296/0x2b0 net/x25/af_x25.c:252 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline] call_netdevice_notifiers net/core/dev.c:1765 [inline] __dev_notify_flags+0x1e9/0x2c0 net/core/dev.c:7607 dev_change_flags+0x10d/0x170 net/core/dev.c:7643 dev_ifsioc+0x2b0/0x940 net/core/dev_ioctl.c:237 dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:488 sock_do_ioctl+0x1bd/0x300 net/socket.c:995 sock_ioctl+0x32b/0x610 net/socket.c:1096 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0xd6e/0x1390 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4467c9 Code: e8 0c e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 5b 07 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fdbea222d98 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000006dbc58 RCX: 00000000004467c9 RDX: 0000000020000340 RSI: 0000000000008914 RDI: 0000000000000003 RBP: 00000000006dbc50 R08: 00007fdbea223700 R09: 0000000000000000 R10: 00007fdbea223700 R11: 0000000000000246 R12: 00000000006dbc5c R13: 6000030030626669 R14: 0000000000000000 R15: 0000000030626669 Allocated by task 7843: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc mm/kasan/common.c:495 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:468 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:509 kmem_cache_alloc_trace+0x151/0x760 mm/slab.c:3615 kmalloc include/linux/slab.h:545 [inline] x25_link_device_up+0x46/0x3f0 net/x25/x25_link.c:249 x25_device_event+0x116/0x2b0 net/x25/af_x25.c:242 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline] call_netdevice_notifiers net/core/dev.c:1765 [inline] __dev_notify_flags+0x121/0x2c0 net/core/dev.c:7605 dev_change_flags+0x10d/0x170 net/core/dev.c:7643 dev_ifsioc+0x2b0/0x940 net/core/dev_ioctl.c:237 dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:488 sock_do_ioctl+0x1bd/0x300 net/socket.c:995 sock_ioctl+0x32b/0x610 net/socket.c:1096 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0xd6e/0x1390 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 7865: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:457 kasan_slab_free+0xe/0x10 mm/kasan/common.c:465 __cache_free mm/slab.c:3494 [inline] kfree+0xcf/0x230 mm/slab.c:3811 x25_neigh_put include/net/x25.h:253 [inline] x25_connect+0x8d8/0xde0 net/x25/af_x25.c:824 __sys_connect+0x266/0x330 net/socket.c:1685 __do_sys_connect net/socket.c:1696 [inline] __se_sys_connect net/socket.c:1693 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1693 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8880a030edc0 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 16 bytes inside of 256-byte region [ffff8880a030edc0, ffff8880a030eec0) The buggy address belongs to the page: page:ffffea000280c380 count:1 mapcount:0 mapping:ffff88812c3f07c0 index:0x0 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea0002806788 ffffea00027f0188 ffff88812c3f07c0 raw: 0000000000000000 ffff8880a030e000 000000010000000c 0000000000000000 page dumped because: kasan: bad access detected Signed-off-by: Eric Dumazet Reported-by: syzbot+04babcefcd396fabec37@syzkaller.appspotmail.com Cc: andrew hendry Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 47f600564f24..716d53b683e0 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -812,8 +812,12 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; rc = 0; out_put_neigh: - if (rc) + if (rc) { + read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); + x25->neighbour = NULL; + read_unlock_bh(&x25_list_lock); + } out_put_route: x25_route_put(rt); out: From 91820c10ea6149c9af32be20d7cfda3375ab6529 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Mar 2019 13:48:44 -0700 Subject: [PATCH 2250/2608] net/x25: reset state in x25_connect() [ Upstream commit ee74d0bd4325efb41e38affe5955f920ed973f23 ] In case x25_connect() fails and frees the socket neighbour, we also need to undo the change done to x25->state. Before my last bug fix, we had use-after-free so this patch fixes a latent bug. syzbot report : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 16137 Comm: syz-executor.1 Not tainted 5.0.0+ #117 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:x25_write_internal+0x1e8/0xdf0 net/x25/x25_subr.c:173 Code: 00 40 88 b5 e0 fe ff ff 0f 85 01 0b 00 00 48 8b 8b 80 04 00 00 48 ba 00 00 00 00 00 fc ff df 48 8d 79 1c 48 89 fe 48 c1 ee 03 <0f> b6 34 16 48 89 fa 83 e2 07 83 c2 03 40 38 f2 7c 09 40 84 f6 0f RSP: 0018:ffff888076717a08 EFLAGS: 00010207 RAX: ffff88805f2f2292 RBX: ffff8880a0ae6000 RCX: 0000000000000000 kobject: 'loop5' (0000000018d0d0ee): kobject_uevent_env RDX: dffffc0000000000 RSI: 0000000000000003 RDI: 000000000000001c RBP: ffff888076717b40 R08: ffff8880950e0580 R09: ffffed100be5e46d R10: ffffed100be5e46c R11: ffff88805f2f2363 R12: ffff888065579840 kobject: 'loop5' (0000000018d0d0ee): fill_kobj_path: path = '/devices/virtual/block/loop5' R13: 1ffff1100ece2f47 R14: 0000000000000013 R15: 0000000000000013 FS: 00007fb88cf43700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f9a42a41028 CR3: 0000000087a67000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: x25_release+0xd0/0x340 net/x25/af_x25.c:658 __sock_release+0xd3/0x2b0 net/socket.c:579 sock_close+0x1b/0x30 net/socket.c:1162 __fput+0x2df/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 get_signal+0x1961/0x1d50 kernel/signal.c:2388 do_signal+0x87/0x1940 arch/x86/kernel/signal.c:816 exit_to_usermode_loop+0x244/0x2c0 arch/x86/entry/common.c:162 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457f29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fb88cf42c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: fffffffffffffe00 RBX: 0000000000000003 RCX: 0000000000457f29 RDX: 0000000000000012 RSI: 0000000020000080 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb88cf436d4 R13: 00000000004be462 R14: 00000000004cec98 R15: 00000000ffffffff Modules linked in: Fixes: 95d6ebd53c79 ("net/x25: fix use-after-free in x25_device_event()") Signed-off-by: Eric Dumazet Cc: andrew hendry Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 716d53b683e0..8d0b0f2ee697 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -817,6 +817,7 @@ out_put_neigh: x25_neigh_put(x25->neighbour); x25->neighbour = NULL; read_unlock_bh(&x25_list_lock); + x25->state = X25_STATE_0; } out_put_route: x25_route_put(rt); From eff9b85fe90978a740f053e95441ee937463f25e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 13 Mar 2019 17:00:48 +0800 Subject: [PATCH 2251/2608] pptp: dst_release sk_dst_cache in pptp_sock_destruct [ Upstream commit 9417d81f4f8adfe20a12dd1fadf73a618cbd945d ] sk_setup_caps() is called to set sk->sk_dst_cache in pptp_connect, so we have to dst_release(sk->sk_dst_cache) in pptp_sock_destruct, otherwise, the dst refcnt will leak. It can be reproduced by this syz log: r1 = socket$pptp(0x18, 0x1, 0x2) bind$pptp(r1, &(0x7f0000000100)={0x18, 0x2, {0x0, @local}}, 0x1e) connect$pptp(r1, &(0x7f0000000000)={0x18, 0x2, {0x3, @remote}}, 0x1e) Consecutive dmesg warnings will occur: unregister_netdevice: waiting for lo to become free. Usage count = 1 v1->v2: - use rcu_dereference_protected() instead of rcu_dereference_check(), as suggested by Eric. Fixes: 00959ade36ac ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)") Reported-by: Xiumei Mu Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 9b70a3af678e..68b274b3e448 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -539,6 +539,7 @@ static void pptp_sock_destruct(struct sock *sk) pppox_unbind_sock(sk); } skb_queue_purge(&sk->sk_receive_queue); + dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); } static int pptp_create(struct net *net, struct socket *sock, int kern) From 1a443e2d40e6e278c8f468b835c7f3e21904aa7a Mon Sep 17 00:00:00 2001 From: Masaru Nagai Date: Thu, 7 Mar 2019 11:24:47 +0100 Subject: [PATCH 2252/2608] ravb: Decrease TxFIFO depth of Q3 and Q2 to one [ Upstream commit ae9819e339b451da7a86ab6fe38ecfcb6814e78a ] Hardware has the CBS (Credit Based Shaper) which affects only Q3 and Q2. When updating the CBS settings, even if the driver does so after waiting for Tx DMA finished, there is a possibility that frame data still remains in TxFIFO. To avoid this, decrease TxFIFO depth of Q3 and Q2 to one. This patch has been exercised this using netperf TCP_MAERTS, TCP_STREAM and UDP_STREAM tests run on an Ebisu board. No performance change was detected, outside of noise in the tests, both in terms of throughput and CPU utilisation. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Masaru Nagai Signed-off-by: Kazuya Mizuguchi [simon: updated changelog] Signed-off-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ff3a293ffe36..ce79af4a7f6f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -458,7 +458,7 @@ static int ravb_dmac_init(struct net_device *ndev) RCR_EFFS | RCR_ENCF | RCR_ETS0 | RCR_ESF | 0x18000000, RCR); /* Set FIFO size */ - ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00222200, TGC); + ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00112200, TGC); /* Timestamp enable */ ravb_write(ndev, TCCR_TFEN, TCCR); From 68588407399ab282872f7d9e85c79a6def785214 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 8 Mar 2019 14:50:54 +0800 Subject: [PATCH 2253/2608] route: set the deleted fnhe fnhe_daddr to 0 in ip_del_fnhe to fix a race [ Upstream commit ee60ad219f5c7c4fb2f047f88037770063ef785f ] The race occurs in __mkroute_output() when 2 threads lookup a dst: CPU A CPU B find_exception() find_exception() [fnhe expires] ip_del_fnhe() [fnhe is deleted] rt_bind_exception() In rt_bind_exception() it will bind a deleted fnhe with the new dst, and this dst will get no chance to be freed. It causes a dev defcnt leak and consecutive dmesg warnings: unregister_netdevice: waiting for ethX to become free. Usage count = 1 Especially thanks Jon to identify the issue. This patch fixes it by setting fnhe_daddr to 0 in ip_del_fnhe() to stop binding the deleted fnhe with a new dst when checking fnhe's fnhe_daddr and daddr in rt_bind_exception(). It works as both ip_del_fnhe() and rt_bind_exception() are protected by fnhe_lock and the fhne is freed by kfree_rcu(). Fixes: deed49df7390 ("route: check and remove route cache when we get route") Signed-off-by: Jon Maxwell Signed-off-by: Xin Long Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0a35d294abec..a1bf87711bfa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1315,6 +1315,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + /* set fnhe_daddr to 0 to ensure it won't bind with + * new dsts in rt_bind_exception(). + */ + fnhe->fnhe_daddr = 0; fnhe_flush_routes(fnhe); kfree_rcu(fnhe, rcu); break; From a63c7fca080e9bdd3368c9c4ab573c01d03f1332 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 9 Mar 2019 00:29:58 +0000 Subject: [PATCH 2254/2608] rxrpc: Fix client call queueing, waiting for channel [ Upstream commit 69ffaebb90369ce08657b5aea4896777b9d6e8fc ] rxrpc_get_client_conn() adds a new call to the front of the waiting_calls queue if the connection it's going to use already exists. This is bad as it allows calls to get starved out. Fix this by adding to the tail instead. Also change the other enqueue point in the same function to put it on the front (ie. when we have a new connection). This makes the point that in the case of a new connection the new call goes at the front (though it doesn't actually matter since the queue should be unoccupied). Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects") Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/conn_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 78a154173d90..0aa4bf09fb9c 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -351,7 +351,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, * normally have to take channel_lock but we do this before anyone else * can see the connection. */ - list_add_tail(&call->chan_wait_link, &candidate->waiting_calls); + list_add(&call->chan_wait_link, &candidate->waiting_calls); if (cp->exclusive) { call->conn = candidate; @@ -430,7 +430,7 @@ found_extant_conn: call->conn = conn; call->security_ix = conn->security_ix; call->service_id = conn->service_id; - list_add(&call->chan_wait_link, &conn->waiting_calls); + list_add_tail(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); _leave(" = 0 [extant %d]", conn->debug_id); return 0; From d51c4c0c1fbc338d1b756d9d9d1ba438534546cd Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 11 Mar 2019 11:41:05 -0700 Subject: [PATCH 2255/2608] tcp: Don't access TCP_SKB_CB before initializing it [ Upstream commit f2feaefdabb0a6253aa020f65e7388f07a9ed47c ] Since commit eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()"), tcp_vX_fill_cb is only called after tcp_filter(). That means, TCP_SKB_CB(skb)->end_seq still points to the IP-part of the cb. We thus should not mock with it, as this can trigger bugs (thanks syzkaller): [ 12.349396] ================================================================== [ 12.350188] BUG: KASAN: slab-out-of-bounds in ip6_datagram_recv_specific_ctl+0x19b3/0x1a20 [ 12.351035] Read of size 1 at addr ffff88006adbc208 by task test_ip6_datagr/1799 Setting end_seq is actually no more necessary in tcp_filter as it gets initialized later on in tcp_vX_fill_cb. Cc: Eric Dumazet Fixes: eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()") Signed-off-by: Christoph Paasch Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e593301f442f..97a414dbdaf4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1578,15 +1578,8 @@ EXPORT_SYMBOL(tcp_add_backlog); int tcp_filter(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = (struct tcphdr *)skb->data; - unsigned int eaten = skb->len; - int err; - err = sk_filter_trim_cap(sk, skb, th->doff * 4); - if (!err) { - eaten -= skb->len; - TCP_SKB_CB(skb)->end_seq -= eaten; - } - return err; + return sk_filter_trim_cap(sk, skb, th->doff * 4); } EXPORT_SYMBOL(tcp_filter); From bb73b637993bd77087c0f3e56e86a040f230e435 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 8 Mar 2019 22:09:47 +0100 Subject: [PATCH 2256/2608] tcp: handle inet_csk_reqsk_queue_add() failures [ Upstream commit 9d3e1368bb45893a75a5dfb7cd21fdebfa6b47af ] Commit 7716682cc58e ("tcp/dccp: fix another race at listener dismantle") let inet_csk_reqsk_queue_add() fail, and adjusted {tcp,dccp}_check_req() accordingly. However, TFO and syncookies weren't modified, thus leaking allocated resources on error. Contrary to tcp_check_req(), in both syncookies and TFO cases, we need to drop the request socket. Also, since the child socket is created with inet_csk_clone_lock(), we have to unlock it and drop an extra reference (->sk_refcount is initially set to 2 and inet_csk_reqsk_queue_add() drops only one ref). For TFO, we also need to revert the work done by tcp_try_fastopen() (with reqsk_fastopen_remove()). Fixes: 7716682cc58e ("tcp/dccp: fix another race at listener dismantle") Signed-off-by: Guillaume Nault Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/syncookies.c | 7 ++++++- net/ipv4/tcp_input.c | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 77cf32a80952..2f871424925e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -216,7 +216,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); - inet_csk_reqsk_queue_add(sk, req, child); + if (!inet_csk_reqsk_queue_add(sk, req, child)) { + bh_unlock_sock(child); + sock_put(child); + child = NULL; + reqsk_put(req); + } } else { reqsk_free(req); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e24c0d7adf65..c8227e07d574 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6406,7 +6406,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->send_synack(fastopen_sk, dst, &fl, req, &foc, TCP_SYNACK_FASTOPEN); /* Add the child socket directly into the accept queue */ - inet_csk_reqsk_queue_add(sk, req, fastopen_sk); + if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) { + reqsk_fastopen_remove(fastopen_sk, req, false); + bh_unlock_sock(fastopen_sk); + sock_put(fastopen_sk); + reqsk_put(req); + goto drop; + } sk->sk_data_ready(sk); bh_unlock_sock(fastopen_sk); sock_put(fastopen_sk); From 1ed2291ca6e2782d507f316abcbcb279828dcab7 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 8 Mar 2019 16:40:57 +0100 Subject: [PATCH 2257/2608] vxlan: Fix GRO cells race condition between receive and link delete [ Upstream commit ad6c9986bcb627c7c22b8f9e9a934becc27df87c ] If we receive a packet while deleting a VXLAN device, there's a chance vxlan_rcv() is called at the same time as vxlan_dellink(). This is fine, except that vxlan_dellink() should never ever touch stuff that's still in use, such as the GRO cells list. Otherwise, vxlan_rcv() crashes while queueing packets via gro_cells_receive(). Move the gro_cells_destroy() to vxlan_uninit(), which runs after the RCU grace period is elapsed and nothing needs the gro_cells anymore. This is now done in the same way as commit 8e816df87997 ("geneve: Use GRO cells infrastructure.") originally implemented for GENEVE. Reported-by: Jianlin Shi Fixes: 58ce31cca1ff ("vxlan: GRO support at tunnel layer") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a1b40b9c4906..eb1c91e86fcd 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2462,6 +2462,8 @@ static void vxlan_uninit(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); + gro_cells_destroy(&vxlan->gro_cells); + vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); free_percpu(dev->tstats); @@ -3523,7 +3525,6 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head) vxlan_flush(vxlan, true); - gro_cells_destroy(&vxlan->gro_cells); list_del(&vxlan->next); unregister_netdevice_queue(dev, head); } From 06c7cd5b40e1fd95f3f6303aa5a505d9bb68c1bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Mar 2019 10:36:40 -0700 Subject: [PATCH 2258/2608] vxlan: test dev->flags & IFF_UP before calling gro_cells_receive() [ Upstream commit 59cbf56fcd98ba2a715b6e97c4e43f773f956393 ] Same reasons than the ones explained in commit 4179cb5a4c92 ("vxlan: test dev->flags & IFF_UP before calling netif_rx()") netif_rx() or gro_cells_receive() must be called under a strict contract. At device dismantle phase, core networking clears IFF_UP and flush_all_backlogs() is called after rcu grace period to make sure no incoming packet might be in a cpu backlog and still referencing the device. A similar protocol is used for gro_cells infrastructure, as gro_cells_destroy() will be called only after a full rcu grace period is observed after IFF_UP has been cleared. Most drivers call netif_rx() from their interrupt handler, and since the interrupts are disabled at device dismantle, netif_rx() does not have to check dev->flags & IFF_UP Virtual drivers do not have this guarantee, and must therefore make the check themselves. Otherwise we risk use-after-free and/or crashes. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index eb1c91e86fcd..df48f65c4f90 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1468,6 +1468,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; } + rcu_read_lock(); + + if (unlikely(!(vxlan->dev->flags & IFF_UP))) { + rcu_read_unlock(); + atomic_long_inc(&vxlan->dev->rx_dropped); + goto drop; + } + stats = this_cpu_ptr(vxlan->dev->tstats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; @@ -1475,6 +1483,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) u64_stats_update_end(&stats->syncp); gro_cells_receive(&vxlan->gro_cells, skb); + + rcu_read_unlock(); + return 0; drop: From 22768ca28f896276db1519bd5cf384dcd3cd0db2 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 12 Mar 2019 17:05:47 +0200 Subject: [PATCH 2259/2608] net/mlx4_core: Fix reset flow when in command polling mode [ Upstream commit e15ce4b8d11227007577e6dc1364d288b8874fbe ] As part of unloading a device, the driver switches from FW command event mode to FW command polling mode. Part of switching over to polling mode is freeing the command context array memory (unfortunately, currently, without NULLing the command context array pointer). The reset flow calls "complete" to complete all outstanding fw commands (if we are in event mode). The check for event vs. polling mode here is to test if the command context array pointer is NULL. If the reset flow is activated after the switch to polling mode, it will attempt (incorrectly) to complete all the commands in the context array -- because the pointer was not NULLed when the driver switched over to polling mode. As a result, we have a use-after-free situation, which results in a kernel crash. For example: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __wake_up_common+0x2e/0x90 PGD 0 Oops: 0000 [#1] SMP Modules linked in: netconsole nfsv3 nfs_acl nfs lockd grace ... CPU: 2 PID: 940 Comm: kworker/2:3 Kdump: loaded Not tainted 3.10.0-862.el7.x86_64 #1 Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006 04/28/2016 Workqueue: events hv_eject_device_work [pci_hyperv] task: ffff8d1734ca0fd0 ti: ffff8d17354bc000 task.ti: ffff8d17354bc000 RIP: 0010:[] [] __wake_up_common+0x2e/0x90 RSP: 0018:ffff8d17354bfa38 EFLAGS: 00010082 RAX: 0000000000000000 RBX: ffff8d17362d42c8 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8d17362d42c8 RBP: ffff8d17354bfa70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000298 R11: ffff8d173610e000 R12: ffff8d17362d42d0 R13: 0000000000000246 R14: 0000000000000000 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff8d1802680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000f16d8000 CR4: 00000000001406e0 Call Trace: [] complete+0x3c/0x50 [] mlx4_cmd_wake_completions+0x70/0x90 [mlx4_core] [] mlx4_enter_error_state+0xe1/0x380 [mlx4_core] [] mlx4_comm_cmd+0x29b/0x360 [mlx4_core] [] __mlx4_cmd+0x441/0x920 [mlx4_core] [] ? __slab_free+0x81/0x2f0 [] ? __radix_tree_lookup+0x84/0xf0 [] mlx4_free_mtt_range+0x5b/0xb0 [mlx4_core] [] mlx4_mtt_cleanup+0x17/0x20 [mlx4_core] [] mlx4_free_eq+0xa7/0x1c0 [mlx4_core] [] mlx4_cleanup_eq_table+0xde/0x130 [mlx4_core] [] mlx4_unload_one+0x118/0x300 [mlx4_core] [] mlx4_remove_one+0x91/0x1f0 [mlx4_core] The fix is to set the command context array pointer to NULL after freeing the array. Fixes: f5aef5aa3506 ("net/mlx4_core: Activate reset flow upon fatal command cases") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 6a9086dc1e92..5b69d373bcb2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2686,6 +2686,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) down(&priv->cmd.event_sem); kfree(priv->cmd.context); + priv->cmd.context = NULL; up(&priv->cmd.poll_sem); up_write(&priv->cmd.switch_sem); From 3d31f62c115e55178d7903a202d1f967c0b7531f Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 12 Mar 2019 17:05:48 +0200 Subject: [PATCH 2260/2608] net/mlx4_core: Fix locking in SRIOV mode when switching between events and polling [ Upstream commit c07d27927f2f2e96fcd27bb9fb330c9ea65612d0 ] In procedures mlx4_cmd_use_events() and mlx4_cmd_use_polling(), we need to guarantee that there are no FW commands in progress on the comm channel (for VFs) or wrapped FW commands (on the PF) when SRIOV is active. We do this by also taking the slave_cmd_mutex when SRIOV is active. This is especially important when switching from event to polling, since we free the command-context array during the switch. If there are FW commands in progress (e.g., waiting for a completion event), the completion event handler will access freed memory. Since the decision to use comm_wait or comm_poll is taken before grabbing the event_sem/poll_sem in mlx4_comm_cmd_wait/poll, we must take the slave_cmd_mutex as well (to guarantee that the decision to use events or polling and the call to the appropriate cmd function are atomic). Fixes: a7e1f04905e5 ("net/mlx4_core: Fix deadlock when switching between polling and event fw commands") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5b69d373bcb2..4c2ee9fd9e57 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2642,6 +2642,8 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) if (!priv->cmd.context) return -ENOMEM; + if (mlx4_is_mfunc(dev)) + mutex_lock(&priv->cmd.slave_cmd_mutex); down_write(&priv->cmd.switch_sem); for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; @@ -2667,6 +2669,8 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; up_write(&priv->cmd.switch_sem); + if (mlx4_is_mfunc(dev)) + mutex_unlock(&priv->cmd.slave_cmd_mutex); return err; } @@ -2679,6 +2683,8 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; + if (mlx4_is_mfunc(dev)) + mutex_lock(&priv->cmd.slave_cmd_mutex); down_write(&priv->cmd.switch_sem); priv->cmd.use_events = 0; @@ -2690,6 +2696,8 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) up(&priv->cmd.poll_sem); up_write(&priv->cmd.switch_sem); + if (mlx4_is_mfunc(dev)) + mutex_unlock(&priv->cmd.slave_cmd_mutex); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) From 048d7079926598a0ab94582f86b1ef46954ed8ec Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 12 Mar 2019 17:05:49 +0200 Subject: [PATCH 2261/2608] net/mlx4_core: Fix qp mtt size calculation [ Upstream commit 8511a653e9250ef36b95803c375a7be0e2edb628 ] Calculation of qp mtt size (in function mlx4_RST2INIT_wrapper) ultimately depends on function roundup_pow_of_two. If the amount of memory required by the QP is less than one page, roundup_pow_of_two is called with argument zero. In this case, the roundup_pow_of_two result is undefined. Calling roundup_pow_of_two with a zero argument resulted in the following stack trace: UBSAN: Undefined behaviour in ./include/linux/log2.h:61:13 shift exponent 64 is too large for 64-bit type 'long unsigned int' CPU: 4 PID: 26939 Comm: rping Tainted: G OE 4.19.0-rc1 Hardware name: Supermicro X9DR3-F/X9DR3-F, BIOS 3.2a 07/09/2015 Call Trace: dump_stack+0x9a/0xeb ubsan_epilogue+0x9/0x7c __ubsan_handle_shift_out_of_bounds+0x254/0x29d ? __ubsan_handle_load_invalid_value+0x180/0x180 ? debug_show_all_locks+0x310/0x310 ? sched_clock+0x5/0x10 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0x18/0x260 ? find_held_lock+0x35/0x1e0 ? mlx4_RST2INIT_QP_wrapper+0xfb1/0x1440 [mlx4_core] mlx4_RST2INIT_QP_wrapper+0xfb1/0x1440 [mlx4_core] Fix this by explicitly testing for zero, and returning one if the argument is zero (assuming that the next higher power of 2 in this case should be one). Fixes: c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index a5381b091710..53ca6cf316dc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2717,13 +2717,13 @@ static int qp_get_mtt_size(struct mlx4_qp_context *qpc) int total_pages; int total_mem; int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f; + int tot; sq_size = 1 << (log_sq_size + log_sq_sride + 4); rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4)); total_mem = sq_size + rq_size; - total_pages = - roundup_pow_of_two((total_mem + (page_offset << 6)) >> - page_shift); + tot = (total_mem + (page_offset << 6)) >> page_shift; + total_pages = !tot ? 1 : roundup_pow_of_two(tot); return total_pages; } From 64a6e35ac51036d309fbcc003e4daae672fb5849 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Feb 2019 13:24:59 -0800 Subject: [PATCH 2262/2608] net/x25: fix a race in x25_bind() [ Upstream commit 797a22bd5298c2674d927893f46cadf619dad11d ] syzbot was able to trigger another soft lockup [1] I first thought it was the O(N^2) issue I mentioned in my prior fix (f657d22ee1f "net/x25: do not hold the cpu too long in x25_new_lci()"), but I eventually found that x25_bind() was not checking SOCK_ZAPPED state under socket lock protection. This means that multiple threads can end up calling x25_insert_socket() for the same socket, and corrupt x25_list [1] watchdog: BUG: soft lockup - CPU#0 stuck for 123s! [syz-executor.2:10492] Modules linked in: irq event stamp: 27515 hardirqs last enabled at (27514): [] trace_hardirqs_on_thunk+0x1a/0x1c hardirqs last disabled at (27515): [] trace_hardirqs_off_thunk+0x1a/0x1c softirqs last enabled at (32): [] x25_get_neigh+0xa3/0xd0 net/x25/x25_link.c:336 softirqs last disabled at (34): [] x25_find_socket+0x23/0x140 net/x25/af_x25.c:341 CPU: 0 PID: 10492 Comm: syz-executor.2 Not tainted 5.0.0-rc7+ #88 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__sanitizer_cov_trace_pc+0x4/0x50 kernel/kcov.c:97 Code: f4 ff ff ff e8 11 9f ea ff 48 c7 05 12 fb e5 08 00 00 00 00 e9 c8 e9 ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 <48> 8b 75 08 65 48 8b 04 25 40 ee 01 00 65 8b 15 38 0c 92 7e 81 e2 RSP: 0018:ffff88806e94fc48 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 RAX: 1ffff1100d84dac5 RBX: 0000000000000001 RCX: ffffc90006197000 RDX: 0000000000040000 RSI: ffffffff86324bf3 RDI: ffff88806c26d628 RBP: ffff88806e94fc48 R08: ffff88806c1c6500 R09: fffffbfff1282561 R10: fffffbfff1282560 R11: ffffffff89412b03 R12: ffff88806c26d628 R13: ffff888090455200 R14: dffffc0000000000 R15: 0000000000000000 FS: 00007f3a107e4700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3a107e3db8 CR3: 00000000a5544000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __x25_find_socket net/x25/af_x25.c:327 [inline] x25_find_socket+0x7d/0x140 net/x25/af_x25.c:342 x25_new_lci net/x25/af_x25.c:355 [inline] x25_connect+0x380/0xde0 net/x25/af_x25.c:784 __sys_connect+0x266/0x330 net/socket.c:1662 __do_sys_connect net/socket.c:1673 [inline] __se_sys_connect net/socket.c:1670 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1670 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457e29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f3a107e3c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457e29 RDX: 0000000000000012 RSI: 0000000020000200 RDI: 0000000000000005 RBP: 000000000073c040 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f3a107e46d4 R13: 00000000004be362 R14: 00000000004ceb98 R15: 00000000ffffffff Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 10493 Comm: syz-executor.3 Not tainted 5.0.0-rc7+ #88 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:193 [inline] RIP: 0010:queued_write_lock_slowpath+0x143/0x290 kernel/locking/qrwlock.c:86 Code: 4c 8d 2c 01 41 83 c7 03 41 0f b6 45 00 41 38 c7 7c 08 84 c0 0f 85 0c 01 00 00 8b 03 3d 00 01 00 00 74 1a f3 90 41 0f b6 55 00 <41> 38 d7 7c eb 84 d2 74 e7 48 89 df e8 cc aa 4e 00 eb dd be 04 00 RSP: 0018:ffff888085c47bd8 EFLAGS: 00000206 RAX: 0000000000000300 RBX: ffffffff89412b00 RCX: 1ffffffff1282560 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff89412b00 RBP: ffff888085c47c70 R08: 1ffffffff1282560 R09: fffffbfff1282561 R10: fffffbfff1282560 R11: ffffffff89412b03 R12: 00000000000000ff R13: fffffbfff1282560 R14: 1ffff11010b88f7d R15: 0000000000000003 FS: 00007fdd04086700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdd04064db8 CR3: 0000000090be0000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: queued_write_lock include/asm-generic/qrwlock.h:104 [inline] do_raw_write_lock+0x1d6/0x290 kernel/locking/spinlock_debug.c:203 __raw_write_lock_bh include/linux/rwlock_api_smp.h:204 [inline] _raw_write_lock_bh+0x3b/0x50 kernel/locking/spinlock.c:312 x25_insert_socket+0x21/0xe0 net/x25/af_x25.c:267 x25_bind+0x273/0x340 net/x25/af_x25.c:703 __sys_bind+0x23f/0x290 net/socket.c:1481 __do_sys_bind net/socket.c:1492 [inline] __se_sys_bind net/socket.c:1490 [inline] __x64_sys_bind+0x73/0xb0 net/socket.c:1490 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457e29 Fixes: 90c27297a9bf ("X.25 remove bkl in bind") Signed-off-by: Eric Dumazet Cc: andrew hendry Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 8d0b0f2ee697..1b830a6ee3ff 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -678,8 +678,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; int len, i, rc = 0; - if (!sock_flag(sk, SOCK_ZAPPED) || - addr_len != sizeof(struct sockaddr_x25) || + if (addr_len != sizeof(struct sockaddr_x25) || addr->sx25_family != AF_X25) { rc = -EINVAL; goto out; @@ -694,9 +693,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } lock_sock(sk); - x25_sk(sk)->source_addr = addr->sx25_addr; - x25_insert_socket(sk); - sock_reset_flag(sk, SOCK_ZAPPED); + if (sock_flag(sk, SOCK_ZAPPED)) { + x25_sk(sk)->source_addr = addr->sx25_addr; + x25_insert_socket(sk); + sock_reset_flag(sk, SOCK_ZAPPED); + } else { + rc = -EINVAL; + } release_sock(sk); SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); out: From 07c2216c3783daca34ea0d86affb9dfc5346f183 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 21 Feb 2019 22:42:01 +0800 Subject: [PATCH 2263/2608] mdio_bus: Fix use-after-free on device_register fails [ Upstream commit 6ff7b060535e87c2ae14dd8548512abfdda528fb ] KASAN has found use-after-free in fixed_mdio_bus_init, commit 0c692d07842a ("drivers/net/phy/mdio_bus.c: call put_device on device_register() failure") call put_device() while device_register() fails,give up the last reference to the device and allow mdiobus_release to be executed ,kfreeing the bus. However in most drives, mdiobus_free be called to free the bus while mdiobus_register fails. use-after-free occurs when access bus again, this patch revert it to let mdiobus_free free the bus. KASAN report details as below: BUG: KASAN: use-after-free in mdiobus_free+0x85/0x90 drivers/net/phy/mdio_bus.c:482 Read of size 4 at addr ffff8881dc824d78 by task syz-executor.0/3524 CPU: 1 PID: 3524 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xfa/0x1ce lib/dump_stack.c:113 print_address_description+0x65/0x270 mm/kasan/report.c:187 kasan_report+0x149/0x18d mm/kasan/report.c:317 mdiobus_free+0x85/0x90 drivers/net/phy/mdio_bus.c:482 fixed_mdio_bus_init+0x283/0x1000 [fixed_phy] ? 0xffffffffc0e40000 ? 0xffffffffc0e40000 ? 0xffffffffc0e40000 do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6215c19c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00007f6215c19c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f6215c1a6bc R13: 00000000004bcefb R14: 00000000006f7030 R15: 0000000000000004 Allocated by task 3524: set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc.constprop.3+0xa0/0xd0 mm/kasan/common.c:496 kmalloc include/linux/slab.h:545 [inline] kzalloc include/linux/slab.h:740 [inline] mdiobus_alloc_size+0x54/0x1b0 drivers/net/phy/mdio_bus.c:143 fixed_mdio_bus_init+0x163/0x1000 [fixed_phy] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 3524: set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:458 slab_free_hook mm/slub.c:1409 [inline] slab_free_freelist_hook mm/slub.c:1436 [inline] slab_free mm/slub.c:2986 [inline] kfree+0xe1/0x270 mm/slub.c:3938 device_release+0x78/0x200 drivers/base/core.c:919 kobject_cleanup lib/kobject.c:662 [inline] kobject_release lib/kobject.c:691 [inline] kref_put include/linux/kref.h:67 [inline] kobject_put+0x146/0x240 lib/kobject.c:708 put_device+0x1c/0x30 drivers/base/core.c:2060 __mdiobus_register+0x483/0x560 drivers/net/phy/mdio_bus.c:382 fixed_mdio_bus_init+0x26b/0x1000 [fixed_phy] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8881dc824c80 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 248 bytes inside of 2048-byte region [ffff8881dc824c80, ffff8881dc825480) The buggy address belongs to the page: page:ffffea0007720800 count:1 mapcount:0 mapping:ffff8881f6c02800 index:0x0 compound_mapcount: 0 flags: 0x2fffc0000010200(slab|head) raw: 02fffc0000010200 0000000000000000 0000000500000001 ffff8881f6c02800 raw: 0000000000000000 00000000800f000f 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881dc824c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8881dc824c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8881dc824d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881dc824d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881dc824e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 0c692d07842a ("drivers/net/phy/mdio_bus.c: call put_device on device_register() failure") Signed-off-by: YueHaibing Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio_bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 1ece41277993..c545fb1f82bd 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -347,7 +347,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) err = device_register(&bus->dev); if (err) { pr_err("mii_bus %s failed to register\n", bus->id); - put_device(&bus->dev); return -EINVAL; } From b3dc5b185072520422e26d8bd1a65203f0713a61 Mon Sep 17 00:00:00 2001 From: Kalash Nainwal Date: Wed, 20 Feb 2019 16:23:04 -0800 Subject: [PATCH 2264/2608] net: Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255 [ Upstream commit 97f0082a0592212fc15d4680f5a4d80f79a1687c ] Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255 to keep legacy software happy. This is similar to what was done for ipv4 in commit 709772e6e065 ("net: Fix routing tables with id > 255 for legacy software"). Signed-off-by: Kalash Nainwal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fafecdc06900..00f8fe8cebd5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3495,7 +3495,7 @@ static int rt6_fill_node(struct net *net, table = rt->rt6i_table->tb6_id; else table = RT6_TABLE_UNSPEC; - rtm->rtm_table = table; + rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; if (rt->rt6i_flags & RTF_REJECT) { From 6c338fafab643efe92229caf9a4d301d92ff9fd1 Mon Sep 17 00:00:00 2001 From: Michal Soltys Date: Mon, 18 Feb 2019 17:55:28 +0100 Subject: [PATCH 2265/2608] bonding: fix PACKET_ORIGDEV regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3c963a3306eada999be5ebf4f293dfa3d3945487 ] This patch fixes a subtle PACKET_ORIGDEV regression which was a side effect of fixes introduced by: 6a9e461f6fe4 bonding: pass link-local packets to bonding master also. ... to: b89f04c61efe bonding: deliver link-local packets with skb->dev set to link that packets arrived on While 6a9e461f6fe4 restored pre-b89f04c61efe presence of link-local packets on bonding masters (which is required e.g. by linux bridges participating in spanning tree or needed for lab-like setups created with group_fwd_mask) it also caused the originating device information to be lost due to cloning. Maciej Żenczykowski proposed another solution that doesn't require packet cloning and retains original device information - instead of returning RX_HANDLER_PASS for all link-local packets it's now limited only to packets from inactive slaves. At the same time, packets passed to bonding masters retain correct information about the originating device and PACKET_ORIGDEV can be used to determine it. This elegantly solves all issues so far: - link-local packets that were removed from bonding masters - LLDP daemons being forced to explicitly bind to slave interfaces - PACKET_ORIGDEV having no effect on bond interfaces Fixes: 6a9e461f6fe4 (bonding: pass link-local packets to bonding master also.) Reported-by: Vincent Bernat Signed-off-by: Michal Soltys Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 35 +++++++++++++-------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 65c5a65af0ba..99e60bb5fe07 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1177,29 +1177,22 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } } - /* Link-local multicast packets should be passed to the - * stack on the link they arrive as well as pass them to the - * bond-master device. These packets are mostly usable when - * stack receives it with the link on which they arrive - * (e.g. LLDP) they also must be available on master. Some of - * the use cases include (but are not limited to): LLDP agents - * that must be able to operate both on enslaved interfaces as - * well as on bonds themselves; linux bridges that must be able - * to process/pass BPDUs from attached bonds when any kind of - * STP version is enabled on the network. + /* + * For packets determined by bond_should_deliver_exact_match() call to + * be suppressed we want to make an exception for link-local packets. + * This is necessary for e.g. LLDP daemons to be able to monitor + * inactive slave links without being forced to bind to them + * explicitly. + * + * At the same time, packets that are passed to the bonding master + * (including link-local ones) can have their originating interface + * determined via PACKET_ORIGDEV socket option. */ - if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - - if (nskb) { - nskb->dev = bond->dev; - nskb->queue_mapping = 0; - netif_rx(nskb); - } - return RX_HANDLER_PASS; - } - if (bond_should_deliver_exact_match(skb, slave, bond)) + if (bond_should_deliver_exact_match(skb, slave, bond)) { + if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) + return RX_HANDLER_PASS; return RX_HANDLER_EXACT; + } skb->dev = bond->dev; From 727a261969faaaa47262329dad10816188c75a6b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Feb 2019 20:09:35 +0000 Subject: [PATCH 2266/2608] missing barriers in some of unix_sock ->addr and ->path accesses [ Upstream commit ae3b564179bfd06f32d051b9e5d72ce4b2a07c37 ] Several u->addr and u->path users are not holding any locks in common with unix_bind(). unix_state_lock() is useless for those purposes. u->addr is assign-once and *(u->addr) is fully set up by the time we set u->addr (all under unix_table_lock). u->path is also set in the same critical area, also before setting u->addr, and any unix_sock with ->path filled will have non-NULL ->addr. So setting ->addr with smp_store_release() is all we need for those "lockless" users - just have them fetch ->addr with smp_load_acquire() and don't even bother looking at ->path if they see NULL ->addr. Users of ->addr and ->path fall into several classes now: 1) ones that do smp_load_acquire(u->addr) and access *(u->addr) and u->path only if smp_load_acquire() has returned non-NULL. 2) places holding unix_table_lock. These are guaranteed that *(u->addr) is seen fully initialized. If unix_sock is in one of the "bound" chains, so's ->path. 3) unix_sock_destructor() using ->addr is safe. All places that set u->addr are guaranteed to have seen all stores *(u->addr) while holding a reference to u and unix_sock_destructor() is called when (atomic) refcount hits zero. 4) unix_release_sock() using ->path is safe. unix_bind() is serialized wrt unix_release() (normally - by struct file refcount), and for the instances that had ->path set by unix_bind() unix_release_sock() comes from unix_release(), so they are fine. Instances that had it set in unix_stream_connect() either end up attached to a socket (in unix_accept()), in which case the call chain to unix_release_sock() and serialization are the same as in the previous case, or they never get accept'ed and unix_release_sock() is called when the listener is shut down and its queue gets purged. In that case the listener's queue lock provides the barriers needed - unix_stream_connect() shoves our unix_sock into listener's queue under that lock right after having set ->path and eventual unix_release_sock() caller picks them from that queue under the same lock right before calling unix_release_sock(). 5) unix_find_other() use of ->path is pointless, but safe - it happens with successful lookup by (abstract) name, so ->path.dentry is guaranteed to be NULL there. earlier-variant-reviewed-by: "Paul E. McKenney" Signed-off-by: Al Viro Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 59 +++++++++++++++++++++++++------------------- net/unix/diag.c | 3 ++- security/lsm_audit.c | 10 +++++--- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7f46bab4ce5c..2adfcc6dec5a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -892,7 +892,7 @@ retry: addr->hash ^= sk->sk_type; __unix_remove_socket(sk); - u->addr = addr; + smp_store_release(&u->addr, addr); __unix_insert_socket(&unix_socket_table[addr->hash], sk); spin_unlock(&unix_table_lock); err = 0; @@ -1062,7 +1062,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = 0; __unix_remove_socket(sk); - u->addr = addr; + smp_store_release(&u->addr, addr); __unix_insert_socket(list, sk); out_unlock: @@ -1333,15 +1333,29 @@ restart: RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); - /* copy address information from listening to new sock*/ - if (otheru->addr) { - refcount_inc(&otheru->addr->refcnt); - newu->addr = otheru->addr; - } + /* copy address information from listening to new sock + * + * The contents of *(otheru->addr) and otheru->path + * are seen fully set up here, since we have found + * otheru in hash under unix_table_lock. Insertion + * into the hash chain we'd found it in had been done + * in an earlier critical area protected by unix_table_lock, + * the same one where we'd set *(otheru->addr) contents, + * as well as otheru->path and otheru->addr itself. + * + * Using smp_store_release() here to set newu->addr + * is enough to make those stores, as well as stores + * to newu->path visible to anyone who gets newu->addr + * by smp_load_acquire(). IOW, the same warranties + * as for unix_sock instances bound in unix_bind() or + * in unix_autobind(). + */ if (otheru->path.dentry) { path_get(&otheru->path); newu->path = otheru->path; } + refcount_inc(&otheru->addr->refcnt); + smp_store_release(&newu->addr, otheru->addr); /* Set credentials */ copy_peercred(sk, other); @@ -1455,7 +1469,7 @@ out: static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sock *sk = sock->sk; - struct unix_sock *u; + struct unix_address *addr; DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); int err = 0; @@ -1470,19 +1484,15 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ sock_hold(sk); } - u = unix_sk(sk); - unix_state_lock(sk); - if (!u->addr) { + addr = smp_load_acquire(&unix_sk(sk)->addr); + if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; *uaddr_len = sizeof(short); } else { - struct unix_address *addr = u->addr; - *uaddr_len = addr->len; memcpy(sunaddr, addr->name, *uaddr_len); } - unix_state_unlock(sk); sock_put(sk); out: return err; @@ -2075,11 +2085,11 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { - struct unix_sock *u = unix_sk(sk); + struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); - if (u->addr) { - msg->msg_namelen = u->addr->len; - memcpy(msg->msg_name, u->addr->name, u->addr->len); + if (addr) { + msg->msg_namelen = addr->len; + memcpy(msg->msg_name, addr->name, addr->len); } } @@ -2583,15 +2593,14 @@ static int unix_open_file(struct sock *sk) if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; - unix_state_lock(sk); - path = unix_sk(sk)->path; - if (!path.dentry) { - unix_state_unlock(sk); + if (!smp_load_acquire(&unix_sk(sk)->addr)) + return -ENOENT; + + path = unix_sk(sk)->path; + if (!path.dentry) return -ENOENT; - } path_get(&path); - unix_state_unlock(sk); fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) @@ -2831,7 +2840,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); - if (u->addr) { + if (u->addr) { // under unix_table_lock here int i, len; seq_putc(seq, ' '); diff --git a/net/unix/diag.c b/net/unix/diag.c index 384c84e83462..3183d9b8ab33 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -10,7 +10,8 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { - struct unix_address *addr = unix_sk(sk)->addr; + /* might or might not have unix_table_lock */ + struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) return 0; diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 67703dbe29ea..3a8916aa73c4 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -321,6 +321,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, if (a->u.net->sk) { struct sock *sk = a->u.net->sk; struct unix_sock *u; + struct unix_address *addr; int len = 0; char *p = NULL; @@ -351,14 +352,15 @@ static void dump_common_audit_data(struct audit_buffer *ab, #endif case AF_UNIX: u = unix_sk(sk); + addr = smp_load_acquire(&u->addr); + if (!addr) + break; if (u->path.dentry) { audit_log_d_path(ab, " path=", &u->path); break; } - if (!u->addr) - break; - len = u->addr->len-sizeof(short); - p = &u->addr->name->sun_path[0]; + len = addr->len-sizeof(short); + p = &addr->name->sun_path[0]; audit_log_format(ab, " path="); if (*p) audit_log_untrustedstring(ab, p); From ddcc3253f8dd3dae3b131e319fe8217a687a2cd2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Feb 2019 00:15:30 +0100 Subject: [PATCH 2267/2608] ipvlan: disallow userns cap_net_admin to change global mode/flags [ Upstream commit 7cc9f7003a969d359f608ebb701d42cafe75b84a ] When running Docker with userns isolation e.g. --userns-remap="default" and spawning up some containers with CAP_NET_ADMIN under this realm, I noticed that link changes on ipvlan slave device inside that container can affect all devices from this ipvlan group which are in other net namespaces where the container should have no permission to make changes to, such as the init netns, for example. This effectively allows to undo ipvlan private mode and switch globally to bridge mode where slaves can communicate directly without going through hostns, or it allows to switch between global operation mode (l2/l3/l3s) for everyone bound to the given ipvlan master device. libnetwork plugin here is creating an ipvlan master and ipvlan slave in hostns and a slave each that is moved into the container's netns upon creation event. * In hostns: # ip -d a [...] 8: cilium_host@bond0: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l3 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.0.1/32 scope link cilium_host valid_lft forever preferred_lft forever [...] * Spawn container & change ipvlan mode setting inside of it: # docker run -dt --cap-add=NET_ADMIN --network cilium-net --name client -l app=test cilium/netperf 9fff485d69dcb5ce37c9e33ca20a11ccafc236d690105aadbfb77e4f4170879c # docker exec -ti client ip -d a [...] 10: cilium0@if4: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l3 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0 valid_lft forever preferred_lft forever # docker exec -ti client ip link change link cilium0 name cilium0 type ipvlan mode l2 # docker exec -ti client ip -d a [...] 10: cilium0@if4: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0 valid_lft forever preferred_lft forever * In hostns (mode switched to l2): # ip -d a [...] 8: cilium_host@bond0: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.0.1/32 scope link cilium_host valid_lft forever preferred_lft forever [...] Same l3 -> l2 switch would also happen by creating another slave inside the container's network namespace when specifying the existing cilium0 link to derive the actual (bond0) master: # docker exec -ti client ip link add link cilium0 name cilium1 type ipvlan mode l2 # docker exec -ti client ip -d a [...] 2: cilium1@if4: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 10: cilium0@if4: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0 valid_lft forever preferred_lft forever * In hostns: # ip -d a [...] 8: cilium_host@bond0: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 ipvlan mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 inet 10.41.0.1/32 scope link cilium_host valid_lft forever preferred_lft forever [...] One way to mitigate it is to check CAP_NET_ADMIN permissions of the ipvlan master device's ns, and only then allow to change mode or flags for all devices bound to it. Above two cases are then disallowed after the patch. Signed-off-by: Daniel Borkmann Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 2222ed63d055..d629dddb0e89 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -482,7 +482,12 @@ static int ipvlan_nl_changelink(struct net_device *dev, struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); int err = 0; - if (data && data[IFLA_IPVLAN_MODE]) { + if (!data) + return 0; + if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); err = ipvlan_set_port_mode(port, nmode); @@ -551,6 +556,8 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, struct ipvl_dev *tmp = netdev_priv(phy_dev); phy_dev = tmp->phy_dev; + if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; } else if (!netif_is_ipvlan_port(phy_dev)) { err = ipvlan_port_create(phy_dev); if (err < 0) From 578aa457dad7831a420592890141cd6de6465006 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 Mar 2019 09:14:10 +0100 Subject: [PATCH 2268/2608] perf/x86: Fixup typo in stub functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f764c58b7faa26f5714e6907f892abc2bc0de4f8 upstream. Guenter reported a build warning for CONFIG_CPU_SUP_INTEL=n: > With allmodconfig-CONFIG_CPU_SUP_INTEL, this patch results in: > > In file included from arch/x86/events/amd/core.c:8:0: > arch/x86/events/amd/../perf_event.h:1036:45: warning: ‘struct cpu_hw_event’ declared inside parameter list will not be visible outside of this definition or declaration > static inline int intel_cpuc_prepare(struct cpu_hw_event *cpuc, int cpu) While harmless (an unsed pointer is an unused pointer, no matter the type) it needs fixing. Reported-by: Guenter Roeck Signed-off-by: Peter Zijlstra (Intel) Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: d01b1f96a82e ("perf/x86/intel: Make cpuc allocations consistent") Link: http://lkml.kernel.org/r/20190315081410.GR5996@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/perf_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 9702f4ed4748..84b3841c131d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1021,12 +1021,12 @@ static inline int intel_pmu_init(void) return 0; } -static inline int intel_cpuc_prepare(struct cpu_hw_event *cpuc, int cpu) +static inline int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) { return 0; } -static inline void intel_cpuc_finish(struct cpu_hw_event *cpuc) +static inline void intel_cpuc_finish(struct cpu_hw_events *cpuc) { } From 18d48bd413830eb2df22d06d0573f499185d935c Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 26 Feb 2019 13:38:16 +0900 Subject: [PATCH 2269/2608] ALSA: bebob: use more identical mod_alias for Saffire Pro 10 I/O against Liquid Saffire 56 commit 7dc661bd8d3261053b69e4e2d0050cd1ee540fc1 upstream. ALSA bebob driver has an entry for Focusrite Saffire Pro 10 I/O. The entry matches vendor_id in root directory and model_id in unit directory of configuration ROM for IEEE 1394 bus. On the other hand, configuration ROM of Focusrite Liquid Saffire 56 has the same vendor_id and model_id. This device is an application of TCAT Dice (TCD2220 a.k.a Dice Jr.) however ALSA bebob driver can be bound to it randomly instead of ALSA dice driver. At present, drivers in ALSA firewire stack can not handle this situation appropriately. This commit uses more identical mod_alias for Focusrite Saffire Pro 10 I/O in ALSA bebob driver. $ python2 crpp < /sys/bus/firewire/devices/fw1/config_rom ROM header and bus information block ----------------------------------------------------------------- 400 042a829d bus_info_length 4, crc_length 42, crc 33437 404 31333934 bus_name "1394" 408 f0649222 irmc 1, cmc 1, isc 1, bmc 1, pmc 0, cyc_clk_acc 100, max_rec 9 (1024), max_rom 2, gen 2, spd 2 (S400) 40c 00130e01 company_id 00130e | 410 000606e0 device_id 01000606e0 | EUI-64 00130e01000606e0 root directory ----------------------------------------------------------------- 414 0009d31c directory_length 9, crc 54044 418 04000014 hardware version 41c 0c0083c0 node capabilities per IEEE 1394 420 0300130e vendor 424 81000012 --> descriptor leaf at 46c 428 17000006 model 42c 81000016 --> descriptor leaf at 484 430 130120c2 version 434 d1000002 --> unit directory at 43c 438 d4000006 --> dependent info directory at 450 unit directory at 43c ----------------------------------------------------------------- 43c 0004707c directory_length 4, crc 28796 440 1200a02d specifier id: 1394 TA 444 13010001 version: AV/C 448 17000006 model 44c 81000013 --> descriptor leaf at 498 dependent info directory at 450 ----------------------------------------------------------------- 450 000637c7 directory_length 6, crc 14279 454 120007f5 specifier id 458 13000001 version 45c 3affffc7 (immediate value) 460 3b100000 (immediate value) 464 3cffffc7 (immediate value) 468 3d600000 (immediate value) descriptor leaf at 46c ----------------------------------------------------------------- 46c 00056f3b leaf_length 5, crc 28475 470 00000000 textual descriptor 474 00000000 minimal ASCII 478 466f6375 "Focu" 47c 73726974 "srit" 480 65000000 "e" descriptor leaf at 484 ----------------------------------------------------------------- 484 0004a165 leaf_length 4, crc 41317 488 00000000 textual descriptor 48c 00000000 minimal ASCII 490 50726f31 "Pro1" 494 30494f00 "0IO" descriptor leaf at 498 ----------------------------------------------------------------- 498 0004a165 leaf_length 4, crc 41317 49c 00000000 textual descriptor 4a0 00000000 minimal ASCII 4a4 50726f31 "Pro1" 4a8 30494f00 "0IO" $ python2 crpp < /sys/bus/firewire/devices/fw1/config_rom ROM header and bus information block ----------------------------------------------------------------- 400 040442e4 bus_info_length 4, crc_length 4, crc 17124 404 31333934 bus_name "1394" 408 e0ff8112 irmc 1, cmc 1, isc 1, bmc 0, pmc 0, cyc_clk_acc 255, max_rec 8 (512), max_rom 1, gen 1, spd 2 (S400) 40c 00130e04 company_id 00130e | 410 018001e9 device_id 04018001e9 | EUI-64 00130e04018001e9 root directory ----------------------------------------------------------------- 414 00065612 directory_length 6, crc 22034 418 0300130e vendor 41c 8100000a --> descriptor leaf at 444 420 17000006 model 424 8100000e --> descriptor leaf at 45c 428 0c0087c0 node capabilities per IEEE 1394 42c d1000001 --> unit directory at 430 unit directory at 430 ----------------------------------------------------------------- 430 000418a0 directory_length 4, crc 6304 434 1200130e specifier id 438 13000001 version 43c 17000006 model 440 8100000f --> descriptor leaf at 47c descriptor leaf at 444 ----------------------------------------------------------------- 444 00056f3b leaf_length 5, crc 28475 448 00000000 textual descriptor 44c 00000000 minimal ASCII 450 466f6375 "Focu" 454 73726974 "srit" 458 65000000 "e" descriptor leaf at 45c ----------------------------------------------------------------- 45c 000762c6 leaf_length 7, crc 25286 460 00000000 textual descriptor 464 00000000 minimal ASCII 468 4c495155 "LIQU" 46c 49445f53 "ID_S" 470 41464649 "AFFI" 474 52455f35 "RE_5" 478 36000000 "6" descriptor leaf at 47c ----------------------------------------------------------------- 47c 000762c6 leaf_length 7, crc 25286 480 00000000 textual descriptor 484 00000000 minimal ASCII 488 4c495155 "LIQU" 48c 49445f53 "ID_S" 490 41464649 "AFFI" 494 52455f35 "RE_5" 498 36000000 "6" Cc: # v3.16+ Fixes: 25784ec2d034 ("ALSA: bebob: Add support for Focusrite Saffire/SaffirePro series") Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/bebob/bebob.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c index de4af8a41ff0..5636e89ce5c7 100644 --- a/sound/firewire/bebob/bebob.c +++ b/sound/firewire/bebob/bebob.c @@ -474,7 +474,19 @@ static const struct ieee1394_device_id bebob_id_table[] = { /* Focusrite, SaffirePro 26 I/O */ SND_BEBOB_DEV_ENTRY(VEN_FOCUSRITE, 0x00000003, &saffirepro_26_spec), /* Focusrite, SaffirePro 10 I/O */ - SND_BEBOB_DEV_ENTRY(VEN_FOCUSRITE, 0x00000006, &saffirepro_10_spec), + { + // The combination of vendor_id and model_id is the same as the + // same as the one of Liquid Saffire 56. + .match_flags = IEEE1394_MATCH_VENDOR_ID | + IEEE1394_MATCH_MODEL_ID | + IEEE1394_MATCH_SPECIFIER_ID | + IEEE1394_MATCH_VERSION, + .vendor_id = VEN_FOCUSRITE, + .model_id = 0x000006, + .specifier_id = 0x00a02d, + .version = 0x010001, + .driver_data = (kernel_ulong_t)&saffirepro_10_spec, + }, /* Focusrite, Saffire(no label and LE) */ SND_BEBOB_DEV_ENTRY(VEN_FOCUSRITE, MODEL_FOCUSRITE_SAFFIRE_BOTH, &saffire_spec), From 34cdfe7845089d2762f7439825e582887994ecf5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 26 Feb 2019 13:38:37 +0900 Subject: [PATCH 2270/2608] ALSA: firewire-motu: fix construction of PCM frame for capture direction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f97a0944a72b26a2bece72516294e112a890f98a upstream. In data blocks of common isochronous packet for MOTU devices, PCM frames are multiplexed in a shape of '24 bit * 4 Audio Pack', described in IEC 61883-6. The frames are not aligned to quadlet. For capture PCM substream, ALSA firewire-motu driver constructs PCM frames by reading data blocks byte-by-byte. However this operation includes bug for lower byte of the PCM sample. This brings invalid content of the PCM samples. This commit fixes the bug. Reported-by: Peter Sjöberg Cc: # v4.12+ Fixes: 4641c9394010 ("ALSA: firewire-motu: add MOTU specific protocol layer") Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/motu/amdtp-motu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/firewire/motu/amdtp-motu.c b/sound/firewire/motu/amdtp-motu.c index 96f0091144bb..2cf18bedb91e 100644 --- a/sound/firewire/motu/amdtp-motu.c +++ b/sound/firewire/motu/amdtp-motu.c @@ -136,7 +136,9 @@ static void read_pcm_s32(struct amdtp_stream *s, byte = (u8 *)buffer + p->pcm_byte_offset; for (c = 0; c < channels; ++c) { - *dst = (byte[0] << 24) | (byte[1] << 16) | byte[2]; + *dst = (byte[0] << 24) | + (byte[1] << 16) | + (byte[2] << 8); byte += 3; dst++; } From 2a003d3333f2cb41ced9183506397ccb6a0571eb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Mar 2019 14:01:14 +0100 Subject: [PATCH 2271/2608] perf/x86/intel: Fix memory corruption commit ede271b059463731cbd6dffe55ffd70d7dbe8392 upstream. Through: validate_event() x86_pmu.get_event_constraints(.idx=-1) tfa_get_event_constraints() dyn_constraint() cpuc->constraint_list[-1] is used, which is an obvious out-of-bound access. In this case, simply skip the TFA constraint code, there is no event constraint with just PMC3, therefore the code will never result in the empty set. Fixes: 400816f60c54 ("perf/x86/intel: Implement support for TSX Force Abort") Reported-by: Tony Jones Reported-by: "DSouza, Nelson" Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Tony Jones Tested-by: "DSouza, Nelson" Cc: eranian@google.com Cc: jolsa@redhat.com Cc: stable@kernel.org Link: https://lkml.kernel.org/r/20190314130705.441549378@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 65a369a42338..ade4773375bc 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3262,7 +3262,7 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, /* * Without TFA we must not use PMC3. */ - if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) { + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk) && idx >= 0) { c = dyn_constraint(cpuc, c, idx); c->idxmsk64 &= ~(1ULL << 3); c->weight--; From 94b2a2c534f5a01fb8e93857cf376528c599502c Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Thu, 14 Mar 2019 02:42:43 +0800 Subject: [PATCH 2272/2608] perf/x86/intel: Make dev_attr_allow_tsx_force_abort static commit c634dc6bdedeb0b2c750fc611612618a85639ab2 upstream. Fixes: 400816f60c54 ("perf/x86/intel: Implement support for TSX Force Abort") Signed-off-by: kbuild test robot Signed-off-by: Thomas Gleixner Cc: "Peter Zijlstra (Intel)" Cc: kbuild-all@01.org Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Kan Liang Cc: Jiri Olsa Cc: Andi Kleen Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190313184243.GA10820@lkp-sb-ep06 Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ade4773375bc..dc8f8b3e6cec 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3968,7 +3968,7 @@ static struct attribute *intel_pmu_caps_attrs[] = { NULL }; -DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort); +static DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort); static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, From c9794e6820ad92bc0fa47ac6a99f63f6bb477225 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Fri, 8 Mar 2019 23:52:05 +0800 Subject: [PATCH 2273/2608] It's wrong to add len to sector_nr in raid10 reshape twice commit b761dcf1217760a42f7897c31dcb649f59b2333e upstream. In reshape_request it already adds len to sector_nr already. It's wrong to add len to sector_nr again after adding pages to bio. If there is bad block it can't copy one chunk at a time, it needs to goto read_more. Now the sector_nr is wrong. It can cause data corruption. Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Xiao Ni Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 2ce079a0b0bd..ed1b7bf1ec0e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4495,7 +4495,6 @@ read_more: atomic_inc(&r10_bio->remaining); read_bio->bi_next = NULL; generic_make_request(read_bio); - sector_nr += nr_sectors; sectors_done += nr_sectors; if (sector_nr <= last) goto read_more; From b384efc1fbb944255c096b9372162de4b4c2ceb7 Mon Sep 17 00:00:00 2001 From: Zha Bin Date: Tue, 8 Jan 2019 16:07:03 +0800 Subject: [PATCH 2274/2608] vhost/vsock: fix vhost vsock cid hashing inconsistent commit 7fbe078c37aba3088359c9256c1a1d0c3e39ee81 upstream. The vsock core only supports 32bit CID, but the Virtio-vsock spec define CID (dst_cid and src_cid) as u64 and the upper 32bits is reserved as zero. This inconsistency causes one bug in vhost vsock driver. The scenarios is: 0. A hash table (vhost_vsock_hash) is used to map an CID to a vsock object. And hash_min() is used to compute the hash key. hash_min() is defined as: (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)). That means the hash algorithm has dependency on the size of macro argument 'val'. 0. In function vhost_vsock_set_cid(), a 64bit CID is passed to hash_min() to compute the hash key when inserting a vsock object into the hash table. 0. In function vhost_vsock_get(), a 32bit CID is passed to hash_min() to compute the hash key when looking up a vsock for an CID. Because the different size of the CID, hash_min() returns different hash key, thus fails to look up the vsock object for an CID. To fix this bug, we keep CID as u64 in the IOCTLs and virtio message headers, but explicitly convert u64 to u32 when deal with the hash table and vsock core. Fixes: 834e772c8db0 ("vhost/vsock: fix use-after-free in network stack callers") Link: https://github.com/stefanha/virtio/blob/vsock/trunk/content.tex Signed-off-by: Zha Bin Reviewed-by: Liu Jiang Reviewed-by: Stefan Hajnoczi Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Shengjing Zhu Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 831758335e2c..d0cf3d5aa570 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -642,7 +642,7 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) hash_del_rcu(&vsock->hash); vsock->guest_cid = guest_cid; - hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); + hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid); spin_unlock_bh(&vhost_vsock_lock); return 0; From 5726a8d0f1958af80ad8e514bc2c18d213e739b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Mar 2019 13:13:25 +0100 Subject: [PATCH 2275/2608] Linux 4.14.107 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ecc3a2a82a49..e3e2121718a8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 106 +SUBLEVEL = 107 EXTRAVERSION = NAME = Petit Gorille From be0b155c2b2f8c39adaa1641f3f6a4be47277993 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 24 Jan 2019 14:35:13 +0800 Subject: [PATCH 2276/2608] 9p: use inode->i_lock to protect i_size_write() under 32-bit commit 5e3cc1ee1405a7eb3487ed24f786dec01b4cbe1f upstream. Use inode->i_lock to protect i_size_write(), else i_size_read() in generic_fillattr() may loop infinitely in read_seqcount_begin() when multiple processes invoke v9fs_vfs_getattr() or v9fs_vfs_getattr_dotl() simultaneously under 32-bit SMP environment, and a soft lockup will be triggered as show below: watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [stat:2217] Modules linked in: CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4 Hardware name: Generic DT based system PC is at generic_fillattr+0x104/0x108 LR is at 0xec497f00 pc : [<802b8898>] lr : [] psr: 200c0013 sp : ec497e20 ip : ed608030 fp : ec497e3c r10: 00000000 r9 : ec497f00 r8 : ed608030 r7 : ec497ebc r6 : ec497f00 r5 : ee5c1550 r4 : ee005780 r3 : 0000052d r2 : 00000000 r1 : ec497f00 r0 : ed608030 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: ac48006a DAC: 00000051 CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4 Hardware name: Generic DT based system Backtrace: [<8010d974>] (dump_backtrace) from [<8010dc88>] (show_stack+0x20/0x24) [<8010dc68>] (show_stack) from [<80a1d194>] (dump_stack+0xb0/0xdc) [<80a1d0e4>] (dump_stack) from [<80109f34>] (show_regs+0x1c/0x20) [<80109f18>] (show_regs) from [<801d0a80>] (watchdog_timer_fn+0x280/0x2f8) [<801d0800>] (watchdog_timer_fn) from [<80198658>] (__hrtimer_run_queues+0x18c/0x380) [<801984cc>] (__hrtimer_run_queues) from [<80198e60>] (hrtimer_run_queues+0xb8/0xf0) [<80198da8>] (hrtimer_run_queues) from [<801973e8>] (run_local_timers+0x28/0x64) [<801973c0>] (run_local_timers) from [<80197460>] (update_process_times+0x3c/0x6c) [<80197424>] (update_process_times) from [<801ab2b8>] (tick_nohz_handler+0xe0/0x1bc) [<801ab1d8>] (tick_nohz_handler) from [<80843050>] (arch_timer_handler_virt+0x38/0x48) [<80843018>] (arch_timer_handler_virt) from [<80180a64>] (handle_percpu_devid_irq+0x8c/0x240) [<801809d8>] (handle_percpu_devid_irq) from [<8017ac20>] (generic_handle_irq+0x34/0x44) [<8017abec>] (generic_handle_irq) from [<8017b344>] (__handle_domain_irq+0x6c/0xc4) [<8017b2d8>] (__handle_domain_irq) from [<801022e0>] (gic_handle_irq+0x4c/0x88) [<80102294>] (gic_handle_irq) from [<80101a30>] (__irq_svc+0x70/0x98) [<802b8794>] (generic_fillattr) from [<8056b284>] (v9fs_vfs_getattr_dotl+0x74/0xa4) [<8056b210>] (v9fs_vfs_getattr_dotl) from [<802b8904>] (vfs_getattr_nosec+0x68/0x7c) [<802b889c>] (vfs_getattr_nosec) from [<802b895c>] (vfs_getattr+0x44/0x48) [<802b8918>] (vfs_getattr) from [<802b8a74>] (vfs_statx+0x9c/0xec) [<802b89d8>] (vfs_statx) from [<802b9428>] (sys_lstat64+0x48/0x78) [<802b93e0>] (sys_lstat64) from [<80101000>] (ret_fast_syscall+0x0/0x28) [dominique.martinet@cea.fr: updated comment to not refer to a function in another subsystem] Link: http://lkml.kernel.org/r/20190124063514.8571-2-houtao1@huawei.com Cc: stable@vger.kernel.org Fixes: 7549ae3e81cc ("9p: Use the i_size_[read, write]() macros instead of using inode->i_size directly.") Reported-by: Xing Gaopeng Signed-off-by: Hou Tao Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- fs/9p/v9fs_vfs.h | 23 +++++++++++++++++++++-- fs/9p/vfs_file.c | 6 +++++- fs/9p/vfs_inode.c | 23 +++++++++++------------ fs/9p/vfs_inode_dotl.c | 27 ++++++++++++++------------- fs/9p/vfs_super.c | 4 ++-- 5 files changed, 53 insertions(+), 30 deletions(-) diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 5a0db6dec8d1..aaee1e6584e6 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -40,6 +40,9 @@ */ #define P9_LOCK_TIMEOUT (30*HZ) +/* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */ +#define V9FS_STAT2INODE_KEEP_ISIZE 1 + extern struct file_system_type v9fs_fs_type; extern const struct address_space_operations v9fs_addr_operations; extern const struct file_operations v9fs_file_operations; @@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, struct inode *inode, umode_t mode, dev_t); void v9fs_evict_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); -void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); -void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); +void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, + struct super_block *sb, unsigned int flags); +void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, + unsigned int flags); int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); @@ -83,4 +88,18 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode) } int v9fs_open_to_dotl_flags(int flags); + +static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size) +{ + /* + * 32-bit need the lock, concurrent updates could break the + * sequences and make i_size_read() loop forever. + * 64-bit updates are atomic and can skip the locking. + */ + if (sizeof(i_size) > sizeof(long)) + spin_lock(&inode->i_lock); + i_size_write(inode, i_size); + if (sizeof(i_size) > sizeof(long)) + spin_unlock(&inode->i_lock); +} #endif diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 3a2f37ad1f89..af8cac975a74 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -442,7 +442,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) i_size = i_size_read(inode); if (iocb->ki_pos > i_size) { inode_add_bytes(inode, iocb->ki_pos - i_size); - i_size_write(inode, iocb->ki_pos); + /* + * Need to serialize against i_size_write() in + * v9fs_stat2inode() + */ + v9fs_i_size_write(inode, iocb->ki_pos); } return retval; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index bdabb2765d1b..e88cb25176dc 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -538,7 +538,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, if (retval) goto error; - v9fs_stat2inode(st, inode, sb); + v9fs_stat2inode(st, inode, sb, 0); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); return inode; @@ -1080,7 +1080,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat, if (IS_ERR(st)) return PTR_ERR(st); - v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb); + v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0); generic_fillattr(d_inode(dentry), stat); p9stat_free(st); @@ -1158,12 +1158,13 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) * @stat: Plan 9 metadata (mistat) structure * @inode: inode to populate * @sb: superblock of filesystem + * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) * */ void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, - struct super_block *sb) + struct super_block *sb, unsigned int flags) { umode_t mode; char ext[32]; @@ -1204,10 +1205,11 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, mode = p9mode2perm(v9ses, stat); mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; - i_size_write(inode, stat->length); + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) + v9fs_i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... */ - inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; + inode->i_blocks = (stat->length + 512 - 1) >> 9; v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; } @@ -1404,9 +1406,9 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) { int umode; dev_t rdev; - loff_t i_size; struct p9_wstat *st; struct v9fs_session_info *v9ses; + unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_stat(fid); @@ -1419,16 +1421,13 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) goto out; - spin_lock(&inode->i_lock); /* * We don't want to refresh inode->i_size, * because we may have cached data */ - i_size = inode->i_size; - v9fs_stat2inode(st, inode, inode->i_sb); - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) - inode->i_size = i_size; - spin_unlock(&inode->i_lock); + flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ? + V9FS_STAT2INODE_KEEP_ISIZE : 0; + v9fs_stat2inode(st, inode, inode->i_sb, flags); out: p9stat_free(st); kfree(st); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 7f6ae21a27b3..3446ab1f44e7 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -143,7 +143,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, if (retval) goto error; - v9fs_stat2inode_dotl(st, inode); + v9fs_stat2inode_dotl(st, inode, 0); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); if (retval) @@ -497,7 +497,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat, if (IS_ERR(st)) return PTR_ERR(st); - v9fs_stat2inode_dotl(st, d_inode(dentry)); + v9fs_stat2inode_dotl(st, d_inode(dentry), 0); generic_fillattr(d_inode(dentry), stat); /* Change block size to what the server returned */ stat->blksize = st->st_blksize; @@ -608,11 +608,13 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) * v9fs_stat2inode_dotl - populate an inode structure with stat info * @stat: stat structure * @inode: inode to populate + * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) * */ void -v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) +v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, + unsigned int flags) { umode_t mode; struct v9fs_inode *v9inode = V9FS_I(inode); @@ -632,7 +634,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; - i_size_write(inode, stat->st_size); + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) + v9fs_i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; } else { if (stat->st_result_mask & P9_STATS_ATIME) { @@ -662,8 +665,9 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) } if (stat->st_result_mask & P9_STATS_RDEV) inode->i_rdev = new_decode_dev(stat->st_rdev); - if (stat->st_result_mask & P9_STATS_SIZE) - i_size_write(inode, stat->st_size); + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && + stat->st_result_mask & P9_STATS_SIZE) + v9fs_i_size_write(inode, stat->st_size); if (stat->st_result_mask & P9_STATS_BLOCKS) inode->i_blocks = stat->st_blocks; } @@ -929,9 +933,9 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry, int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) { - loff_t i_size; struct p9_stat_dotl *st; struct v9fs_session_info *v9ses; + unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_getattr_dotl(fid, P9_STATS_ALL); @@ -943,16 +947,13 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) goto out; - spin_lock(&inode->i_lock); /* * We don't want to refresh inode->i_size, * because we may have cached data */ - i_size = inode->i_size; - v9fs_stat2inode_dotl(st, inode); - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) - inode->i_size = i_size; - spin_unlock(&inode->i_lock); + flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ? + V9FS_STAT2INODE_KEEP_ISIZE : 0; + v9fs_stat2inode_dotl(st, inode, flags); out: kfree(st); return 0; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8b75463cb211..d4400779f6d9 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -172,7 +172,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, goto release_sb; } d_inode(root)->i_ino = v9fs_qid2ino(&st->qid); - v9fs_stat2inode_dotl(st, d_inode(root)); + v9fs_stat2inode_dotl(st, d_inode(root), 0); kfree(st); } else { struct p9_wstat *st = NULL; @@ -183,7 +183,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, } d_inode(root)->i_ino = v9fs_qid2ino(&st->qid); - v9fs_stat2inode(st, d_inode(root), sb); + v9fs_stat2inode(st, d_inode(root), sb, 0); p9stat_free(st); kfree(st); From 1cbd3417c43ab9eab43a0fe909fcd2e4b38668f2 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 13 Mar 2019 16:01:37 +0800 Subject: [PATCH 2277/2608] 9p/net: fix memory leak in p9_client_create commit bb06c388fa20ae24cfe80c52488de718a7e3a53f upstream. If msize is less than 4096, we should close and put trans, destroy tagpool, not just free client. This patch fixes that. Link: http://lkml.kernel.org/m/1552464097-142659-1-git-send-email-zhengbin13@huawei.com Cc: stable@vger.kernel.org Fixes: 574d356b7a02 ("9p/net: put a lower bound on msize") Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index ef0f8fe3ac08..6a6b290574a1 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1082,7 +1082,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) p9_debug(P9_DEBUG_ERROR, "Please specify a msize of at least 4k\n"); err = -EINVAL; - goto free_client; + goto close_trans; } err = p9_client_version(clnt); From fb7c62a5efff55ee026301ebe1e8987c92e38237 Mon Sep 17 00:00:00 2001 From: "S.j. Wang" Date: Mon, 18 Feb 2019 08:29:11 +0000 Subject: [PATCH 2278/2608] ASoC: fsl_esai: fix register setting issue in RIGHT_J mode commit cc29ea007347f39f4c5a4d27b0b555955a0277f9 upstream. The ESAI_xCR_xWA is xCR's bit, not the xCCR's bit, driver set it to wrong register, correct it. Fixes 43d24e76b698 ("ASoC: fsl_esai: Add ESAI CPU DAI driver") Cc: Signed-off-by: Shengjiu Wang Reviewed-by: Fabio Estevam Ackedy-by: Nicolin Chen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/fsl/fsl_esai.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index 81268760b7a9..a23d6a821ff3 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -395,7 +395,8 @@ static int fsl_esai_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) break; case SND_SOC_DAIFMT_RIGHT_J: /* Data on rising edge of bclk, frame high, right aligned */ - xccr |= ESAI_xCCR_xCKP | ESAI_xCCR_xHCKP | ESAI_xCR_xWA; + xccr |= ESAI_xCCR_xCKP | ESAI_xCCR_xHCKP; + xcr |= ESAI_xCR_xWA; break; case SND_SOC_DAIFMT_DSP_A: /* Data on rising edge of bclk, frame high, 1clk before data */ @@ -452,12 +453,12 @@ static int fsl_esai_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - mask = ESAI_xCR_xFSL | ESAI_xCR_xFSR; + mask = ESAI_xCR_xFSL | ESAI_xCR_xFSR | ESAI_xCR_xWA; regmap_update_bits(esai_priv->regmap, REG_ESAI_TCR, mask, xcr); regmap_update_bits(esai_priv->regmap, REG_ESAI_RCR, mask, xcr); mask = ESAI_xCCR_xCKP | ESAI_xCCR_xHCKP | ESAI_xCCR_xFSP | - ESAI_xCCR_xFSD | ESAI_xCCR_xCKD | ESAI_xCR_xWA; + ESAI_xCCR_xFSD | ESAI_xCCR_xCKD; regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR, mask, xccr); regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR, mask, xccr); From 51f2d1adaa3bceb4ee624b470d62be24f724be36 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 9 Feb 2019 00:39:27 +0100 Subject: [PATCH 2279/2608] iio: adc: exynos-adc: Fix NULL pointer exception on unbind commit 2ea8bab4dd2a9014e723b28091831fa850b82d83 upstream. Fix NULL pointer exception on device unbind when device tree does not contain "has-touchscreen" property. In such case the input device is not registered so it should not be unregistered. $ echo "12d10000.adc" > /sys/bus/platform/drivers/exynos-adc/unbind Unable to handle kernel NULL pointer dereference at virtual address 00000474 ... (input_unregister_device) from [] (exynos_adc_remove+0x20/0x80) (exynos_adc_remove) from [] (platform_drv_remove+0x20/0x40) (platform_drv_remove) from [] (device_release_driver_internal+0xdc/0x1ac) (device_release_driver_internal) from [] (unbind_store+0x60/0xd4) (unbind_store) from [] (kernfs_fop_write+0x100/0x1e0) (kernfs_fop_write) from [] (__vfs_write+0x2c/0x17c) (__vfs_write) from [] (vfs_write+0xa4/0x184) (vfs_write) from [] (ksys_write+0x4c/0xac) (ksys_write) from [] (ret_fast_syscall+0x0/0x28) Fixes: 2bb8ad9b44c5 ("iio: exynos-adc: add experimental touchscreen support") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/exynos_adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/exynos_adc.c b/drivers/iio/adc/exynos_adc.c index 6c5a7be9f8c1..019153882e70 100644 --- a/drivers/iio/adc/exynos_adc.c +++ b/drivers/iio/adc/exynos_adc.c @@ -916,7 +916,7 @@ static int exynos_adc_remove(struct platform_device *pdev) struct iio_dev *indio_dev = platform_get_drvdata(pdev); struct exynos_adc *info = iio_priv(indio_dev); - if (IS_REACHABLE(CONFIG_INPUT)) { + if (IS_REACHABLE(CONFIG_INPUT) && info->input) { free_irq(info->tsirq, info); input_unregister_device(info->input); } From 1a839058789a5e124d7f2bd5a1b7fccc7445af14 Mon Sep 17 00:00:00 2001 From: Zhi Jin Date: Thu, 6 Sep 2018 15:22:10 +0800 Subject: [PATCH 2280/2608] stm class: Fix an endless loop in channel allocation commit a1d75dad3a2c689e70a1c4e0214cca9de741d0aa upstream. There is a bug in the channel allocation logic that leads to an endless loop when looking for a contiguous range of channels in a range with a mixture of free and occupied channels. For example, opening three consequtive channels, closing the first two and requesting 4 channels in a row will trigger this soft lockup. The bug is that the search loop forgets to skip over the range once it detects that one channel in that range is occupied. Restore the original intent to the logic by fixing the omission. Signed-off-by: Zhi Jin Signed-off-by: Alexander Shishkin Fixes: 7bd1d4093c2f ("stm class: Introduce an abstraction for System Trace Module devices") CC: stable@vger.kernel.org # v4.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 736862967e32..88a79b45b80c 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -252,6 +252,9 @@ static int find_free_channels(unsigned long *bitmap, unsigned int start, ; if (i == width) return pos; + + /* step over [pos..pos+i) to continue search */ + pos += i; } return -1; From 68a4425031bce034fcd987495d6ecc8c0d15faa7 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 1 Feb 2019 07:18:20 +0000 Subject: [PATCH 2281/2608] crypto: caam - fixed handling of sg list commit 42e95d1f10dcf8b18b1d7f52f7068985b3dc5b79 upstream. when the source sg contains more than 1 fragment and destination sg contains 1 fragment, the caam driver mishandle the buffers to be sent to caam. Fixes: f2147b88b2b1 ("crypto: caam - Convert GCM to new AEAD interface") Cc: # 4.2+ Signed-off-by: Pankaj Gupta Signed-off-by: Arun Pathak Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamalg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 43fe195f6dca..63a21a6fc6cf 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1097,6 +1097,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, } else { if (edesc->dst_nents == 1) { dst_dma = sg_dma_address(req->dst); + out_options = 0; } else { dst_dma = edesc->sec4_sg_dma + (edesc->src_nents + 1) * sizeof(struct sec4_sg_entry); From 09024fed4e52b7cfef8721584c4d8658f65fa151 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jan 2019 23:51:41 -0800 Subject: [PATCH 2282/2608] crypto: ahash - fix another early termination in hash walk commit 77568e535af7c4f97eaef1e555bf0af83772456c upstream. Hash algorithms with an alignmask set, e.g. "xcbc(aes-aesni)" and "michael_mic", fail the improved hash tests because they sometimes produce the wrong digest. The bug is that in the case where a scatterlist element crosses pages, not all the data is actually hashed because the scatterlist walk terminates too early. This happens because the 'nbytes' variable in crypto_hash_walk_done() is assigned the number of bytes remaining in the page, then later interpreted as the number of bytes remaining in the scatterlist element. Fix it. Fixes: 900a081f6912 ("crypto: ahash - Fix early termination in hash walk") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 3980e9e45289..5bec16ebbf8b 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -86,17 +86,17 @@ static int hash_walk_new_entry(struct crypto_hash_walk *walk) int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) { unsigned int alignmask = walk->alignmask; - unsigned int nbytes = walk->entrylen; walk->data -= walk->offset; - if (nbytes && walk->offset & alignmask && !err) { - walk->offset = ALIGN(walk->offset, alignmask + 1); - nbytes = min(nbytes, - ((unsigned int)(PAGE_SIZE)) - walk->offset); - walk->entrylen -= nbytes; + if (walk->entrylen && (walk->offset & alignmask) && !err) { + unsigned int nbytes; + walk->offset = ALIGN(walk->offset, alignmask + 1); + nbytes = min(walk->entrylen, + (unsigned int)(PAGE_SIZE - walk->offset)); if (nbytes) { + walk->entrylen -= nbytes; walk->data += walk->offset; return nbytes; } @@ -116,7 +116,7 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) if (err) return err; - if (nbytes) { + if (walk->entrylen) { walk->offset = 0; walk->pg++; return hash_walk_next(walk); From 216445c55c31ba09afb722b4bcd62a3470196024 Mon Sep 17 00:00:00 2001 From: Zhang Zhijie Date: Wed, 13 Feb 2019 16:24:38 +0800 Subject: [PATCH 2283/2608] crypto: rockchip - fix scatterlist nents error commit 4359669a087633132203c52d67dd8c31e09e7b2e upstream. In some cases, the nents of src scatterlist is different from dst scatterlist. So two variables are used to handle the nents of src&dst scatterlist. Reported-by: Eric Biggers Fixes: 433cd2c617bf ("crypto: rockchip - add crypto driver for rk3288") Cc: # v4.5+ Signed-off-by: Zhang Zhijie Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/rockchip/rk3288_crypto.c | 2 +- drivers/crypto/rockchip/rk3288_crypto.h | 3 ++- drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c | 5 +++-- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index c9d622abd90c..0ce4a65b95f5 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -119,7 +119,7 @@ static int rk_load_data(struct rk_crypto_info *dev, count = (dev->left_bytes > PAGE_SIZE) ? PAGE_SIZE : dev->left_bytes; - if (!sg_pcopy_to_buffer(dev->first, dev->nents, + if (!sg_pcopy_to_buffer(dev->first, dev->src_nents, dev->addr_vir, count, dev->total - dev->left_bytes)) { dev_err(dev->dev, "[%s:%d] pcopy err\n", diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index d5fb4013fb42..417c445d8dea 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -207,7 +207,8 @@ struct rk_crypto_info { void *addr_vir; int aligned; int align_size; - size_t nents; + size_t src_nents; + size_t dst_nents; unsigned int total; unsigned int count; dma_addr_t addr_in; diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c index 639c15c5364b..ea5a6da05496 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c @@ -260,8 +260,9 @@ static int rk_ablk_start(struct rk_crypto_info *dev) dev->total = req->nbytes; dev->sg_src = req->src; dev->first = req->src; - dev->nents = sg_nents(req->src); + dev->src_nents = sg_nents(req->src); dev->sg_dst = req->dst; + dev->dst_nents = sg_nents(req->dst); dev->aligned = 1; spin_lock_irqsave(&dev->lock, flags); @@ -297,7 +298,7 @@ static int rk_ablk_rx(struct rk_crypto_info *dev) dev->unload_data(dev); if (!dev->aligned) { - if (!sg_pcopy_from_buffer(req->dst, dev->nents, + if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents, dev->addr_vir, dev->count, dev->total - dev->left_bytes - dev->count)) { diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 821a506b9e17..c336ae75e361 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -206,7 +206,7 @@ static int rk_ahash_start(struct rk_crypto_info *dev) dev->sg_dst = NULL; dev->sg_src = req->src; dev->first = req->src; - dev->nents = sg_nents(req->src); + dev->src_nents = sg_nents(req->src); rctx = ahash_request_ctx(req); rctx->mode = 0; From 5af535ab437bf3b8d88dc21e44596ca38dceb22e Mon Sep 17 00:00:00 2001 From: Zhang Zhijie Date: Wed, 13 Feb 2019 16:24:39 +0800 Subject: [PATCH 2284/2608] crypto: rockchip - update new iv to device in multiple operations commit c1c214adcb56d36433480c8fedf772498e7e539c upstream. For chain mode in cipher(eg. AES-CBC/DES-CBC), the iv is continuously updated in the operation. The new iv value should be written to device register by software. Reported-by: Eric Biggers Fixes: 433cd2c617bf ("crypto: rockchip - add crypto driver for rk3288") Cc: # v4.5+ Signed-off-by: Zhang Zhijie Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/rockchip/rk3288_crypto.h | 1 + .../rockchip/rk3288_crypto_ablkcipher.c | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 417c445d8dea..54ee5b3ed9db 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -245,6 +245,7 @@ struct rk_cipher_ctx { struct rk_crypto_info *dev; unsigned int keylen; u32 mode; + u8 iv[AES_BLOCK_SIZE]; }; enum alg_type { diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c index ea5a6da05496..23305f22072f 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c @@ -242,6 +242,17 @@ static void crypto_dma_start(struct rk_crypto_info *dev) static int rk_set_data_start(struct rk_crypto_info *dev) { int err; + struct ablkcipher_request *req = + ablkcipher_request_cast(dev->async_req); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); + u32 ivsize = crypto_ablkcipher_ivsize(tfm); + u8 *src_last_blk = page_address(sg_page(dev->sg_src)) + + dev->sg_src->offset + dev->sg_src->length - ivsize; + + /* store the iv that need to be updated in chain mode */ + if (ctx->mode & RK_CRYPTO_DEC) + memcpy(ctx->iv, src_last_blk, ivsize); err = dev->load_data(dev, dev->sg_src, dev->sg_dst); if (!err) @@ -286,6 +297,28 @@ static void rk_iv_copyback(struct rk_crypto_info *dev) memcpy_fromio(req->info, dev->reg + RK_CRYPTO_AES_IV_0, ivsize); } +static void rk_update_iv(struct rk_crypto_info *dev) +{ + struct ablkcipher_request *req = + ablkcipher_request_cast(dev->async_req); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); + u32 ivsize = crypto_ablkcipher_ivsize(tfm); + u8 *new_iv = NULL; + + if (ctx->mode & RK_CRYPTO_DEC) { + new_iv = ctx->iv; + } else { + new_iv = page_address(sg_page(dev->sg_dst)) + + dev->sg_dst->offset + dev->sg_dst->length - ivsize; + } + + if (ivsize == DES_BLOCK_SIZE) + memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize); + else if (ivsize == AES_BLOCK_SIZE) + memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize); +} + /* return: * true some err was occurred * fault no err, continue @@ -307,6 +340,7 @@ static int rk_ablk_rx(struct rk_crypto_info *dev) } } if (dev->left_bytes) { + rk_update_iv(dev); if (dev->aligned) { if (sg_is_last(dev->sg_src)) { dev_err(dev->dev, "[%s:%d] Lack of data\n", From 6a46cd4abe4f6f7be3b6004031ce36e9c9e30478 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 5 Nov 2018 16:36:07 +0100 Subject: [PATCH 2285/2608] drm/imx: ignore plane updates on disabled crtcs [ Upstream commit 4fb873c9648e383206e0a91cef9b03aa54066aca ] This patch fixes backtraces like the following when sending SIGKILL to a process with a currently pending plane update: [drm:ipu_plane_atomic_check] CRTC should be enabled [drm:drm_framebuffer_remove] *ERROR* failed to commit ------------[ cut here ]------------ WARNING: CPU: 3 PID: 63 at drivers/gpu/drm/drm_framebuffer.c:926 drm_framebuffer_remove+0x47c/0x498 atomic remove_fb failed with -22 Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/drm/imx/ipuv3-plane.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index cf98596c7ce1..d0d7f6adbc89 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -348,9 +348,9 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, if (ret) return ret; - /* CRTC should be enabled */ + /* nothing to check when disabling or disabled */ if (!crtc_state->enable) - return -EINVAL; + return 0; switch (plane->type) { case DRM_PLANE_TYPE_PRIMARY: From a0e11262c67ef2eb75f93651643fb8345ab282c4 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Thu, 20 Dec 2018 11:06:38 +0300 Subject: [PATCH 2286/2608] gpu: ipu-v3: Fix i.MX51 CSI control registers offset [ Upstream commit 2c0408dd0d8906b26fe8023889af7adf5e68b2c2 ] The CSI0/CSI1 registers offset is at +0xe030000/+0xe038000 relative to the control module registers on IPUv3EX. This patch fixes wrong values for i.MX51 CSI0/CSI1. Fixes: 2ffd48f2e7 ("gpu: ipu-v3: Add Camera Sensor Interface unit") Signed-off-by: Alexander Shiyan Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/ipu-v3/ipu-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 2c8411b8d050..5f8b31f879ca 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -894,8 +894,8 @@ static struct ipu_devtype ipu_type_imx51 = { .cpmem_ofs = 0x1f000000, .srm_ofs = 0x1f040000, .tpm_ofs = 0x1f060000, - .csi0_ofs = 0x1f030000, - .csi1_ofs = 0x1f038000, + .csi0_ofs = 0x1e030000, + .csi1_ofs = 0x1e038000, .ic_ofs = 0x1e020000, .disp0_ofs = 0x1e040000, .disp1_ofs = 0x1e048000, From 1839b1f67120f8f6427a237dd9616ddf30d96274 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Jan 2019 09:47:42 +0100 Subject: [PATCH 2287/2608] drm/imx: imx-ldb: add missing of_node_puts [ Upstream commit aa3312012f103f91f123600bbf768b11c8f431bc ] The device node iterators perform an of_node_get on each iteration, so a jump out of the loop requires an of_node_put. Move the initialization channel->child = child; down to just before the call to imx_ldb_register so that intervening failures don't need to clear it. Add a label at the end of the function to do all the of_node_puts. The semantic patch that finds part of this problem is as follows (http://coccinelle.lip6.fr): // @@ expression root,e; local idexpression child; iterator name for_each_child_of_node; @@ for_each_child_of_node(root, child) { ... when != of_node_put(child) when != e = child ( return child; | * return ...; ) ... } // Signed-off-by: Julia Lawall Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/drm/imx/imx-ldb.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index dd5312b02a8d..4f2e6c7e04c1 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -652,8 +652,10 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) int bus_format; ret = of_property_read_u32(child, "reg", &i); - if (ret || i < 0 || i > 1) - return -EINVAL; + if (ret || i < 0 || i > 1) { + ret = -EINVAL; + goto free_child; + } if (!of_device_is_available(child)) continue; @@ -666,7 +668,6 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) channel = &imx_ldb->channel[i]; channel->ldb = imx_ldb; channel->chno = i; - channel->child = child; /* * The output port is port@4 with an external 4-port mux or @@ -676,13 +677,13 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) imx_ldb->lvds_mux ? 4 : 2, 0, &channel->panel, &channel->bridge); if (ret && ret != -ENODEV) - return ret; + goto free_child; /* panel ddc only if there is no bridge */ if (!channel->bridge) { ret = imx_ldb_panel_ddc(dev, channel, child); if (ret) - return ret; + goto free_child; } bus_format = of_get_bus_format(dev, child); @@ -698,18 +699,26 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) if (bus_format < 0) { dev_err(dev, "could not determine data mapping: %d\n", bus_format); - return bus_format; + ret = bus_format; + goto free_child; } channel->bus_format = bus_format; + channel->child = child; ret = imx_ldb_register(drm, channel); - if (ret) - return ret; + if (ret) { + channel->child = NULL; + goto free_child; + } } dev_set_drvdata(dev, imx_ldb); return 0; + +free_child: + of_node_put(child); + return ret; } static void imx_ldb_unbind(struct device *dev, struct device *master, From 3aa47dba7d7fa67ce62f7843cf0f4bbfe26d6b45 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 16 Oct 2018 17:31:40 -0700 Subject: [PATCH 2288/2608] gpu: ipu-v3: Fix CSI offsets for imx53 [ Upstream commit bb867d219fda7fbaabea3314702474c4eac2b91d ] The CSI offsets are wrong for both CSI0 and CSI1. They are at physical address 0x1e030000 and 0x1e038000 respectively. Fixes: 2ffd48f2e7 ("gpu: ipu-v3: Add Camera Sensor Interface unit") Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/ipu-v3/ipu-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 5f8b31f879ca..f3a57c0500f3 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -910,8 +910,8 @@ static struct ipu_devtype ipu_type_imx53 = { .cpmem_ofs = 0x07000000, .srm_ofs = 0x07040000, .tpm_ofs = 0x07060000, - .csi0_ofs = 0x07030000, - .csi1_ofs = 0x07038000, + .csi0_ofs = 0x06030000, + .csi1_ofs = 0x06038000, .ic_ofs = 0x06020000, .disp0_ofs = 0x06040000, .disp1_ofs = 0x06048000, From 7668a3600ea03c984d60fb5d1281433ffab1c202 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 21 Nov 2018 12:39:47 +0100 Subject: [PATCH 2289/2608] s390/dasd: fix using offset into zero size array error [ Upstream commit 4a8ef6999bce998fa5813023a9a6b56eea329dba ] Dan Carpenter reported the following: The patch 52898025cf7d: "[S390] dasd: security and PSF update patch for EMC CKD ioctl" from Mar 8, 2010, leads to the following static checker warning: drivers/s390/block/dasd_eckd.c:4486 dasd_symm_io() error: using offset into zero size array 'psf_data[]' drivers/s390/block/dasd_eckd.c 4458 /* Copy parms from caller */ 4459 rc = -EFAULT; 4460 if (copy_from_user(&usrparm, argp, sizeof(usrparm))) ^^^^^^^ The user can specify any "usrparm.psf_data_len". They choose zero by mistake. 4461 goto out; 4462 if (is_compat_task()) { 4463 /* Make sure pointers are sane even on 31 bit. */ 4464 rc = -EINVAL; 4465 if ((usrparm.psf_data >> 32) != 0) 4466 goto out; 4467 if ((usrparm.rssd_result >> 32) != 0) 4468 goto out; 4469 usrparm.psf_data &= 0x7fffffffULL; 4470 usrparm.rssd_result &= 0x7fffffffULL; 4471 } 4472 /* alloc I/O data area */ 4473 psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA); 4474 rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA); 4475 if (!psf_data || !rssd_result) { kzalloc() returns a ZERO_SIZE_PTR (0x16). 4476 rc = -ENOMEM; 4477 goto out_free; 4478 } 4479 4480 /* get syscall header from user space */ 4481 rc = -EFAULT; 4482 if (copy_from_user(psf_data, 4483 (void __user *)(unsigned long) usrparm.psf_data, 4484 usrparm.psf_data_len)) That all works great. 4485 goto out_free; 4486 psf0 = psf_data[0]; 4487 psf1 = psf_data[1]; But now we're assuming that "->psf_data_len" was at least 2 bytes. Fix this by checking the user specified length psf_data_len. Fixes: 52898025cf7d ("[S390] dasd: security and PSF update patch for EMC CKD ioctl") Reported-by: Dan Carpenter Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- drivers/s390/block/dasd_eckd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 4c7c8455da96..0a1e7f9b5239 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4463,6 +4463,14 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp) usrparm.psf_data &= 0x7fffffffULL; usrparm.rssd_result &= 0x7fffffffULL; } + /* at least 2 bytes are accessed and should be allocated */ + if (usrparm.psf_data_len < 2) { + DBF_DEV_EVENT(DBF_WARNING, device, + "Symmetrix ioctl invalid data length %d", + usrparm.psf_data_len); + rc = -EINVAL; + goto out; + } /* alloc I/O data area */ psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA); rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA); From 9c2e8de17c7684845988c8627523036b48b0bed9 Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Mon, 28 Jan 2019 11:13:01 -0800 Subject: [PATCH 2290/2608] Input: pwm-vibra - prevent unbalanced regulator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3ca232df9921f083c3b37ba5fbc76f4d9046268b ] pwm_vibrator_stop disables the regulator, but it can be called from multiple places, even when the regulator is already disabled. Fix this by using regulator_is_enabled check when starting and stopping device. Signed-off-by: Jonathan Bakker Signed-off-by: Paweł Chmiel Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/pwm-vibra.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/input/misc/pwm-vibra.c b/drivers/input/misc/pwm-vibra.c index 55da191ae550..9df87431d7d4 100644 --- a/drivers/input/misc/pwm-vibra.c +++ b/drivers/input/misc/pwm-vibra.c @@ -34,6 +34,7 @@ struct pwm_vibrator { struct work_struct play_work; u16 level; u32 direction_duty_cycle; + bool vcc_on; }; static int pwm_vibrator_start(struct pwm_vibrator *vibrator) @@ -42,10 +43,13 @@ static int pwm_vibrator_start(struct pwm_vibrator *vibrator) struct pwm_state state; int err; - err = regulator_enable(vibrator->vcc); - if (err) { - dev_err(pdev, "failed to enable regulator: %d", err); - return err; + if (!vibrator->vcc_on) { + err = regulator_enable(vibrator->vcc); + if (err) { + dev_err(pdev, "failed to enable regulator: %d", err); + return err; + } + vibrator->vcc_on = true; } pwm_get_state(vibrator->pwm, &state); @@ -76,7 +80,10 @@ static int pwm_vibrator_start(struct pwm_vibrator *vibrator) static void pwm_vibrator_stop(struct pwm_vibrator *vibrator) { - regulator_disable(vibrator->vcc); + if (vibrator->vcc_on) { + regulator_disable(vibrator->vcc); + vibrator->vcc_on = false; + } if (vibrator->pwm_dir) pwm_disable(vibrator->pwm_dir); From 2e1b2e753eae843992509f1414ec86a5a4fb0c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chmiel?= Date: Mon, 28 Jan 2019 11:13:34 -0800 Subject: [PATCH 2291/2608] Input: pwm-vibra - stop regulator after disabling pwm, not before MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 94803aef3533676194c772383472636c453e3147 ] This patch fixes order of disable calls in pwm_vibrator_stop. Currently when starting device, we first enable vcc regulator and then setup and enable pwm. When stopping, we should do this in oposite order, so first disable pwm and then disable regulator. Previously order was the same as in start. Signed-off-by: Paweł Chmiel Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/pwm-vibra.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/misc/pwm-vibra.c b/drivers/input/misc/pwm-vibra.c index 9df87431d7d4..dbb6d9e1b947 100644 --- a/drivers/input/misc/pwm-vibra.c +++ b/drivers/input/misc/pwm-vibra.c @@ -80,14 +80,14 @@ static int pwm_vibrator_start(struct pwm_vibrator *vibrator) static void pwm_vibrator_stop(struct pwm_vibrator *vibrator) { + if (vibrator->pwm_dir) + pwm_disable(vibrator->pwm_dir); + pwm_disable(vibrator->pwm); + if (vibrator->vcc_on) { regulator_disable(vibrator->vcc); vibrator->vcc_on = false; } - - if (vibrator->pwm_dir) - pwm_disable(vibrator->pwm_dir); - pwm_disable(vibrator->pwm); } static void pwm_vibrator_play_work(struct work_struct *work) From 456697542ed15a781326907445f472486a34da83 Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Fri, 25 Jan 2019 22:32:20 -0800 Subject: [PATCH 2292/2608] ARM: OMAP2+: Variable "reg" in function omap4_dsi_mux_pads() could be uninitialized [ Upstream commit dc30e70391376ba3987aeb856ae6d9c0706534f1 ] In function omap4_dsi_mux_pads(), local variable "reg" could be uninitialized if function regmap_read() returns -EINVAL. However, it will be used directly in the later context, which is potentially unsafe. Signed-off-by: Yizhuo Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/display.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index b3f6eb5d04a2..6e7440ef503a 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -84,6 +84,7 @@ static int omap4_dsi_mux_pads(int dsi_id, unsigned lanes) u32 enable_mask, enable_shift; u32 pipd_mask, pipd_shift; u32 reg; + int ret; if (dsi_id == 0) { enable_mask = OMAP4_DSI1_LANEENABLE_MASK; @@ -99,7 +100,11 @@ static int omap4_dsi_mux_pads(int dsi_id, unsigned lanes) return -ENODEV; } - regmap_read(omap4_dsi_mux_syscon, OMAP4_DSIPHY_SYSCON_OFFSET, ®); + ret = regmap_read(omap4_dsi_mux_syscon, + OMAP4_DSIPHY_SYSCON_OFFSET, + ®); + if (ret) + return ret; reg &= ~enable_mask; reg &= ~pipd_mask; From f290aa86d93a9d77dd97a8ff7408d6445d116a25 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 5 Feb 2019 10:22:27 -0600 Subject: [PATCH 2293/2608] ASoC: dapm: fix out-of-bounds accesses to DAPM lookup tables [ Upstream commit c16e12010060c6c7a31f08b4a99513064cb53b7d ] KASAN reports and additional traces point to out-of-bounds accesses to the dapm_up_seq and dapm_down_seq lookup tables. The indices used are larger than the array definition. Fix by adding missing entries for the new widget types in these two lookup tables, and align them with PGA values. Also the sequences for the following widgets were not defined. Since their values defaulted to zero, assign them explicitly snd_soc_dapm_input snd_soc_dapm_output snd_soc_dapm_vmid snd_soc_dapm_siggen snd_soc_dapm_sink Fixes: 8a70b4544ef4 ('ASoC: dapm: Add new widget type for constructing DAPM graphs on DSPs.'). Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-dapm.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index bba6a917cd02..e9f7c6287376 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -75,12 +75,16 @@ static int dapm_up_seq[] = { [snd_soc_dapm_clock_supply] = 1, [snd_soc_dapm_supply] = 2, [snd_soc_dapm_micbias] = 3, + [snd_soc_dapm_vmid] = 3, [snd_soc_dapm_dai_link] = 2, [snd_soc_dapm_dai_in] = 4, [snd_soc_dapm_dai_out] = 4, [snd_soc_dapm_aif_in] = 4, [snd_soc_dapm_aif_out] = 4, [snd_soc_dapm_mic] = 5, + [snd_soc_dapm_siggen] = 5, + [snd_soc_dapm_input] = 5, + [snd_soc_dapm_output] = 5, [snd_soc_dapm_mux] = 6, [snd_soc_dapm_demux] = 6, [snd_soc_dapm_dac] = 7, @@ -88,11 +92,19 @@ static int dapm_up_seq[] = { [snd_soc_dapm_mixer] = 8, [snd_soc_dapm_mixer_named_ctl] = 8, [snd_soc_dapm_pga] = 9, + [snd_soc_dapm_buffer] = 9, + [snd_soc_dapm_scheduler] = 9, + [snd_soc_dapm_effect] = 9, + [snd_soc_dapm_src] = 9, + [snd_soc_dapm_asrc] = 9, + [snd_soc_dapm_encoder] = 9, + [snd_soc_dapm_decoder] = 9, [snd_soc_dapm_adc] = 10, [snd_soc_dapm_out_drv] = 11, [snd_soc_dapm_hp] = 11, [snd_soc_dapm_spk] = 11, [snd_soc_dapm_line] = 11, + [snd_soc_dapm_sink] = 11, [snd_soc_dapm_kcontrol] = 12, [snd_soc_dapm_post] = 13, }; @@ -105,13 +117,25 @@ static int dapm_down_seq[] = { [snd_soc_dapm_spk] = 3, [snd_soc_dapm_line] = 3, [snd_soc_dapm_out_drv] = 3, + [snd_soc_dapm_sink] = 3, [snd_soc_dapm_pga] = 4, + [snd_soc_dapm_buffer] = 4, + [snd_soc_dapm_scheduler] = 4, + [snd_soc_dapm_effect] = 4, + [snd_soc_dapm_src] = 4, + [snd_soc_dapm_asrc] = 4, + [snd_soc_dapm_encoder] = 4, + [snd_soc_dapm_decoder] = 4, [snd_soc_dapm_switch] = 5, [snd_soc_dapm_mixer_named_ctl] = 5, [snd_soc_dapm_mixer] = 5, [snd_soc_dapm_dac] = 6, [snd_soc_dapm_mic] = 7, + [snd_soc_dapm_siggen] = 7, + [snd_soc_dapm_input] = 7, + [snd_soc_dapm_output] = 7, [snd_soc_dapm_micbias] = 8, + [snd_soc_dapm_vmid] = 8, [snd_soc_dapm_mux] = 9, [snd_soc_dapm_demux] = 9, [snd_soc_dapm_aif_in] = 10, From acd9107dec464f54586e7d73927cb8f210152b30 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Feb 2019 09:46:43 +0900 Subject: [PATCH 2294/2608] ASoC: rsnd: fixup rsnd_ssi_master_clk_start() user count check [ Upstream commit d9111d36024de07784f2e1ba2ccf70b16035f378 ] commit 4d230d1271064 ("ASoC: rsnd: fixup not to call clk_get/set under non-atomic") added new rsnd_ssi_prepare() and moved rsnd_ssi_master_clk_start() to .prepare. But, ssi user count (= ssi->usrcnt) is incremented at .init (= rsnd_ssi_init()). Because of these timing exchange, ssi->usrcnt check at rsnd_ssi_master_clk_start() should be adjusted. Otherwise, 2nd master clock setup will be no check. This patch fixup this issue. Fixes: commit 4d230d1271064 ("ASoC: rsnd: fixup not to call clk_get/set under non-atomic") Reported-by: Yusuke Goda Reported-by: Valentine Barshak Signed-off-by: Kuninori Morimoto Tested-by: Yusuke Goda Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 0db2791f7035..60cc550c5a4c 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -280,7 +280,7 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, if (rsnd_ssi_is_multi_slave(mod, io)) return 0; - if (ssi->usrcnt > 1) { + if (ssi->usrcnt > 0) { if (ssi->rate != rate) { dev_err(dev, "SSI parent/child should use same rate\n"); return -EINVAL; From 2898b2fa31802790260e90251a8f86c42def3409 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 20 Dec 2018 12:44:05 +0100 Subject: [PATCH 2295/2608] KVM: arm/arm64: Reset the VCPU without preemption and vcpu state loaded [ Upstream commit e761a927bc9a7ee6ceb7c4f63d5922dbced87f0d ] We have two ways to reset a vcpu: - either through VCPU_INIT - or through a PSCI_ON call The first one is easy to reason about. The second one is implemented in a more bizarre way, as it is the vcpu that handles PSCI_ON that resets the vcpu that is being powered-on. As we need to turn the logic around and have the target vcpu to reset itself, we must take some preliminary steps. Resetting the VCPU state modifies the system register state in memory, but this may interact with vcpu_load/vcpu_put if running with preemption disabled, which in turn may lead to corrupted system register state. Address this by disabling preemption and doing put/load if required around the reset logic. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- arch/arm64/kvm/reset.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index a74311beda35..c1c5a57249d2 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -95,16 +95,33 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) * This function finds the right table above and sets the registers on * the virtual CPU struct to their architecturally defined reset * values. + * + * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT + * ioctl or as part of handling a request issued by another VCPU in the PSCI + * handling code. In the first case, the VCPU will not be loaded, and in the + * second case the VCPU will be loaded. Because this function operates purely + * on the memory-backed valus of system registers, we want to do a full put if + * we were loaded (handling a request) and load the values back at the end of + * the function. Otherwise we leave the state alone. In both cases, we + * disable preemption around the vcpu reset as we would otherwise race with + * preempt notifiers which also call put/load. */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { const struct kvm_regs *cpu_reset; + int ret = -EINVAL; + bool loaded; + + preempt_disable(); + loaded = (vcpu->cpu != -1); + if (loaded) + kvm_arch_vcpu_put(vcpu); switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { if (!cpu_has_32bit_el1()) - return -EINVAL; + goto out; cpu_reset = &default_regs_reset32; } else { cpu_reset = &default_regs_reset; @@ -127,5 +144,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; /* Reset timer */ - return kvm_timer_vcpu_reset(vcpu); + ret = kvm_timer_vcpu_reset(vcpu); +out: + if (loaded) + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + preempt_enable(); + return ret; } From c7c2716139756fa0f8ac8d66146ed48fcf3e2259 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 12 Dec 2018 11:49:47 +0000 Subject: [PATCH 2296/2608] ARM: OMAP2+: fix lack of timer interrupts on CPU1 after hotplug [ Upstream commit 50d6b3cf9403879911e06d69c7ef41e43f8f7b4b ] If we have a kernel configured for periodic timer interrupts, and we have cpuidle enabled, then we end up with CPU1 losing timer interupts after a hotplug. This can manifest itself in RCU stall warnings, or userspace becoming unresponsive. The problem is that the kernel initially wants to use the TWD timer for interrupts, but the TWD loses context when we enter the C3 cpuidle state. Nothing reprograms the TWD after idle. We have solved this in the past by switching to broadcast timer ticks, and cpuidle44xx switches to that mode at boot time. However, there is nothing to switch from periodic mode local timers after a hotplug operation. We call tick_broadcast_enter() in omap_enter_idle_coupled(), which one would expect would take care of the issue, but internally this only deals with one-shot local timers - tick_broadcast_enable() on the other hand only deals with periodic local timers. So, we need to call both. Signed-off-by: Russell King [tony@atomide.com: just standardized the subject line] Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/cpuidle44xx.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index a8b291f00109..dae514c8276a 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -152,6 +152,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) && (cx->mpu_logic_state == PWRDM_POWER_OFF); + /* Enter broadcast mode for periodic timers */ + tick_broadcast_enable(); + + /* Enter broadcast mode for one-shot timers */ tick_broadcast_enter(); /* @@ -218,15 +222,6 @@ fail: return index; } -/* - * For each cpu, setup the broadcast timer because local timers - * stops for the states above C1. - */ -static void omap_setup_broadcast_timer(void *arg) -{ - tick_broadcast_enable(); -} - static struct cpuidle_driver omap4_idle_driver = { .name = "omap4_idle", .owner = THIS_MODULE, @@ -319,8 +314,5 @@ int __init omap4_idle_init(void) if (!cpu_clkdm[0] || !cpu_clkdm[1]) return -ENODEV; - /* Configure the broadcast timer on each cpu */ - on_each_cpu(omap_setup_broadcast_timer, NULL, 1); - return cpuidle_register(idle_driver, cpu_online_mask); } From 0d523d6716da87ece0dce95e844d2e9674082b98 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 5 Feb 2019 13:52:26 -0800 Subject: [PATCH 2297/2608] Input: cap11xx - switch to using set_brightness_blocking() [ Upstream commit 628442880af8c201d307a45f3862a7a17df8a189 ] Updating LED state requires access to regmap and therefore we may sleep, so we could not do that directly form set_brightness() method. Historically we used private work to adjust the brightness, but with the introduction of set_brightness_blocking() we no longer need it. As a bonus, not having our own work item means we do not have use-after-free issue as we neglected to cancel outstanding work on driver unbind. Reported-by: Sven Van Asbroeck Reviewed-by: Sven Van Asbroeck Acked-by: Jacek Anaszewski Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/cap11xx.c | 39 +++++++++++--------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c index 1a1eacae3ea1..87fb48143859 100644 --- a/drivers/input/keyboard/cap11xx.c +++ b/drivers/input/keyboard/cap11xx.c @@ -75,9 +75,7 @@ struct cap11xx_led { struct cap11xx_priv *priv; struct led_classdev cdev; - struct work_struct work; u32 reg; - enum led_brightness new_brightness; }; #endif @@ -233,30 +231,21 @@ static void cap11xx_input_close(struct input_dev *idev) } #ifdef CONFIG_LEDS_CLASS -static void cap11xx_led_work(struct work_struct *work) -{ - struct cap11xx_led *led = container_of(work, struct cap11xx_led, work); - struct cap11xx_priv *priv = led->priv; - int value = led->new_brightness; - - /* - * All LEDs share the same duty cycle as this is a HW limitation. - * Brightness levels per LED are either 0 (OFF) and 1 (ON). - */ - regmap_update_bits(priv->regmap, CAP11XX_REG_LED_OUTPUT_CONTROL, - BIT(led->reg), value ? BIT(led->reg) : 0); -} - -static void cap11xx_led_set(struct led_classdev *cdev, - enum led_brightness value) +static int cap11xx_led_set(struct led_classdev *cdev, + enum led_brightness value) { struct cap11xx_led *led = container_of(cdev, struct cap11xx_led, cdev); + struct cap11xx_priv *priv = led->priv; - if (led->new_brightness == value) - return; - - led->new_brightness = value; - schedule_work(&led->work); + /* + * All LEDs share the same duty cycle as this is a HW + * limitation. Brightness levels per LED are either + * 0 (OFF) and 1 (ON). + */ + return regmap_update_bits(priv->regmap, + CAP11XX_REG_LED_OUTPUT_CONTROL, + BIT(led->reg), + value ? BIT(led->reg) : 0); } static int cap11xx_init_leds(struct device *dev, @@ -299,7 +288,7 @@ static int cap11xx_init_leds(struct device *dev, led->cdev.default_trigger = of_get_property(child, "linux,default-trigger", NULL); led->cdev.flags = 0; - led->cdev.brightness_set = cap11xx_led_set; + led->cdev.brightness_set_blocking = cap11xx_led_set; led->cdev.max_brightness = 1; led->cdev.brightness = LED_OFF; @@ -312,8 +301,6 @@ static int cap11xx_init_leds(struct device *dev, led->reg = reg; led->priv = priv; - INIT_WORK(&led->work, cap11xx_led_work); - error = devm_led_classdev_register(dev, &led->cdev); if (error) { of_node_put(child); From fd922f641e5c452931f86c887fd32ba883b3233c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 7 Feb 2019 14:22:42 -0800 Subject: [PATCH 2298/2608] Input: ps2-gpio - flush TX work when closing port [ Upstream commit 33a841ce5cef4ca6c18ad333248b6d273f54c839 ] To ensure that TX work is not running after serio port has been torn down, let's flush it when closing the port. Reported-by: Sven Van Asbroeck Acked-by: Danilo Krummrich Reviewed-by: Sven Van Asbroeck Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/ps2-gpio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/serio/ps2-gpio.c b/drivers/input/serio/ps2-gpio.c index b50e3817f3c4..4a64ab30589c 100644 --- a/drivers/input/serio/ps2-gpio.c +++ b/drivers/input/serio/ps2-gpio.c @@ -76,6 +76,7 @@ static void ps2_gpio_close(struct serio *serio) { struct ps2_gpio_data *drvdata = serio->port_data; + flush_delayed_work(&drvdata->tx_work); disable_irq(drvdata->irq); } From b735f5718e618f6db98564e482e2224125562ab5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 7 Feb 2019 14:39:40 -0800 Subject: [PATCH 2299/2608] Input: matrix_keypad - use flush_delayed_work() [ Upstream commit a342083abe576db43594a32d458a61fa81f7cb32 ] We should be using flush_delayed_work() instead of flush_work() in matrix_keypad_stop() to ensure that we are not missing work that is scheduled but not yet put in the workqueue (i.e. its delay timer has not expired yet). Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/matrix_keypad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 782dda68d93a..c04559a232f7 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -222,7 +222,7 @@ static void matrix_keypad_stop(struct input_dev *dev) keypad->stopped = true; spin_unlock_irq(&keypad->lock); - flush_work(&keypad->work.work); + flush_delayed_work(&keypad->work); /* * matrix_keypad_scan() will leave IRQs enabled; * we should disable them now. From 5706520b0cd66cd618f0c6e7da8e6aec610d667c Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 6 Feb 2019 13:17:21 +0200 Subject: [PATCH 2300/2608] mac80211: Fix Tx aggregation session tear down with ITXQs [ Upstream commit 6157ca0d6bfe437691b1e98a62e2efe12b6714da ] When mac80211 requests the low level driver to stop an ongoing Tx aggregation, the low level driver is expected to call ieee80211_stop_tx_ba_cb_irqsafe() to indicate that it is ready to stop the session. The callback in turn schedules a worker to complete the session tear down, which in turn also handles the relevant state for the intermediate Tx queue. However, as this flow in asynchronous, the intermediate queue should be stopped and not continue servicing frames, as in such a case frames that are dequeued would be marked as part of an aggregation, although the aggregation is already been stopped. Fix this by stopping the intermediate Tx queue, before calling the low level driver to stop the Tx aggregation. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/agg-tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 197947a07f83..ed57db9b6086 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -361,6 +361,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + ieee80211_agg_stop_txq(sta, tid); + spin_unlock_bh(&sta->lock); ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", From de7f08cfd53daeb893b928e9a5f9daf71ff8b95d Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Mon, 11 Feb 2019 16:14:39 +0100 Subject: [PATCH 2301/2608] ipvs: fix dependency on nf_defrag_ipv6 [ Upstream commit 098e13f5b21d3398065fce8780f07a3ef62f4812 ] ipvs relies on nf_defrag_ipv6 module to manage IPv6 fragmentation, but lacks proper Kconfig dependencies and does not explicitly request defrag features. As a result, if netfilter hooks are not loaded, when IPv6 fragmented packet are handled by ipvs only the first fragment makes through. Fix it properly declaring the dependency on Kconfig and registering netfilter hooks on ip_vs_add_service() and ip_vs_new_dest(). Reported-by: Li Shuang Signed-off-by: Andrea Claudi Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/Kconfig | 1 + net/netfilter/ipvs/ip_vs_core.c | 10 ++++------ net/netfilter/ipvs/ip_vs_ctl.c | 10 ++++++++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index b32fb0dbe237..3f8e490d1133 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -29,6 +29,7 @@ config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 select IP6_NF_IPTABLES + select NF_DEFRAG_IPV6 ---help--- Add IPv6 support to IPVS. diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1bd53b1e7672..4278f5c947ab 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1524,14 +1524,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, iph->off, "ip_vs_in: packet continues traversal as normal"); - if (iph->fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + + /* Fragment couldn't be mapped to a conn entry */ + if (iph->fragoffs) IP_VS_DBG_PKT(7, af, pp, skb, iph->off, "unhandled fragment"); - } + *verdict = NF_ACCEPT; return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index dff4ead3d117..56dd5ce6274f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -43,6 +43,7 @@ #ifdef CONFIG_IP_VS_IPV6 #include #include +#include #endif #include #include @@ -888,6 +889,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, { struct ip_vs_dest *dest; unsigned int atype, i; + int ret = 0; EnterFunction(2); @@ -898,6 +900,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype & IPV6_ADDR_LINKLOCAL) && !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; + + ret = nf_defrag_ipv6_enable(svc->ipvs->net); + if (ret) + return ret; } else #endif { @@ -1221,6 +1227,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ret = -EINVAL; goto out_err; } + + ret = nf_defrag_ipv6_enable(ipvs->net); + if (ret) + goto out_err; } #endif From 86e74ca9fd0dae23fd55bb4ce577adfd98ff75e8 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 29 Jan 2019 16:34:04 +0800 Subject: [PATCH 2302/2608] floppy: check_events callback should not return a negative number [ Upstream commit 96d7cb932e826219ec41ac02e5af037ffae6098c ] floppy_check_events() is supposed to return bit flags to say which events occured. We should return zero to say that no event flags are set. Only BIT(0) and BIT(1) are used in the caller. And .check_events interface also expect to return an unsigned int value. However, after commit a0c80efe5956, it may return -EINTR (-4u). Here, both BIT(0) and BIT(1) are cleared. So this patch shouldn't affect runtime, but it obviously is still worth fixing. Reviewed-by: Dan Carpenter Fixes: a0c80efe5956 ("floppy: fix lock_fdc() signal handling") Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/floppy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a7f212ea17bf..3ea9c3e9acb3 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4079,7 +4079,7 @@ static unsigned int floppy_check_events(struct gendisk *disk, if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { if (lock_fdc(drive)) - return -EINTR; + return 0; poll_drive(false, 0); process_fd_request(); } From fc4b12f3cad776dff7d56d9f08a96589859feac5 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 6 Feb 2019 06:09:43 -0500 Subject: [PATCH 2303/2608] NFS: Don't use page_file_mapping after removing the page [ Upstream commit d2ceb7e57086750ea6198a31fd942d98099a0786 ] If nfs_page_async_flush() removes the page from the mapping, then we can't use page_file_mapping() on it as nfs_updatepate() is wont to do when receiving an error. Instead, push the mapping to the stack before the page is possibly truncated. Fixes: 8fc75bed96bb ("NFS: Fix up return value on fatal errors in nfs_page_async_flush()") Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2d956a7d5378..50ed3944d183 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -236,9 +236,9 @@ out: } /* A writeback failed: mark the page as bad, and invalidate the page cache */ -static void nfs_set_pageerror(struct page *page) +static void nfs_set_pageerror(struct address_space *mapping) { - nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); + nfs_zap_mapping(mapping->host, mapping); } /* @@ -994,7 +994,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { - nfs_set_pageerror(req->wb_page); + nfs_set_pageerror(page_file_mapping(req->wb_page)); nfs_context_set_write_error(req->wb_context, hdr->error); goto remove_req; } @@ -1330,7 +1330,8 @@ int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = page_file_mapping(page)->host; + struct address_space *mapping = page_file_mapping(page); + struct inode *inode = mapping->host; int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); @@ -1348,7 +1349,7 @@ int nfs_updatepage(struct file *file, struct page *page, status = nfs_writepage_setup(ctx, page, offset, count); if (status < 0) - nfs_set_pageerror(page); + nfs_set_pageerror(mapping); else __set_page_dirty_nobuffers(page); out: From 538162d21ac877b060dc057c89f13718f5caffc5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 12 Feb 2019 15:35:58 -0800 Subject: [PATCH 2304/2608] mm/gup: fix gup_pmd_range() for dax [ Upstream commit 414fd080d125408cb15d04ff4907e1dd8145c8c7 ] For dax pmd, pmd_trans_huge() returns false but pmd_huge() returns true on x86. So the function works as long as hugetlb is configured. However, dax doesn't depend on hugetlb. Link: http://lkml.kernel.org/r/20190111034033.601-1-yuzhao@google.com Signed-off-by: Yu Zhao Reviewed-by: Jan Kara Cc: Dan Williams Cc: Huang Ying Cc: Matthew Wilcox Cc: Keith Busch Cc: "Michael S . Tsirkin" Cc: John Hubbard Cc: Wei Yang Cc: Mike Rapoport Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/gup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index 4cc8a6ff0f56..7c0e5b1bbcd4 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1643,7 +1643,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, if (!pmd_present(pmd)) return 0; - if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) { + if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || + pmd_devmap(pmd))) { /* * NUMA hinting faults need to be handled in the GUP * slowpath for accounting purposes and so that they From 5a8da52a2982c3d5edfe17b9a56971b7dc3eaa38 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 12 Feb 2019 15:36:03 -0800 Subject: [PATCH 2305/2608] Revert "mm: use early_pfn_to_nid in page_ext_init" [ Upstream commit 2f1ee0913ce58efe7f18fbd518bd54c598559b89 ] This reverts commit fe53ca54270a ("mm: use early_pfn_to_nid in page_ext_init"). When booting a system with "page_owner=on", start_kernel page_ext_init invoke_init_callbacks init_section_page_ext init_page_owner init_early_allocated_pages init_zones_in_node init_pages_in_zone lookup_page_ext page_to_nid The issue here is that page_to_nid() will not work since some page flags have no node information until later in page_alloc_init_late() due to DEFERRED_STRUCT_PAGE_INIT. Hence, it could trigger an out-of-bounds access with an invalid nid. UBSAN: Undefined behaviour in ./include/linux/mm.h:1104:50 index 7 is out of range for type 'zone [5]' Also, kernel will panic since flags were poisoned earlier with, CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_NODE_NOT_IN_PAGE_FLAGS=n start_kernel setup_arch pagetable_init paging_init sparse_init sparse_init_nid memblock_alloc_try_nid_raw It did not handle it well in init_pages_in_zone() which ends up calling page_to_nid(). page:ffffea0004200000 is uninitialized and poisoned raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) page_owner info is not active (free page?) kernel BUG at include/linux/mm.h:990! RIP: 0010:init_page_owner+0x486/0x520 This means that assumptions behind commit fe53ca54270a ("mm: use early_pfn_to_nid in page_ext_init") are incomplete. Therefore, revert the commit for now. A proper way to move the page_owner initialization to sooner is to hook into memmap initialization. Link: http://lkml.kernel.org/r/20190115202812.75820-1-cai@lca.pw Signed-off-by: Qian Cai Acked-by: Michal Hocko Cc: Pasha Tatashin Cc: Mel Gorman Cc: Yang Shi Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- init/main.c | 3 ++- mm/page_ext.c | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/init/main.c b/init/main.c index c4a45145e102..3d3d79c5a232 100644 --- a/init/main.c +++ b/init/main.c @@ -663,7 +663,6 @@ asmlinkage __visible void __init start_kernel(void) initrd_start = 0; } #endif - page_ext_init(); kmemleak_init(); debug_objects_mem_init(); setup_per_cpu_pageset(); @@ -1069,6 +1068,8 @@ static noinline void __init kernel_init_freeable(void) sched_init_smp(); page_alloc_init_late(); + /* Initialize page ext after all struct pages are initialized. */ + page_ext_init(); do_basic_setup(); diff --git a/mm/page_ext.c b/mm/page_ext.c index 2c16216c29b6..2c44f5b78435 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -396,10 +396,8 @@ void __init page_ext_init(void) * We know some arch can have a nodes layout such as * -------------pfn--------------> * N0 | N1 | N2 | N0 | N1 | N2|.... - * - * Take into account DEFERRED_STRUCT_PAGE_INIT. */ - if (early_pfn_to_nid(pfn) != nid) + if (pfn_to_nid(pfn) != nid) continue; if (init_section_page_ext(pfn, nid)) goto oom; From a977209627ca58a2b640e523a67c01dd0ead7fad Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 13 Feb 2019 22:45:59 +0100 Subject: [PATCH 2306/2608] mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs [ Upstream commit 2c2ade81741c66082f8211f0b96cf509cc4c0218 ] The basic idea behind ->pagecnt_bias is: If we pre-allocate the maximum number of references that we might need to create in the fastpath later, the bump-allocation fastpath only has to modify the non-atomic bias value that tracks the number of extra references we hold instead of the atomic refcount. The maximum number of allocations we can serve (under the assumption that no allocation is made with size 0) is nc->size, so that's the bias used. However, even when all memory in the allocation has been given away, a reference to the page is still held; and in the `offset < 0` slowpath, the page may be reused if everyone else has dropped their references. This means that the necessary number of references is actually `nc->size+1`. Luckily, from a quick grep, it looks like the only path that can call page_frag_alloc(fragsz=1) is TAP with the IFF_NAPI_FRAGS flag, which requires CAP_NET_ADMIN in the init namespace and is only intended to be used for kernel testing and fuzzing. To test for this issue, put a `WARN_ON(page_ref_count(page) == 0)` in the `offset < 0` path, below the virt_to_page() call, and then repeatedly call writev() on a TAP device with IFF_TAP|IFF_NO_PI|IFF_NAPI_FRAGS|IFF_NAPI, with a vector consisting of 15 elements containing 1 byte each. Signed-off-by: Jann Horn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- mm/page_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a2f365f40433..40075c1946b3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4325,11 +4325,11 @@ refill: /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ - page_ref_add(page, size - 1); + page_ref_add(page, size); /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page_is_pfmemalloc(page); - nc->pagecnt_bias = size; + nc->pagecnt_bias = size + 1; nc->offset = size; } @@ -4345,10 +4345,10 @@ refill: size = nc->size; #endif /* OK, page count is 0, we can safely set it */ - set_page_count(page, size); + set_page_count(page, size + 1); /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; + nc->pagecnt_bias = size + 1; offset = size - fragsz; } From e5f4a0bb7915d4d7f73320c57d37dde6fc16c64d Mon Sep 17 00:00:00 2001 From: Huang Zijiang Date: Thu, 14 Feb 2019 14:41:45 +0800 Subject: [PATCH 2307/2608] net: hns: Fix object reference leaks in hns_dsaf_roce_reset() [ Upstream commit c969c6e7ab8cb42b5c787c567615474fdbad9d6a ] The of_find_device_by_node() takes a reference to the underlying device structure, we should release that reference. Signed-off-by: Huang Zijiang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 51d42d7f6074..7e82dfbb4340 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -3074,6 +3074,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) dsaf_dev = dev_get_drvdata(&pdev->dev); if (!dsaf_dev) { dev_err(&pdev->dev, "dsaf_dev is NULL\n"); + put_device(&pdev->dev); return -ENODEV; } @@ -3081,6 +3082,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n", dsaf_dev->ae_dev.name); + put_device(&pdev->dev); return -ENODEV; } From 4bd491fd4c496589d8d9adbba3eb6d6f294baefe Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Tue, 5 Feb 2019 16:42:53 +0530 Subject: [PATCH 2308/2608] i2c: cadence: Fix the hold bit setting [ Upstream commit d358def706880defa4c9e87381c5bf086a97d5f9 ] In case the hold bit is not needed we are carrying the old values. Fix the same by resetting the bit when not needed. Fixes the sporadic i2c bus lockups on National Instruments Zynq-based devices. Fixes: df8eb5691c48 ("i2c: Add driver for Cadence I2C controller") Reported-by: Kyle Roeschley Acked-by: Michal Simek Signed-off-by: Shubhrajyoti Datta Tested-by: Kyle Roeschley Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-cadence.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index b13605718291..d917cefc5a19 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -382,8 +382,10 @@ static void cdns_i2c_mrecv(struct cdns_i2c *id) * Check for the message size against FIFO depth and set the * 'hold bus' bit if it is greater than FIFO depth. */ - if (id->recv_count > CDNS_I2C_FIFO_DEPTH) + if ((id->recv_count > CDNS_I2C_FIFO_DEPTH) || id->bus_hold_flag) ctrl_reg |= CDNS_I2C_CR_HOLD; + else + ctrl_reg = ctrl_reg & ~CDNS_I2C_CR_HOLD; cdns_i2c_writereg(ctrl_reg, CDNS_I2C_CR_OFFSET); @@ -440,8 +442,11 @@ static void cdns_i2c_msend(struct cdns_i2c *id) * Check for the message size against FIFO depth and set the * 'hold bus' bit if it is greater than FIFO depth. */ - if (id->send_count > CDNS_I2C_FIFO_DEPTH) + if ((id->send_count > CDNS_I2C_FIFO_DEPTH) || id->bus_hold_flag) ctrl_reg |= CDNS_I2C_CR_HOLD; + else + ctrl_reg = ctrl_reg & ~CDNS_I2C_CR_HOLD; + cdns_i2c_writereg(ctrl_reg, CDNS_I2C_CR_OFFSET); /* Clear the interrupts in interrupt status register. */ From 0946b9263eaacdec2d77f53f99e31aed6f9e9644 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 27 Dec 2018 16:42:25 +0100 Subject: [PATCH 2309/2608] i2c: bcm2835: Clear current buffer pointers and counts after a transfer [ Upstream commit f275a4659484716259cc46268d9043424e51cf0f ] The driver's interrupt handler checks whether a message is currently being handled with the curr_msg pointer. When it is NULL, the interrupt is considered to be unexpected. Similarly, the i2c_start_transfer routine checks for the remaining number of messages to handle in num_msgs. However, these values are never cleared and always keep the message and number relevant to the latest transfer (which might be done already and the underlying message memory might have been freed). When an unexpected interrupt hits with the DONE bit set, the isr will then try to access the flags field of the curr_msg structure, leading to a fatal page fault. The msg_buf and msg_buf_remaining fields are also never cleared at the end of the transfer, which can lead to similar pitfalls. Fix these issues by introducing a cleanup function and always calling it after a transfer is finished. Fixes: e2474541032d ("i2c: bcm2835: Fix hang for writing messages larger than 16 bytes") Signed-off-by: Paul Kocialkowski Acked-by: Stefan Wahren Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-bcm2835.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index 44deae78913e..4d19254f78c8 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -191,6 +191,15 @@ static void bcm2835_i2c_start_transfer(struct bcm2835_i2c_dev *i2c_dev) bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, c); } +static void bcm2835_i2c_finish_transfer(struct bcm2835_i2c_dev *i2c_dev) +{ + i2c_dev->curr_msg = NULL; + i2c_dev->num_msgs = 0; + + i2c_dev->msg_buf = NULL; + i2c_dev->msg_buf_remaining = 0; +} + /* * Note about I2C_C_CLEAR on error: * The I2C_C_CLEAR on errors will take some time to resolve -- if you were in @@ -291,6 +300,9 @@ static int bcm2835_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], time_left = wait_for_completion_timeout(&i2c_dev->completion, adap->timeout); + + bcm2835_i2c_finish_transfer(i2c_dev); + if (!time_left) { bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, BCM2835_I2C_C_CLEAR); From 69da13295e181768bec2cf3e2e86d0b1f1243506 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 9 Feb 2019 00:38:45 +0100 Subject: [PATCH 2310/2608] auxdisplay: ht16k33: fix potential user-after-free on module unload [ Upstream commit 69ef9bc54715fb1cb7786ada15774e469e822209 ] On module unload/remove, we need to ensure that work does not run after we have freed resources. Concretely, cancel_delayed_work() may return while the callback function is still running. From kernel/workqueue.c: The work callback function may still be running on return, unless it returns true and the work doesn't re-arm itself. Explicitly flush or use cancel_delayed_work_sync() to wait on it. Link: https://lore.kernel.org/lkml/20190204220952.30761-1-TheSven73@googlemail.com/ Reported-by: Sven Van Asbroeck Reviewed-by: Dmitry Torokhov Reviewed-by: Sven Van Asbroeck Acked-by: Robin van der Gracht Signed-off-by: Miguel Ojeda Signed-off-by: Sasha Levin --- drivers/auxdisplay/ht16k33.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index fbfa5b4cc567..a93ded300740 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -517,7 +517,7 @@ static int ht16k33_remove(struct i2c_client *client) struct ht16k33_priv *priv = i2c_get_clientdata(client); struct ht16k33_fbdev *fbdev = &priv->fbdev; - cancel_delayed_work(&fbdev->work); + cancel_delayed_work_sync(&fbdev->work); unregister_framebuffer(fbdev->info); framebuffer_release(fbdev->info); free_page((unsigned long) fbdev->buffer); From 53670a06657c24c80ef23fc0c0d39b5a03206c13 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Sat, 16 Feb 2019 21:10:16 -0800 Subject: [PATCH 2311/2608] Input: st-keyscan - fix potential zalloc NULL dereference [ Upstream commit 2439d37e1bf8a34d437573c086572abe0f3f1b15 ] This patch fixes the following static checker warning: drivers/input/keyboard/st-keyscan.c:156 keyscan_probe() error: potential zalloc NULL dereference: 'keypad_data->input_dev' Reported-by: Dan Carpenter Signed-off-by: Gabriel Fernandez Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/st-keyscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/st-keyscan.c b/drivers/input/keyboard/st-keyscan.c index babcfb165e4f..3b85631fde91 100644 --- a/drivers/input/keyboard/st-keyscan.c +++ b/drivers/input/keyboard/st-keyscan.c @@ -153,6 +153,8 @@ static int keyscan_probe(struct platform_device *pdev) input_dev->id.bustype = BUS_HOST; + keypad_data->input_dev = input_dev; + error = keypad_matrix_key_parse_dt(keypad_data); if (error) return error; @@ -168,8 +170,6 @@ static int keyscan_probe(struct platform_device *pdev) input_set_drvdata(input_dev, keypad_data); - keypad_data->input_dev = input_dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); keypad_data->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(keypad_data->base)) From a9652f7280f482e0b02196e5e2edd5a62ce397f1 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 22 Jan 2019 09:16:19 +0100 Subject: [PATCH 2312/2608] clk: sunxi-ng: v3s: Fix TCON reset de-assert bit [ Upstream commit 5c59801f7018acba11b12de59017a3fcdcf7421d ] According to the datasheet and the reference code from Allwinner, the bit used to de-assert the TCON reset is bit 4, not bit 3. Fix it in the V3s CCU driver. Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun8i-v3s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c index 621b1cd996db..ac12f261f8ca 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c @@ -542,7 +542,7 @@ static struct ccu_reset_map sun8i_v3s_ccu_resets[] = { [RST_BUS_OHCI0] = { 0x2c0, BIT(29) }, [RST_BUS_VE] = { 0x2c4, BIT(0) }, - [RST_BUS_TCON0] = { 0x2c4, BIT(3) }, + [RST_BUS_TCON0] = { 0x2c4, BIT(4) }, [RST_BUS_CSI] = { 0x2c4, BIT(8) }, [RST_BUS_DE] = { 0x2c4, BIT(12) }, [RST_BUS_DBG] = { 0x2c4, BIT(31) }, From b2a1c6c349816c169ace687aedc5302de2ea5217 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 23 Jan 2019 00:59:11 +0000 Subject: [PATCH 2313/2608] clk: sunxi: A31: Fix wrong AHB gate number [ Upstream commit ee0b27a3a4da0b0ed2318aa092f8856896e9450b ] According to the manual the gate clock for MMC3 is at bit 11, and NAND1 is controlled by bit 12. Fix the gate bit definitions in the clock driver. Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Signed-off-by: Andre Przywara Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index 40d5f74cb2ac..d93b4815e65c 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -252,9 +252,9 @@ static SUNXI_CCU_GATE(ahb1_mmc1_clk, "ahb1-mmc1", "ahb1", static SUNXI_CCU_GATE(ahb1_mmc2_clk, "ahb1-mmc2", "ahb1", 0x060, BIT(10), 0); static SUNXI_CCU_GATE(ahb1_mmc3_clk, "ahb1-mmc3", "ahb1", - 0x060, BIT(12), 0); + 0x060, BIT(11), 0); static SUNXI_CCU_GATE(ahb1_nand1_clk, "ahb1-nand1", "ahb1", - 0x060, BIT(13), 0); + 0x060, BIT(12), 0); static SUNXI_CCU_GATE(ahb1_nand0_clk, "ahb1-nand0", "ahb1", 0x060, BIT(13), 0); static SUNXI_CCU_GATE(ahb1_sdram_clk, "ahb1-sdram", "ahb1", From a12795d795aa1e7b042cc9a87b24369fe6833a75 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 28 Jan 2019 09:35:35 +0100 Subject: [PATCH 2314/2608] esp: Skip TX bytes accounting when sending from a request socket [ Upstream commit 09db51241118aeb06e1c8cd393b45879ce099b36 ] On ESP output, sk_wmem_alloc is incremented for the added padding if a socket is associated to the skb. When replying with TCP SYNACKs over IPsec, the associated sk is a casted request socket, only. Increasing sk_wmem_alloc on a request socket results in a write at an arbitrary struct offset. In the best case, this produces the following WARNING: WARNING: CPU: 1 PID: 0 at lib/refcount.c:102 esp_output_head+0x2e4/0x308 [esp4] refcount_t: addition on 0; use-after-free. CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.0.0-rc3 #2 Hardware name: Marvell Armada 380/385 (Device Tree) [...] [] (esp_output_head [esp4]) from [] (esp_output+0xb8/0x180 [esp4]) [] (esp_output [esp4]) from [] (xfrm_output_resume+0x558/0x664) [] (xfrm_output_resume) from [] (xfrm4_output+0x44/0xc4) [] (xfrm4_output) from [] (tcp_v4_send_synack+0xa8/0xe8) [] (tcp_v4_send_synack) from [] (tcp_conn_request+0x7f4/0x948) [] (tcp_conn_request) from [] (tcp_rcv_state_process+0x2a0/0xe64) [] (tcp_rcv_state_process) from [] (tcp_v4_do_rcv+0xf0/0x1f4) [] (tcp_v4_do_rcv) from [] (tcp_v4_rcv+0xdb8/0xe20) [] (tcp_v4_rcv) from [] (ip_protocol_deliver_rcu+0x2c/0x2dc) [] (ip_protocol_deliver_rcu) from [] (ip_local_deliver_finish+0x48/0x54) [] (ip_local_deliver_finish) from [] (ip_local_deliver+0x54/0xec) [] (ip_local_deliver) from [] (ip_rcv+0x48/0xb8) [] (ip_rcv) from [] (__netif_receive_skb_one_core+0x50/0x6c) [...] The issue triggers only when not using TCP syncookies, as for syncookies no socket is associated. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Signed-off-by: Martin Willi Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv4/esp4.c | 2 +- net/ipv6/esp6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b00e4a43b4dc..d30285c5d52d 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb->len += tailen; skb->data_len += tailen; skb->truesize += tailen; - if (sk) + if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); goto out; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index f112fef79216..ef7822fad0fd 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -275,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb->len += tailen; skb->data_len += tailen; skb->truesize += tailen; - if (sk) + if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); goto out; From f6b0db3b14884be33dd2d9c107d5144f1c31775a Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 21 Jan 2019 14:42:42 +0100 Subject: [PATCH 2315/2608] ARM: 8824/1: fix a migrating irq bug when hotplug cpu [ Upstream commit 1b5ba350784242eb1f899bcffd95d2c7cff61e84 ] Arm TC2 fails cpu hotplug stress test. This issue was tracked down to a missing copy of the new affinity cpumask for the vexpress-spc interrupt into struct irq_common_data.affinity when the interrupt is migrated in migrate_one_irq(). Fix it by replacing the arm specific hotplug cpu migration with the generic irq code. This is the counterpart implementation to commit 217d453d473c ("arm64: fix a migrating irq bug when hotplug cpu"). Tested with cpu hotplug stress test on Arm TC2 (multi_v7_defconfig plus CONFIG_ARM_BIG_LITTLE_CPUFREQ=y and CONFIG_ARM_VEXPRESS_SPC_CPUFREQ=y). The vexpress-spc interrupt (irq=22) on this board is affine to CPU0. Its affinity cpumask now changes correctly e.g. from 0 to 1-4 when CPU0 is hotplugged out. Suggested-by: Marc Zyngier Signed-off-by: Dietmar Eggemann Acked-by: Marc Zyngier Reviewed-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/Kconfig | 1 + arch/arm/include/asm/irq.h | 1 - arch/arm/kernel/irq.c | 62 -------------------------------------- arch/arm/kernel/smp.c | 2 +- 4 files changed, 2 insertions(+), 64 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d1346a160760..cf69aab648fb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1447,6 +1447,7 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP + select GENERIC_IRQ_MIGRATION help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index b6f319606e30..2de321e89b94 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -25,7 +25,6 @@ #ifndef __ASSEMBLY__ struct irqaction; struct pt_regs; -extern void migrate_irqs(void); extern void asm_do_IRQ(unsigned int, struct pt_regs *); void handle_IRQ(unsigned int, struct pt_regs *); diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index ece04a457486..5b07c7a31c31 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -119,64 +118,3 @@ int __init arch_probe_nr_irqs(void) return nr_irqs; } #endif - -#ifdef CONFIG_HOTPLUG_CPU -static bool migrate_one_irq(struct irq_desc *desc) -{ - struct irq_data *d = irq_desc_get_irq_data(desc); - const struct cpumask *affinity = irq_data_get_affinity_mask(d); - struct irq_chip *c; - bool ret = false; - - /* - * If this is a per-CPU interrupt, or the affinity does not - * include this CPU, then we have nothing to do. - */ - if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) - return false; - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; - ret = true; - } - - c = irq_data_get_irq_chip(d); - if (!c->irq_set_affinity) - pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) - cpumask_copy(irq_data_get_affinity_mask(d), affinity); - - return ret; -} - -/* - * The current CPU has been marked offline. Migrate IRQs off this CPU. - * If the affinity settings do not allow other CPUs, force them onto any - * available CPU. - * - * Note: we must iterate over all IRQs, whether they have an attached - * action structure or not, as we need to get chained interrupts too. - */ -void migrate_irqs(void) -{ - unsigned int i; - struct irq_desc *desc; - unsigned long flags; - - local_irq_save(flags); - - for_each_irq_desc(i, desc) { - bool affinity_broken; - - raw_spin_lock(&desc->lock); - affinity_broken = migrate_one_irq(desc); - raw_spin_unlock(&desc->lock); - - if (affinity_broken) - pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", - i, smp_processor_id()); - } - - local_irq_restore(flags); -} -#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index f57333f46242..65f85737c6a2 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -254,7 +254,7 @@ int __cpu_disable(void) /* * OK - migrate IRQs away from this CPU */ - migrate_irqs(); + irq_migrate_all_off_this_cpu(); /* * Flush user cache and TLB mappings, and then remove this CPU From 3b0e9305e39f3c08445c8f601fc477d8f842d1b4 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Thu, 7 Feb 2019 13:33:21 -0700 Subject: [PATCH 2316/2608] af_key: unconditionally clone on broadcast [ Upstream commit fc2d5cfdcfe2ab76b263d91429caa22451123085 ] Attempting to avoid cloning the skb when broadcasting by inflating the refcount with sock_hold/sock_put while under RCU lock is dangerous and violates RCU principles. It leads to subtle race conditions when attempting to free the SKB, as we may reference sockets that have already been freed by the stack. Unable to handle kernel paging request at virtual address 6b6b6b6b6b6c4b [006b6b6b6b6b6c4b] address between user and kernel address ranges Internal error: Oops: 96000004 [#1] PREEMPT SMP task: fffffff78f65b380 task.stack: ffffff8049a88000 pc : sock_rfree+0x38/0x6c lr : skb_release_head_state+0x6c/0xcc Process repro (pid: 7117, stack limit = 0xffffff8049a88000) Call trace: sock_rfree+0x38/0x6c skb_release_head_state+0x6c/0xcc skb_release_all+0x1c/0x38 __kfree_skb+0x1c/0x30 kfree_skb+0xd0/0xf4 pfkey_broadcast+0x14c/0x18c pfkey_sendmsg+0x1d8/0x408 sock_sendmsg+0x44/0x60 ___sys_sendmsg+0x1d0/0x2a8 __sys_sendmsg+0x64/0xb4 SyS_sendmsg+0x34/0x4c el0_svc_naked+0x34/0x38 Kernel panic - not syncing: Fatal exception Suggested-by: Eric Dumazet Signed-off-by: Sean Tranchetti Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/key/af_key.c | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 3b209cbfe1df..b095551a5773 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -196,30 +196,22 @@ static int pfkey_release(struct socket *sock) return 0; } -static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, - gfp_t allocation, struct sock *sk) +static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation, + struct sock *sk) { int err = -ENOBUFS; - sock_hold(sk); - if (*skb2 == NULL) { - if (refcount_read(&skb->users) != 1) { - *skb2 = skb_clone(skb, allocation); - } else { - *skb2 = skb; - refcount_inc(&skb->users); - } + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + return err; + + skb = skb_clone(skb, allocation); + + if (skb) { + skb_set_owner_r(skb, sk); + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk); + err = 0; } - if (*skb2 != NULL) { - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { - skb_set_owner_r(*skb2, sk); - skb_queue_tail(&sk->sk_receive_queue, *skb2); - sk->sk_data_ready(sk); - *skb2 = NULL; - err = 0; - } - } - sock_put(sk); return err; } @@ -234,7 +226,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; - struct sk_buff *skb2 = NULL; int err = -ESRCH; /* XXX Do we need something like netlink_overrun? I think @@ -253,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, * socket. */ if (pfk->promisc) - pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); + pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* the exact target will be processed later */ if (sk == one_sk) @@ -268,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, continue; } - err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); + err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* Error is cleared after successful sending to at least one * registered KM */ @@ -278,9 +269,8 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, rcu_read_unlock(); if (one_sk != NULL) - err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); + err = pfkey_broadcast_one(skb, allocation, one_sk); - kfree_skb(skb2); kfree_skb(skb); return err; } From d366f51305553b724561f9f7d161869318535c98 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Feb 2019 16:20:15 +0000 Subject: [PATCH 2317/2608] assoc_array: Fix shortcut creation [ Upstream commit bb2ba2d75a2d673e76ddaf13a9bd30d6a8b1bb08 ] Fix the creation of shortcuts for which the length of the index key value is an exact multiple of the machine word size. The problem is that the code that blanks off the unused bits of the shortcut value malfunctions if the number of bits in the last word equals machine word size. This is due to the "<<" operator being given a shift of zero in this case, and so the mask that should be all zeros is all ones instead. This causes the subsequent masking operation to clear everything rather than clearing nothing. Ordinarily, the presence of the hash at the beginning of the tree index key makes the issue very hard to test for, but in this case, it was encountered due to a development mistake that caused the hash output to be either 0 (keyring) or 1 (non-keyring) only. This made it susceptible to the keyctl/unlink/valid test in the keyutils package. The fix is simply to skip the blanking if the shift would be 0. For example, an index key that is 64 bits long would produce a 0 shift and thus a 'blank' of all 1s. This would then be inverted and AND'd onto the index_key, incorrectly clearing the entire last word. Fixes: 3cb989501c26 ("Add a generic associative array implementation.") Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Sasha Levin --- lib/assoc_array.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 4e53be8bc590..9463d3445ccd 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -781,9 +781,11 @@ all_leaves_cluster_together: new_s0->index_key[i] = ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); - blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); - pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); - new_s0->index_key[keylen - 1] &= ~blank; + if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { + blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); + new_s0->index_key[keylen - 1] &= ~blank; + } /* This now reduces to a node splitting exercise for which we'll need * to regenerate the disparity table. From 7b1386a3eb472f4f7cda800a751c6e54def6cee6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Feb 2019 16:20:25 +0000 Subject: [PATCH 2318/2608] keys: Fix dependency loop between construction record and auth key [ Upstream commit 822ad64d7e46a8e2c8b8a796738d7b657cbb146d ] In the request_key() upcall mechanism there's a dependency loop by which if a key type driver overrides the ->request_key hook and the userspace side manages to lose the authorisation key, the auth key and the internal construction record (struct key_construction) can keep each other pinned. Fix this by the following changes: (1) Killing off the construction record and using the auth key instead. (2) Including the operation name in the auth key payload and making the payload available outside of security/keys/. (3) The ->request_key hook is given the authkey instead of the cons record and operation name. Changes (2) and (3) allow the auth key to naturally be cleaned up if the keyring it is in is destroyed or cleared or the auth key is unlinked. Fixes: 7ee02a316600 ("keys: Fix dependency loop between construction record and auth key") Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Sasha Levin --- fs/nfs/nfs4idmap.c | 31 ++++++------ include/keys/request_key_auth-type.h | 36 ++++++++++++++ include/linux/key-type.h | 22 +++------ security/keys/internal.h | 13 +---- security/keys/keyctl.c | 1 + security/keys/process_keys.c | 1 + security/keys/request_key.c | 72 +++++++++++----------------- security/keys/request_key_auth.c | 16 ++++--- 8 files changed, 100 insertions(+), 92 deletions(-) create mode 100644 include/keys/request_key_auth-type.h diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index b6f9d84ba19b..ae2d6f220627 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "internal.h" @@ -59,7 +60,7 @@ static struct key_type key_type_id_resolver_legacy; struct idmap_legacy_upcalldata { struct rpc_pipe_msg pipe_msg; struct idmap_msg idmap_msg; - struct key_construction *key_cons; + struct key *authkey; struct idmap *idmap; }; @@ -384,7 +385,7 @@ static const match_table_t nfs_idmap_tokens = { { Opt_find_err, NULL } }; -static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); +static int nfs_idmap_legacy_upcall(struct key *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); static void idmap_release_pipe(struct inode *); @@ -545,11 +546,12 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, static void nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) { - struct key_construction *cons = idmap->idmap_upcall_data->key_cons; + struct key *authkey = idmap->idmap_upcall_data->authkey; kfree(idmap->idmap_upcall_data); idmap->idmap_upcall_data = NULL; - complete_request_key(cons, ret); + complete_request_key(authkey, ret); + key_put(authkey); } static void @@ -559,15 +561,14 @@ nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) nfs_idmap_complete_pipe_upcall_locked(idmap, ret); } -static int nfs_idmap_legacy_upcall(struct key_construction *cons, - const char *op, - void *aux) +static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) { struct idmap_legacy_upcalldata *data; + struct request_key_auth *rka = get_request_key_auth(authkey); struct rpc_pipe_msg *msg; struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; - struct key *key = cons->key; + struct key *key = rka->target_key; int ret = -ENOKEY; if (!aux) @@ -582,7 +583,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, msg = &data->pipe_msg; im = &data->idmap_msg; data->idmap = idmap; - data->key_cons = cons; + data->authkey = key_get(authkey); ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); if (ret < 0) @@ -600,7 +601,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, out2: kfree(data); out1: - complete_request_key(cons, ret); + complete_request_key(authkey, ret); return ret; } @@ -647,9 +648,10 @@ out: static ssize_t idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { + struct request_key_auth *rka; struct rpc_inode *rpci = RPC_I(file_inode(filp)); struct idmap *idmap = (struct idmap *)rpci->private; - struct key_construction *cons; + struct key *authkey; struct idmap_msg im; size_t namelen_in; int ret = -ENOKEY; @@ -661,7 +663,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (idmap->idmap_upcall_data == NULL) goto out_noupcall; - cons = idmap->idmap_upcall_data->key_cons; + authkey = idmap->idmap_upcall_data->authkey; + rka = get_request_key_auth(authkey); if (mlen != sizeof(im)) { ret = -ENOSPC; @@ -686,9 +689,9 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) ret = nfs_idmap_read_and_verify_message(&im, &idmap->idmap_upcall_data->idmap_msg, - cons->key, cons->authkey); + rka->target_key, authkey); if (ret >= 0) { - key_set_timeout(cons->key, nfs_idmap_cache_timeout); + key_set_timeout(rka->target_key, nfs_idmap_cache_timeout); ret = mlen; } diff --git a/include/keys/request_key_auth-type.h b/include/keys/request_key_auth-type.h new file mode 100644 index 000000000000..a726dd3f1dc6 --- /dev/null +++ b/include/keys/request_key_auth-type.h @@ -0,0 +1,36 @@ +/* request_key authorisation token key type + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _KEYS_REQUEST_KEY_AUTH_TYPE_H +#define _KEYS_REQUEST_KEY_AUTH_TYPE_H + +#include + +/* + * Authorisation record for request_key(). + */ +struct request_key_auth { + struct key *target_key; + struct key *dest_keyring; + const struct cred *cred; + void *callout_info; + size_t callout_len; + pid_t pid; + char op[8]; +} __randomize_layout; + +static inline struct request_key_auth *get_request_key_auth(const struct key *key) +{ + return key->payload.data[0]; +} + + +#endif /* _KEYS_REQUEST_KEY_AUTH_TYPE_H */ diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 9520fc3c3b9a..dfb3ba782d2c 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -17,15 +17,6 @@ #ifdef CONFIG_KEYS -/* - * key under-construction record - * - passed to the request_key actor if supplied - */ -struct key_construction { - struct key *key; /* key being constructed */ - struct key *authkey;/* authorisation for key being constructed */ -}; - /* * Pre-parsed payload, used by key add, update and instantiate. * @@ -47,8 +38,7 @@ struct key_preparsed_payload { time_t expiry; /* Expiry time of key */ } __randomize_layout; -typedef int (*request_key_actor_t)(struct key_construction *key, - const char *op, void *aux); +typedef int (*request_key_actor_t)(struct key *auth_key, void *aux); /* * Preparsed matching criterion. @@ -170,20 +160,20 @@ extern int key_instantiate_and_link(struct key *key, const void *data, size_t datalen, struct key *keyring, - struct key *instkey); + struct key *authkey); extern int key_reject_and_link(struct key *key, unsigned timeout, unsigned error, struct key *keyring, - struct key *instkey); -extern void complete_request_key(struct key_construction *cons, int error); + struct key *authkey); +extern void complete_request_key(struct key *authkey, int error); static inline int key_negate_and_link(struct key *key, unsigned timeout, struct key *keyring, - struct key *instkey) + struct key *authkey) { - return key_reject_and_link(key, timeout, ENOKEY, keyring, instkey); + return key_reject_and_link(key, timeout, ENOKEY, keyring, authkey); } extern int generic_key_instantiate(struct key *key, struct key_preparsed_payload *prep); diff --git a/security/keys/internal.h b/security/keys/internal.h index 503adbae7b0d..e3a573840186 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -188,20 +188,9 @@ static inline int key_permission(const key_ref_t key_ref, unsigned perm) return key_task_permission(key_ref, current_cred(), perm); } -/* - * Authorisation record for request_key(). - */ -struct request_key_auth { - struct key *target_key; - struct key *dest_keyring; - const struct cred *cred; - void *callout_info; - size_t callout_len; - pid_t pid; -} __randomize_layout; - extern struct key_type key_type_request_key_auth; extern struct key *request_key_auth_new(struct key *target, + const char *op, const void *callout_info, size_t callout_len, struct key *dest_keyring); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 1ffe60bb2845..ca31af186abd 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "internal.h" #define KEY_MAX_DESC_SIZE 4096 diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 740affd65ee9..5f2993ab2d50 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "internal.h" /* Session keyring create vs join semaphore */ diff --git a/security/keys/request_key.c b/security/keys/request_key.c index c707fdbb3429..2ecd67221476 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -18,31 +18,30 @@ #include #include #include "internal.h" +#include #define key_negative_timeout 60 /* default timeout on a negative key's existence */ /** * complete_request_key - Complete the construction of a key. - * @cons: The key construction record. + * @auth_key: The authorisation key. * @error: The success or failute of the construction. * * Complete the attempt to construct a key. The key will be negated * if an error is indicated. The authorisation key will be revoked * unconditionally. */ -void complete_request_key(struct key_construction *cons, int error) +void complete_request_key(struct key *authkey, int error) { - kenter("{%d,%d},%d", cons->key->serial, cons->authkey->serial, error); + struct request_key_auth *rka = get_request_key_auth(authkey); + struct key *key = rka->target_key; + + kenter("%d{%d},%d", authkey->serial, key->serial, error); if (error < 0) - key_negate_and_link(cons->key, key_negative_timeout, NULL, - cons->authkey); + key_negate_and_link(key, key_negative_timeout, NULL, authkey); else - key_revoke(cons->authkey); - - key_put(cons->key); - key_put(cons->authkey); - kfree(cons); + key_revoke(authkey); } EXPORT_SYMBOL(complete_request_key); @@ -91,21 +90,19 @@ static int call_usermodehelper_keys(const char *path, char **argv, char **envp, * Request userspace finish the construction of a key * - execute "/sbin/request-key " */ -static int call_sbin_request_key(struct key_construction *cons, - const char *op, - void *aux) +static int call_sbin_request_key(struct key *authkey, void *aux) { static char const request_key[] = "/sbin/request-key"; + struct request_key_auth *rka = get_request_key_auth(authkey); const struct cred *cred = current_cred(); key_serial_t prkey, sskey; - struct key *key = cons->key, *authkey = cons->authkey, *keyring, - *session; + struct key *key = rka->target_key, *keyring, *session; char *argv[9], *envp[3], uid_str[12], gid_str[12]; char key_str[12], keyring_str[3][12]; char desc[20]; int ret, i; - kenter("{%d},{%d},%s", key->serial, authkey->serial, op); + kenter("{%d},{%d},%s", key->serial, authkey->serial, rka->op); ret = install_user_keyrings(); if (ret < 0) @@ -163,7 +160,7 @@ static int call_sbin_request_key(struct key_construction *cons, /* set up the argument list */ i = 0; argv[i++] = (char *)request_key; - argv[i++] = (char *) op; + argv[i++] = (char *)rka->op; argv[i++] = key_str; argv[i++] = uid_str; argv[i++] = gid_str; @@ -191,7 +188,7 @@ error_link: key_put(keyring); error_alloc: - complete_request_key(cons, ret); + complete_request_key(authkey, ret); kleave(" = %d", ret); return ret; } @@ -205,42 +202,31 @@ static int construct_key(struct key *key, const void *callout_info, size_t callout_len, void *aux, struct key *dest_keyring) { - struct key_construction *cons; request_key_actor_t actor; struct key *authkey; int ret; kenter("%d,%p,%zu,%p", key->serial, callout_info, callout_len, aux); - cons = kmalloc(sizeof(*cons), GFP_KERNEL); - if (!cons) - return -ENOMEM; - /* allocate an authorisation key */ - authkey = request_key_auth_new(key, callout_info, callout_len, + authkey = request_key_auth_new(key, "create", callout_info, callout_len, dest_keyring); - if (IS_ERR(authkey)) { - kfree(cons); - ret = PTR_ERR(authkey); - authkey = NULL; - } else { - cons->authkey = key_get(authkey); - cons->key = key_get(key); + if (IS_ERR(authkey)) + return PTR_ERR(authkey); - /* make the call */ - actor = call_sbin_request_key; - if (key->type->request_key) - actor = key->type->request_key; + /* Make the call */ + actor = call_sbin_request_key; + if (key->type->request_key) + actor = key->type->request_key; - ret = actor(cons, "create", aux); + ret = actor(authkey, aux); - /* check that the actor called complete_request_key() prior to - * returning an error */ - WARN_ON(ret < 0 && - !test_bit(KEY_FLAG_REVOKED, &authkey->flags)); - key_put(authkey); - } + /* check that the actor called complete_request_key() prior to + * returning an error */ + WARN_ON(ret < 0 && + !test_bit(KEY_FLAG_REVOKED, &authkey->flags)); + key_put(authkey); kleave(" = %d", ret); return ret; } @@ -275,7 +261,7 @@ static int construct_get_dest_keyring(struct key **_dest_keyring) if (cred->request_key_auth) { authkey = cred->request_key_auth; down_read(&authkey->sem); - rka = authkey->payload.data[0]; + rka = get_request_key_auth(authkey); if (!test_bit(KEY_FLAG_REVOKED, &authkey->flags)) dest_keyring = diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 6797843154f0..5e515791ccd1 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -18,7 +18,7 @@ #include #include #include "internal.h" -#include +#include static int request_key_auth_preparse(struct key_preparsed_payload *); static void request_key_auth_free_preparse(struct key_preparsed_payload *); @@ -69,7 +69,7 @@ static int request_key_auth_instantiate(struct key *key, static void request_key_auth_describe(const struct key *key, struct seq_file *m) { - struct request_key_auth *rka = key->payload.data[0]; + struct request_key_auth *rka = get_request_key_auth(key); seq_puts(m, "key:"); seq_puts(m, key->description); @@ -84,7 +84,7 @@ static void request_key_auth_describe(const struct key *key, static long request_key_auth_read(const struct key *key, char __user *buffer, size_t buflen) { - struct request_key_auth *rka = key->payload.data[0]; + struct request_key_auth *rka = get_request_key_auth(key); size_t datalen; long ret; @@ -110,7 +110,7 @@ static long request_key_auth_read(const struct key *key, */ static void request_key_auth_revoke(struct key *key) { - struct request_key_auth *rka = key->payload.data[0]; + struct request_key_auth *rka = get_request_key_auth(key); kenter("{%d}", key->serial); @@ -137,7 +137,7 @@ static void free_request_key_auth(struct request_key_auth *rka) */ static void request_key_auth_destroy(struct key *key) { - struct request_key_auth *rka = key->payload.data[0]; + struct request_key_auth *rka = get_request_key_auth(key); kenter("{%d}", key->serial); @@ -148,8 +148,9 @@ static void request_key_auth_destroy(struct key *key) * Create an authorisation token for /sbin/request-key or whoever to gain * access to the caller's security data. */ -struct key *request_key_auth_new(struct key *target, const void *callout_info, - size_t callout_len, struct key *dest_keyring) +struct key *request_key_auth_new(struct key *target, const char *op, + const void *callout_info, size_t callout_len, + struct key *dest_keyring) { struct request_key_auth *rka, *irka; const struct cred *cred = current->cred; @@ -167,6 +168,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, if (!rka->callout_info) goto error_free_rka; rka->callout_len = callout_len; + strlcpy(rka->op, op, sizeof(rka->op)); /* see if the calling process is already servicing the key request of * another process */ From e50f4443a319c55f0c47678339f3b93c5ad6bec9 Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Wed, 13 Feb 2019 13:21:39 +0800 Subject: [PATCH 2319/2608] scsi: libiscsi: Fix race between iscsi_xmit_task and iscsi_complete_task [ Upstream commit 79edd00dc6a96644d76b4a1cb97d94d49e026768 ] When a target sends Check Condition, whilst initiator is busy xmiting re-queued data, could lead to race between iscsi_complete_task() and iscsi_xmit_task() and eventually crashing with the following kernel backtrace. [3326150.987523] ALERT: BUG: unable to handle kernel NULL pointer dereference at 0000000000000078 [3326150.987549] ALERT: IP: [] iscsi_xmit_task+0x2d/0xc0 [libiscsi] [3326150.987571] WARN: PGD 569c8067 PUD 569c9067 PMD 0 [3326150.987582] WARN: Oops: 0002 [#1] SMP [3326150.987593] WARN: Modules linked in: tun nfsv3 nfs fscache dm_round_robin [3326150.987762] WARN: CPU: 2 PID: 8399 Comm: kworker/u32:1 Tainted: G O 4.4.0+2 #1 [3326150.987774] WARN: Hardware name: Dell Inc. PowerEdge R720/0W7JN5, BIOS 2.5.4 01/22/2016 [3326150.987790] WARN: Workqueue: iscsi_q_13 iscsi_xmitworker [libiscsi] [3326150.987799] WARN: task: ffff8801d50f3800 ti: ffff8801f5458000 task.ti: ffff8801f5458000 [3326150.987810] WARN: RIP: e030:[] [] iscsi_xmit_task+0x2d/0xc0 [libiscsi] [3326150.987825] WARN: RSP: e02b:ffff8801f545bdb0 EFLAGS: 00010246 [3326150.987831] WARN: RAX: 00000000ffffffc3 RBX: ffff880282d2ab20 RCX: ffff88026b6ac480 [3326150.987842] WARN: RDX: 0000000000000000 RSI: 00000000fffffe01 RDI: ffff880282d2ab20 [3326150.987852] WARN: RBP: ffff8801f545bdc8 R08: 0000000000000000 R09: 0000000000000008 [3326150.987862] WARN: R10: 0000000000000000 R11: 000000000000fe88 R12: 0000000000000000 [3326150.987872] WARN: R13: ffff880282d2abe8 R14: ffff880282d2abd8 R15: ffff880282d2ac08 [3326150.987890] WARN: FS: 00007f5a866b4840(0000) GS:ffff88028a640000(0000) knlGS:0000000000000000 [3326150.987900] WARN: CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 [3326150.987907] WARN: CR2: 0000000000000078 CR3: 0000000070244000 CR4: 0000000000042660 [3326150.987918] WARN: Stack: [3326150.987924] WARN: ffff880282d2ad58 ffff880282d2ab20 ffff880282d2abe8 ffff8801f545be18 [3326150.987938] WARN: ffffffffa05cea90 ffff880282d2abf8 ffff88026b59cc80 ffff88026b59cc00 [3326150.987951] WARN: ffff88022acf32c0 ffff880289491800 ffff880255a80800 0000000000000400 [3326150.987964] WARN: Call Trace: [3326150.987975] WARN: [] iscsi_xmitworker+0x2f0/0x360 [libiscsi] [3326150.987988] WARN: [] process_one_work+0x1fc/0x3b0 [3326150.987997] WARN: [] worker_thread+0x2a5/0x470 [3326150.988006] WARN: [] ? __schedule+0x648/0x870 [3326150.988015] WARN: [] ? rescuer_thread+0x300/0x300 [3326150.988023] WARN: [] kthread+0xd5/0xe0 [3326150.988031] WARN: [] ? kthread_stop+0x110/0x110 [3326150.988040] WARN: [] ret_from_fork+0x3f/0x70 [3326150.988048] WARN: [] ? kthread_stop+0x110/0x110 [3326150.988127] ALERT: RIP [] iscsi_xmit_task+0x2d/0xc0 [libiscsi] [3326150.988138] WARN: RSP [3326150.988144] WARN: CR2: 0000000000000078 [3326151.020366] WARN: ---[ end trace 1c60974d4678d81b ]--- Commit 6f8830f5bbab ("scsi: libiscsi: add lock around task lists to fix list corruption regression") introduced "taskqueuelock" to fix list corruption during the race, but this wasn't enough. Re-setting of conn->task to NULL, could race with iscsi_xmit_task(). iscsi_complete_task() { .... if (conn->task == task) conn->task = NULL; } conn->task in iscsi_xmit_task() could be NULL and so will be task. __iscsi_get_task(task) will crash (NullPtr de-ref), trying to access refcount. iscsi_xmit_task() { struct iscsi_task *task = conn->task; __iscsi_get_task(task); } This commit will take extra conn->session->back_lock in iscsi_xmit_task() to ensure iscsi_xmit_task() waits for iscsi_complete_task(), if iscsi_complete_task() wins the race. If iscsi_xmit_task() wins the race, iscsi_xmit_task() increments task->refcount (__iscsi_get_task) ensuring iscsi_complete_task() will not iscsi_free_task(). Signed-off-by: Anoob Soman Signed-off-by: Bob Liu Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libiscsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 3ff536b350a1..5ea5d42bac76 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1449,7 +1449,13 @@ static int iscsi_xmit_task(struct iscsi_conn *conn) if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) return -ENODATA; + spin_lock_bh(&conn->session->back_lock); + if (conn->task == NULL) { + spin_unlock_bh(&conn->session->back_lock); + return -ENODATA; + } __iscsi_get_task(task); + spin_unlock_bh(&conn->session->back_lock); spin_unlock_bh(&conn->session->frwd_lock); rc = conn->session->tt->xmit_task(task); spin_lock_bh(&conn->session->frwd_lock); From 8f926101b83e6bbefe91065acb1e1189b12fd1c9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 15 Feb 2019 12:16:51 -0800 Subject: [PATCH 2320/2608] net: systemport: Fix reception of BPDUs [ Upstream commit a40061ea2e39494104602b3048751341bda374a1 ] SYSTEMPORT has its RXCHK parser block that attempts to validate the packet structures, unfortunately setting the L2 header check bit will cause Bridge PDUs (BPDUs) to be incorrectly rejected because they look like LLC/SNAP packets with a non-IPv4 or non-IPv6 Ethernet Type. Fixes: 4e8aedfe78c7 ("net: systemport: Turn on offloads by default") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index ed3edb17fd09..79018fea7be2 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -134,6 +134,10 @@ static int bcm_sysport_set_rx_csum(struct net_device *dev, priv->rx_chk_en = !!(wanted & NETIF_F_RXCSUM); reg = rxchk_readl(priv, RXCHK_CONTROL); + /* Clear L2 header checks, which would prevent BPDUs + * from being received. + */ + reg &= ~RXCHK_L2_HDR_DIS; if (priv->rx_chk_en) reg |= RXCHK_EN; else From eb735030790086fd911c3e51094f51c1e2d9a2ac Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 9 Feb 2019 02:01:01 +0100 Subject: [PATCH 2321/2608] pinctrl: meson: meson8b: fix the sdxc_a data 1..3 pins [ Upstream commit c17abcfa93bf0be5e48bb011607d237ac2bfc839 ] Fix the mismatch between the "sdxc_d13_1_a" pin group definition from meson8b_cbus_groups and the entry in sdxc_a_groups ("sdxc_d0_13_1_a"). This makes it possible to use "sdxc_d13_1_a" in device-tree files to route the MMC data 1..3 pins to GPIOX_1..3. Fixes: 0fefcb6876d0d6 ("pinctrl: Add support for Meson8b") Signed-off-by: Martin Blumenstingl Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/meson/pinctrl-meson8b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index a6fff215e60f..aafd39eba64f 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -668,7 +668,7 @@ static const char * const sd_a_groups[] = { static const char * const sdxc_a_groups[] = { "sdxc_d0_0_a", "sdxc_d13_0_a", "sdxc_d47_a", "sdxc_clk_a", - "sdxc_cmd_a", "sdxc_d0_1_a", "sdxc_d0_13_1_a" + "sdxc_cmd_a", "sdxc_d0_1_a", "sdxc_d13_1_a" }; static const char * const pcm_a_groups[] = { From f647070b1efebe4874fe444db261b4f5dbbef68d Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Fri, 15 Feb 2019 13:20:42 +0100 Subject: [PATCH 2322/2608] qmi_wwan: apply SET_DTR quirk to Sierra WP7607 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 97dc47a1308a3af46a09b1546cfb869f2e382a81 ] The 1199:68C0 USB ID is reused by Sierra WP7607 which requires the DTR quirk to be detected. Apply QMI_QUIRK_SET_DTR unconditionally as already done for other IDs shared between different devices. Signed-off-by: Beniamino Galvani Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 25204d2c9e89..65e47cc52d14 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1193,8 +1193,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ {QMI_FIXED_INTF(0x1199, 0x68a2, 19)}, /* Sierra Wireless MC7710 in QMI mode */ - {QMI_FIXED_INTF(0x1199, 0x68c0, 8)}, /* Sierra Wireless MC7304/MC7354 */ - {QMI_FIXED_INTF(0x1199, 0x68c0, 10)}, /* Sierra Wireless MC7304/MC7354 */ + {QMI_QUIRK_SET_DTR(0x1199, 0x68c0, 8)}, /* Sierra Wireless MC7304/MC7354, WP76xx */ + {QMI_QUIRK_SET_DTR(0x1199, 0x68c0, 10)},/* Sierra Wireless MC7304/MC7354 */ {QMI_FIXED_INTF(0x1199, 0x901c, 8)}, /* Sierra Wireless EM7700 */ {QMI_FIXED_INTF(0x1199, 0x901f, 8)}, /* Sierra Wireless EM7355 */ {QMI_FIXED_INTF(0x1199, 0x9041, 8)}, /* Sierra Wireless MC7305/MC7355 */ From 0d1aae8046b6b0eb7d6528f65b39044b60e6de54 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 16 Feb 2019 00:20:54 +0300 Subject: [PATCH 2323/2608] net: mv643xx_eth: disable clk on error path in mv643xx_eth_shared_probe() [ Upstream commit e928b5d6b75e239feb9c6d5488974b6646a0ebc8 ] If mv643xx_eth_shared_of_probe() fails, mv643xx_eth_shared_probe() leaves clk enabled. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mv643xx_eth.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 81c1fac00d33..2434409f84b2 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2886,7 +2886,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) ret = mv643xx_eth_shared_of_probe(pdev); if (ret) - return ret; + goto err_put_clk; pd = dev_get_platdata(&pdev->dev); msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ? @@ -2894,6 +2894,11 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) infer_hw_params(msp); return 0; + +err_put_clk: + if (!IS_ERR(msp->clk)) + clk_disable_unprepare(msp->clk); + return ret; } static int mv643xx_eth_shared_remove(struct platform_device *pdev) From 2f29bedd5fc0387807d891ec1b7078d8807f5b6e Mon Sep 17 00:00:00 2001 From: Rayagonda Kokatanur Date: Mon, 4 Feb 2019 11:21:29 -0800 Subject: [PATCH 2324/2608] mailbox: bcm-flexrm-mailbox: Fix FlexRM ring flush timeout issue [ Upstream commit d7bf31a0f85faaf63c63c39d55154825a1eaaea9 ] RING_CONTROL reg was not written due to wrong address, hence all the subsequent ring flush was timing out. Fixes: a371c10ea4b3 ("mailbox: bcm-flexrm-mailbox: Fix FlexRM ring flush sequence") Signed-off-by: Rayagonda Kokatanur Signed-off-by: Ray Jui Reviewed-by: Scott Branden Signed-off-by: Jassi Brar Signed-off-by: Sasha Levin --- drivers/mailbox/bcm-flexrm-mailbox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c index f052a3eb2098..7e3ed2714630 100644 --- a/drivers/mailbox/bcm-flexrm-mailbox.c +++ b/drivers/mailbox/bcm-flexrm-mailbox.c @@ -1381,9 +1381,9 @@ static void flexrm_shutdown(struct mbox_chan *chan) /* Clear ring flush state */ timeout = 1000; /* timeout of 1s */ - writel_relaxed(0x0, ring + RING_CONTROL); + writel_relaxed(0x0, ring->regs + RING_CONTROL); do { - if (!(readl_relaxed(ring + RING_FLUSH_DONE) & + if (!(readl_relaxed(ring->regs + RING_FLUSH_DONE) & FLUSH_DONE_MASK)) break; mdelay(1); From 4f1daa409e6e4629b7c0ead8f5b4644a31e03f84 Mon Sep 17 00:00:00 2001 From: Bard liao Date: Sun, 17 Feb 2019 21:23:47 +0800 Subject: [PATCH 2325/2608] ASoC: topology: free created components in tplg load error [ Upstream commit 304017d31df36fb61eb2ed3ebf65fb6870b3c731 ] Topology resources are no longer needed if any element failed to load. Signed-off-by: Bard liao Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-topology.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index c1619860a5de..2d5cf263515b 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2513,6 +2513,7 @@ int snd_soc_tplg_component_load(struct snd_soc_component *comp, struct snd_soc_tplg_ops *ops, const struct firmware *fw, u32 id) { struct soc_tplg tplg; + int ret; /* setup parsing context */ memset(&tplg, 0, sizeof(tplg)); @@ -2526,7 +2527,12 @@ int snd_soc_tplg_component_load(struct snd_soc_component *comp, tplg.bytes_ext_ops = ops->bytes_ext_ops; tplg.bytes_ext_ops_count = ops->bytes_ext_ops_count; - return soc_tplg_load(&tplg); + ret = soc_tplg_load(&tplg); + /* free the created components if fail to load topology */ + if (ret) + snd_soc_tplg_component_remove(comp, SND_SOC_TPLG_INDEX_ALL); + + return ret; } EXPORT_SYMBOL_GPL(snd_soc_tplg_component_load); From 156ca708eab5313fcc75ffb1c17256cee4efd02f Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 18 Feb 2019 15:24:03 +0200 Subject: [PATCH 2326/2608] qed: Fix iWARP syn packet mac address validation. [ Upstream commit 8be3dadf04050c2907760ec1955ca1c8fbc25585 ] The ll2 forwards all syn packets to the driver without validating the mac address. Add validation check in the driver's iWARP listener flow and drop the packet if it isn't intended for the device. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index eb666877d1aa..bb09f5a9846f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1651,6 +1651,15 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0); + if (!ether_addr_equal(ethh->h_dest, + p_hwfn->p_rdma_info->iwarp.mac_addr)) { + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "Got unexpected mac %pM instead of %pM\n", + ethh->h_dest, p_hwfn->p_rdma_info->iwarp.mac_addr); + return -EINVAL; + } + ether_addr_copy(remote_mac_addr, ethh->h_source); ether_addr_copy(local_mac_addr, ethh->h_dest); From b18d4687a9ed8432550fe6c69be2b1881ab1b2b4 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Wed, 20 Feb 2019 11:43:05 +0000 Subject: [PATCH 2327/2608] arm64: Relax GIC version check during early boot [ Upstream commit 74698f6971f25d045301139413578865fc2bd8f9 ] Updates to the GIC architecture allow ID_AA64PFR0_EL1.GIC to have values other than 0 or 1. At the moment, Linux is quite strict in the way it handles this field at early boot stage (cpufeature is fine) and will refuse to use the system register CPU interface if it doesn't find the value 1. Fixes: 021f653791ad17e03f98aaa7fb933816ae16f161 ("irqchip: gic-v3: Initial support for GICv3") Reported-by: Chase Conklin Reviewed-by: Marc Zyngier Signed-off-by: Vladimir Murzin Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/head.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ec393275ba04..1371542de0d3 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -442,8 +442,7 @@ set_hcr: /* GICv3 system register access */ mrs x0, id_aa64pfr0_el1 ubfx x0, x0, #24, #4 - cmp x0, #1 - b.ne 3f + cbz x0, 3f mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 From 8dea1a3ec34ed15d212495b0eb2541a5582e9d74 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 15 Feb 2019 13:55:47 +0000 Subject: [PATCH 2328/2608] net: marvell: mvneta: fix DMA debug warning [ Upstream commit a8fef9ba58c9966ddb1fec916d8d8137c9d8bc89 ] Booting 4.20 on SolidRun Clearfog issues this warning with DMA API debug enabled: WARNING: CPU: 0 PID: 555 at kernel/dma/debug.c:1230 check_sync+0x514/0x5bc mvneta f1070000.ethernet: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x000000002dd7dc00] [size=240 bytes] Modules linked in: ahci mv88e6xxx dsa_core xhci_plat_hcd xhci_hcd devlink armada_thermal marvell_cesa des_generic ehci_orion phy_armada38x_comphy mcp3021 spi_orion evbug sfp mdio_i2c ip_tables x_tables CPU: 0 PID: 555 Comm: bridge-network- Not tainted 4.20.0+ #291 Hardware name: Marvell Armada 380/385 (Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x9c/0xd4) [] (dump_stack) from [] (__warn+0xf8/0x124) [] (__warn) from [] (warn_slowpath_fmt+0x38/0x48) [] (warn_slowpath_fmt) from [] (check_sync+0x514/0x5bc) [] (check_sync) from [] (debug_dma_sync_single_range_for_cpu+0x6c/0x74) [] (debug_dma_sync_single_range_for_cpu) from [] (mvneta_poll+0x298/0xf58) [] (mvneta_poll) from [] (net_rx_action+0x128/0x424) [] (net_rx_action) from [] (__do_softirq+0xf0/0x540) [] (__do_softirq) from [] (irq_exit+0x124/0x144) [] (irq_exit) from [] (__handle_domain_irq+0x58/0xb0) [] (__handle_domain_irq) from [] (gic_handle_irq+0x48/0x98) [] (gic_handle_irq) from [] (__irq_svc+0x70/0x98) ... This appears to be caused by mvneta_rx_hwbm() calling dma_sync_single_range_for_cpu() with the wrong struct device pointer, as the buffer manager device pointer is used to map and unmap the buffer. Fix this. Signed-off-by: Russell King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 074a5b79d691..f76cbefeb3c7 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2102,7 +2102,7 @@ err_drop_frame: if (unlikely(!skb)) goto err_drop_frame_ret_pool; - dma_sync_single_range_for_cpu(dev->dev.parent, + dma_sync_single_range_for_cpu(&pp->bm_priv->pdev->dev, rx_desc->buf_phys_addr, MVNETA_MH_SIZE + NET_SKB_PAD, rx_bytes, From 00df9629326b45f071b23f921aaa6c1374c1e0c7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Feb 2019 08:48:09 -0800 Subject: [PATCH 2329/2608] tmpfs: fix link accounting when a tmpfile is linked in [ Upstream commit 1062af920c07f5b54cf5060fde3339da6df0cf6b ] tmpfs has a peculiarity of accounting hard links as if they were separate inodes: so that when the number of inodes is limited, as it is by default, a user cannot soak up an unlimited amount of unreclaimable dcache memory just by repeatedly linking a file. But when v3.11 added O_TMPFILE, and the ability to use linkat() on the fd, we missed accommodating this new case in tmpfs: "df -i" shows that an extra "inode" remains accounted after the file is unlinked and the fd closed and the actual inode evicted. If a user repeatedly links tmpfiles into a tmpfs, the limit will be hit (ENOSPC) even after they are deleted. Just skip the extra reservation from shmem_link() in this case: there's a sense in which this first link of a tmpfile is then cheaper than a hard link of another file, but the accounting works out, and there's still good limiting, so no need to do anything more complicated. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1902182134370.7035@eggly.anvils Fixes: f4e0c30c191 ("allow the temp files created by open() to be linked to") Signed-off-by: Darrick J. Wong Signed-off-by: Hugh Dickins Reported-by: Matej Kupljen Acked-by: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/shmem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 6c10f1d92251..9b78c04f532b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3102,10 +3102,14 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr * No ordinary (disk based) filesystem counts links as inodes; * but each new link needs a new dentry, pinning lowmem, and * tmpfs dentries cannot be pruned until they are unlinked. + * But if an O_TMPFILE file is linked into the tmpfs, the + * first link must skip that, to get the accounting right. */ - ret = shmem_reserve_inode(inode->i_sb); - if (ret) - goto out; + if (inode->i_nlink) { + ret = shmem_reserve_inode(inode->i_sb); + if (ret) + goto out; + } dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); From b0c4ed01329827cb9ee5a8112abcdb76fe6c1dd5 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Mon, 28 Jan 2019 09:45:01 -0800 Subject: [PATCH 2330/2608] ixgbe: fix older devices that do not support IXGBE_MRQC_L3L4TXSWEN [ Upstream commit 156a67a9065e3339be85f811d1b13b920e50d73b ] The enabling L3/L4 filtering for transmit switched packets for all devices caused unforeseen issue on older devices when trying to send UDP traffic in an ordered sequence. This bit was originally intended for X550 devices, which supported this feature, so limit the scope of this bit to only X550 devices. Signed-off-by: Jeff Kirsher Tested-by: Andrew Bowers Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 42183a8b649c..01c120d656c5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3827,8 +3827,11 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) else mrqc = IXGBE_MRQC_VMDQRSS64EN; - /* Enable L3/L4 for Tx Switched packets */ - mrqc |= IXGBE_MRQC_L3L4TXSWEN; + /* Enable L3/L4 for Tx Switched packets only for X550, + * older devices do not support this feature + */ + if (hw->mac.type >= ixgbe_mac_X550) + mrqc |= IXGBE_MRQC_L3L4TXSWEN; } else { if (tcs > 4) mrqc = IXGBE_MRQC_RTRSS8TCEN; From 8c8561afce39f258465de68c02d7457cee0d5836 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 30 Jan 2019 19:32:40 +0300 Subject: [PATCH 2331/2608] ARCv2: lib: memcpy: fix doing prefetchw outside of buffer [ Upstream commit f8a15f97664178f27dfbf86a38f780a532cb6df0 ] ARCv2 optimized memcpy uses PREFETCHW instruction for prefetching the next cache line but doesn't ensure that the line is not past the end of the buffer. PRETECHW changes the line ownership and marks it dirty, which can cause data corruption if this area is used for DMA IO. Fix the issue by avoiding the PREFETCHW. This leads to performance degradation but it is OK as we'll introduce new memcpy implementation optimized for unaligned memory access using. We also cut off all PREFETCH instructions at they are quite useless here: * we call PREFETCH right before LOAD instruction call. * we copy 16 or 32 bytes of data (depending on CONFIG_ARC_HAS_LL64) in a main logical loop. so we call PREFETCH 4 times (or 2 times) for each L1 cache line (in case of 64B L1 cache Line which is default case). Obviously this is not optimal. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/lib/memcpy-archs.S | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S index d61044dd8b58..ea14b0bf3116 100644 --- a/arch/arc/lib/memcpy-archs.S +++ b/arch/arc/lib/memcpy-archs.S @@ -25,15 +25,11 @@ #endif #ifdef CONFIG_ARC_HAS_LL64 -# define PREFETCH_READ(RX) prefetch [RX, 56] -# define PREFETCH_WRITE(RX) prefetchw [RX, 64] # define LOADX(DST,RX) ldd.ab DST, [RX, 8] # define STOREX(SRC,RX) std.ab SRC, [RX, 8] # define ZOLSHFT 5 # define ZOLAND 0x1F #else -# define PREFETCH_READ(RX) prefetch [RX, 28] -# define PREFETCH_WRITE(RX) prefetchw [RX, 32] # define LOADX(DST,RX) ld.ab DST, [RX, 4] # define STOREX(SRC,RX) st.ab SRC, [RX, 4] # define ZOLSHFT 4 @@ -41,8 +37,6 @@ #endif ENTRY_CFI(memcpy) - prefetch [r1] ; Prefetch the read location - prefetchw [r0] ; Prefetch the write location mov.f 0, r2 ;;; if size is zero jz.d [blink] @@ -72,8 +66,6 @@ ENTRY_CFI(memcpy) lpnz @.Lcopy32_64bytes ;; LOOP START LOADX (r6, r1) - PREFETCH_READ (r1) - PREFETCH_WRITE (r3) LOADX (r8, r1) LOADX (r10, r1) LOADX (r4, r1) @@ -117,9 +109,7 @@ ENTRY_CFI(memcpy) lpnz @.Lcopy8bytes_1 ;; LOOP START ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location SHIFT_1 (r7, r6, 24) or r7, r7, r5 @@ -162,9 +152,7 @@ ENTRY_CFI(memcpy) lpnz @.Lcopy8bytes_2 ;; LOOP START ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location SHIFT_1 (r7, r6, 16) or r7, r7, r5 @@ -204,9 +192,7 @@ ENTRY_CFI(memcpy) lpnz @.Lcopy8bytes_3 ;; LOOP START ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location SHIFT_1 (r7, r6, 8) or r7, r7, r5 From f1e578427b8e923c9ba1e78fb1920129546adee5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 5 Feb 2019 10:07:07 -0800 Subject: [PATCH 2332/2608] ARC: uacces: remove lp_start, lp_end from clobber list [ Upstream commit d5e3c55e01d8b1774b37b4647c30fb22f1d39077 ] Newer ARC gcc handles lp_start, lp_end in a different way and doesn't like them in the clobber list. Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index c9173c02081c..eabc3efa6c6d 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -207,7 +207,7 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) */ "=&r" (tmp), "+r" (to), "+r" (from) : - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return n; } @@ -433,7 +433,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) */ "=&r" (tmp), "+r" (to), "+r" (from) : - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return n; } @@ -653,7 +653,7 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) " .previous \n" : "+r"(d_char), "+r"(res) : "i"(0) - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return res; } @@ -686,7 +686,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) : "g"(-EFAULT), "r"(count) - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return res; } From 760884bcd3427a17a4b464c02a3d90f0043a9f9c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 Jun 2018 10:20:37 -0700 Subject: [PATCH 2333/2608] ARCv2: support manual regfile save on interrupts [ Upstream commit e494239a007e601448110ac304fe055951f9de3b ] There's a hardware bug which affects the HSDK platform, triggered by micro-ops for auto-saving regfile on taken interrupt. The workaround is to inhibit autosave. Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/Kconfig | 8 +++++ arch/arc/include/asm/entry-arcv2.h | 54 ++++++++++++++++++++++++++++++ arch/arc/kernel/entry-arcv2.S | 4 ++- arch/arc/kernel/intc-arcv2.c | 2 ++ arch/arc/plat-hsdk/Kconfig | 1 + 5 files changed, 68 insertions(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9d06c9478a0d..82050893d0b3 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -417,6 +417,14 @@ config ARC_HAS_ACCL_REGS (also referred to as r58:r59). These can also be used by gcc as GPR so kernel needs to save/restore per process +config ARC_IRQ_NO_AUTOSAVE + bool "Disable hardware autosave regfile on interrupts" + default n + help + On HS cores, taken interrupt auto saves the regfile on stack. + This is programmable and can be optionally disabled in which case + software INTERRUPT_PROLOGUE/EPILGUE do the needed work + endif # ISA_ARCV2 endmenu # "ARC CPU Configuration" diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 257a68f3c2fe..9f581553dcc3 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -17,6 +17,33 @@ ; ; Now manually save: r12, sp, fp, gp, r25 +#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE +.ifnc \called_from, exception + st.as r9, [sp, -10] ; save r9 in it's final stack slot + sub sp, sp, 12 ; skip JLI, LDI, EI + + PUSH lp_count + PUSHAX lp_start + PUSHAX lp_end + PUSH blink + + PUSH r11 + PUSH r10 + + sub sp, sp, 4 ; skip r9 + + PUSH r8 + PUSH r7 + PUSH r6 + PUSH r5 + PUSH r4 + PUSH r3 + PUSH r2 + PUSH r1 + PUSH r0 +.endif +#endif + #ifdef CONFIG_ARC_HAS_ACCL_REGS PUSH r59 PUSH r58 @@ -86,6 +113,33 @@ POP r59 #endif +#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE +.ifnc \called_from, exception + POP r0 + POP r1 + POP r2 + POP r3 + POP r4 + POP r5 + POP r6 + POP r7 + POP r8 + POP r9 + POP r10 + POP r11 + + POP blink + POPAX lp_end + POPAX lp_start + + POP r9 + mov lp_count, r9 + + add sp, sp, 12 ; skip JLI, LDI, EI + ld.as r9, [sp, -10] ; reload r9 which got clobbered +.endif +#endif + .endm /*------------------------------------------------------------------------*/ diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index cc558a25b8fa..562089d62d9d 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -209,7 +209,9 @@ restore_regs: ;####### Return from Intr ####### debug_marker_l1: - bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot + ; bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot + btst r0, STATUS_DE_BIT ; Z flag set if bit clear + bnz .Lintr_ret_to_delay_slot ; branch if STATUS_DE_BIT set .Lisr_ret_fast_path: ; Handle special case #1: (Entry via Exception, Return via IRQ) diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 067ea362fb3e..cf18b3e5a934 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -49,11 +49,13 @@ void arc_init_IRQ(void) *(unsigned int *)&ictrl = 0; +#ifndef CONFIG_ARC_IRQ_NO_AUTOSAVE ictrl.save_nr_gpr_pairs = 6; /* r0 to r11 (r12 saved manually) */ ictrl.save_blink = 1; ictrl.save_lp_regs = 1; /* LP_COUNT, LP_START, LP_END */ ictrl.save_u_to_u = 0; /* user ctxt saved on kernel stack */ ictrl.save_idx_regs = 1; /* JLI, LDI, EI */ +#endif WRITE_AUX(AUX_IRQ_CTRL, ictrl); diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index fcc9a9e27e9c..8fb1600b29b7 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -9,5 +9,6 @@ menuconfig ARC_SOC_HSDK bool "ARC HS Development Kit SOC" depends on ISA_ARCV2 select ARC_HAS_ACCL_REGS + select ARC_IRQ_NO_AUTOSAVE select CLK_HSDK select RESET_HSDK From b0f38ebe0504d2f89281ee3c1d01eb1459b65db4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Feb 2019 22:53:50 +0100 Subject: [PATCH 2334/2608] phonet: fix building with clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6321aa197547da397753757bd84c6ce64b3e3d89 ] clang warns about overflowing the data[] member in the struct pnpipehdr: net/phonet/pep.c:295:8: warning: array index 4 is past the end of the array (which contains 1 element) [-Warray-bounds] if (hdr->data[4] == PEP_IND_READY) ^ ~ include/net/phonet/pep.h:66:3: note: array 'data' declared here u8 data[1]; Using a flexible array member at the end of the struct avoids the warning, but since we cannot have a flexible array member inside of the union, each index now has to be moved back by one, which makes it a little uglier. Signed-off-by: Arnd Bergmann Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/phonet/pep.h | 5 +++-- net/phonet/pep.c | 32 ++++++++++++++++---------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index b669fe6dbc3b..98f31c7ea23d 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -63,10 +63,11 @@ struct pnpipehdr { u8 state_after_reset; /* reset request */ u8 error_code; /* any response */ u8 pep_type; /* status indication */ - u8 data[1]; + u8 data0; /* anything else */ }; + u8 data[]; }; -#define other_pep_type data[1] +#define other_pep_type data[0] static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) { diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e81537991ddf..bffcef58ebf5 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -132,7 +132,7 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code, ph->utid = 0; ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = code; + ph->error_code = code; return pn_skb_send(sk, skb, NULL); } @@ -153,7 +153,7 @@ static int pipe_handler_request(struct sock *sk, u8 id, u8 code, ph->utid = id; /* whatever */ ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = code; + ph->error_code = code; return pn_skb_send(sk, skb, NULL); } @@ -208,7 +208,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, struct pnpipehdr *ph; struct sockaddr_pn dst; u8 data[4] = { - oph->data[0], /* PEP type */ + oph->pep_type, /* PEP type */ code, /* error code, at an unusual offset */ PAD, PAD, }; @@ -221,7 +221,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, ph->utid = oph->utid; ph->message_id = PNS_PEP_CTRL_RESP; ph->pipe_handle = oph->pipe_handle; - ph->data[0] = oph->data[1]; /* CTRL id */ + ph->data0 = oph->data[0]; /* CTRL id */ pn_skb_get_src_sockaddr(oskb, &dst); return pn_skb_send(sk, skb, &dst); @@ -272,17 +272,17 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) return -EINVAL; hdr = pnp_hdr(skb); - if (hdr->data[0] != PN_PEP_TYPE_COMMON) { + if (hdr->pep_type != PN_PEP_TYPE_COMMON) { net_dbg_ratelimited("Phonet unknown PEP type: %u\n", - (unsigned int)hdr->data[0]); + (unsigned int)hdr->pep_type); return -EOPNOTSUPP; } - switch (hdr->data[1]) { + switch (hdr->data[0]) { case PN_PEP_IND_FLOW_CONTROL: switch (pn->tx_fc) { case PN_LEGACY_FLOW_CONTROL: - switch (hdr->data[4]) { + switch (hdr->data[3]) { case PEP_IND_BUSY: atomic_set(&pn->tx_credits, 0); break; @@ -292,7 +292,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) } break; case PN_ONE_CREDIT_FLOW_CONTROL: - if (hdr->data[4] == PEP_IND_READY) + if (hdr->data[3] == PEP_IND_READY) atomic_set(&pn->tx_credits, wake = 1); break; } @@ -301,12 +301,12 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) break; - atomic_add(wake = hdr->data[4], &pn->tx_credits); + atomic_add(wake = hdr->data[3], &pn->tx_credits); break; default: net_dbg_ratelimited("Phonet unknown PEP indication: %u\n", - (unsigned int)hdr->data[1]); + (unsigned int)hdr->data[0]); return -EOPNOTSUPP; } if (wake) @@ -318,7 +318,7 @@ static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr = pnp_hdr(skb); - u8 n_sb = hdr->data[0]; + u8 n_sb = hdr->data0; pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; __skb_pull(skb, sizeof(*hdr)); @@ -506,7 +506,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) return -ECONNREFUSED; /* Parse sub-blocks */ - n_sb = hdr->data[4]; + n_sb = hdr->data[3]; while (n_sb > 0) { u8 type, buf[6], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); @@ -739,7 +739,7 @@ static int pipe_do_remove(struct sock *sk) ph->utid = 0; ph->message_id = PNS_PIPE_REMOVE_REQ; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = PAD; + ph->data0 = PAD; return pn_skb_send(sk, skb, NULL); } @@ -817,7 +817,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, peer_type = hdr->other_pep_type << 8; /* Parse sub-blocks (options) */ - n_sb = hdr->data[4]; + n_sb = hdr->data[3]; while (n_sb > 0) { u8 type, buf[1], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); @@ -1109,7 +1109,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) ph->utid = 0; if (pn->aligned) { ph->message_id = PNS_PIPE_ALIGNED_DATA; - ph->data[0] = 0; /* padding */ + ph->data0 = 0; /* padding */ } else ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; From 428675d410751ec27e75a2474c7738e9d6befb1b Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Feb 2019 13:12:40 +0800 Subject: [PATCH 2335/2608] mac80211_hwsim: propagate genlmsg_reply return code [ Upstream commit 17407715240456448e4989bee46ffc93991add83 ] genlmsg_reply can fail, so propagate its return code Signed-off-by: Li RongQing Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 8f57ca969c9f..27224dc26413 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3241,7 +3241,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) goto out_err; } - genlmsg_reply(skb, info); + res = genlmsg_reply(skb, info); break; } From 391dcf93530e9de4873626942c7a5daf232e4398 Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Wed, 20 Feb 2019 11:02:43 +0000 Subject: [PATCH 2336/2608] net: thunderx: make CFG_DONE message to run through generic send-ack sequence [ Upstream commit 0dd563b9a62c4cbabf5d4fd6596440c2491e72b1 ] At the end of NIC VF initialization VF sends CFG_DONE message to PF without using nicvf_msg_send_to_pf routine. This potentially could re-write data in mailbox. This commit is to implement common way of sending CFG_DONE message by the same way with other configuration messages by using nicvf_send_msg_to_pf() routine. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cavium/thunder/nic_main.c | 2 +- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index d89ec4724efd..819f38a3225d 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1030,7 +1030,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) case NIC_MBOX_MSG_CFG_DONE: /* Last message of VF config msg sequence */ nic_enable_vf(nic, vf, true); - goto unlock; + break; case NIC_MBOX_MSG_SHUTDOWN: /* First msg in VF teardown sequence */ if (vf >= nic->num_vf_en) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index f13256af8031..59b62b49ad48 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -166,6 +166,17 @@ static int nicvf_check_pf_ready(struct nicvf *nic) return 1; } +static void nicvf_send_cfg_done(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; + if (nicvf_send_msg_to_pf(nic, &mbx)) { + netdev_err(nic->netdev, + "PF didn't respond to CFG DONE msg\n"); + } +} + static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx) { if (bgx->rx) @@ -1329,7 +1340,6 @@ int nicvf_open(struct net_device *netdev) struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; - union nic_mbx mbx = {}; netif_carrier_off(netdev); @@ -1419,8 +1429,7 @@ int nicvf_open(struct net_device *netdev) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); /* Send VF config done msg to PF */ - mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; - nicvf_write_to_mbx(nic, &mbx); + nicvf_send_cfg_done(nic); return 0; cleanup: From fbcf59a9a8a286d2e36564e1e3aead0e62283b51 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 22 Feb 2019 22:36:03 +0000 Subject: [PATCH 2337/2608] nfp: bpf: fix code-gen bug on BPF_ALU | BPF_XOR | BPF_K [ Upstream commit 71c190249f0ced5b26377ea6bf829ab3af77a40c ] The intended optimization should be A ^ 0 = A, not A ^ -1 = A. Fixes: cd7df56ed3e6 ("nfp: add BPF to NFP code translator") Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 239dfbe8a0a1..3105ee5bbf8e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1017,7 +1017,7 @@ static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); } static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) From ab7e40e70e5e14479cbd7b3a2c787b64110eef2d Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 22 Feb 2019 22:36:04 +0000 Subject: [PATCH 2338/2608] nfp: bpf: fix ALU32 high bits clearance bug [ Upstream commit f036ebd9bfbe1e91a3d855e85e05fc5ff156b641 ] NFP BPF JIT compiler is doing a couple of small optimizations when jitting ALU imm instructions, some of these optimizations could save code-gen, for example: A & -1 = A A | 0 = A A ^ 0 = A However, for ALU32, high 32-bit of the 64-bit register should still be cleared according to ISA semantics. Fixes: cd7df56ed3e6 ("nfp: add BPF to NFP code translator") Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 3105ee5bbf8e..c1ffec85817a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -756,15 +756,10 @@ wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - enum alu_op alu_op, bool skip) + enum alu_op alu_op) { const struct bpf_insn *insn = &meta->insn; - if (skip) { - meta->skip = true; - return 0; - } - wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); @@ -1017,7 +1012,7 @@ static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR); } static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1027,7 +1022,7 @@ static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND); } static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1037,7 +1032,7 @@ static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR); } static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1047,7 +1042,7 @@ static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD); } static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1057,7 +1052,7 @@ static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB); } static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) From fb7d3573ffb0f9c461913e252bed282742f3b116 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Fri, 22 Feb 2019 14:57:23 +0800 Subject: [PATCH 2339/2608] net: set static variable an initial value in atl2_probe() [ Upstream commit 4593403fa516a5a4cffe6883c5062d60932cbfbe ] cards_found is a static variable, but when it enters atl2_probe(), cards_found is set to zero, the value is not consistent with last probe, so next behavior is not our expect. Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/atheros/atlx/atl2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 77a1c03255de..225b4d452e0e 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1334,13 +1334,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; struct atl2_adapter *adapter; - static int cards_found; + static int cards_found = 0; unsigned long mmio_start; int mmio_len; int err; - cards_found = 0; - err = pci_enable_device(pdev); if (err) return err; From 4cf6ad0bf823a1acac199a976719278b7e30892d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 22 Feb 2019 22:35:32 -0800 Subject: [PATCH 2340/2608] tmpfs: fix uninitialized return value in shmem_link [ Upstream commit 29b00e609960ae0fcff382f4c7079dd0874a5311 ] When we made the shmem_reserve_inode call in shmem_link conditional, we forgot to update the declaration for ret so that it always has a known value. Dan Carpenter pointed out this deficiency in the original patch. Fixes: 1062af920c07 ("tmpfs: fix link accounting when a tmpfile is linked in") Reported-by: Dan Carpenter Signed-off-by: Darrick J. Wong Signed-off-by: Hugh Dickins Cc: Matej Kupljen Cc: Al Viro Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 9b78c04f532b..037e2ee9ccac 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3096,7 +3096,7 @@ static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode, static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); - int ret; + int ret = 0; /* * No ordinary (disk based) filesystem counts links as inodes; From 8d6df5097c0005320ab6f3cd8dda2ef31db6c6d1 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 19 Nov 2018 10:33:44 -0500 Subject: [PATCH 2341/2608] media: videobuf2-v4l2: drop WARN_ON in vb2_warn_zero_bytesused() commit 5e99456c20f712dcc13d9f6ca4278937d5367355 upstream. Userspace shouldn't set bytesused to 0 for output buffers. vb2_warn_zero_bytesused() warns about this (only once!), but it also calls WARN_ON(1), which is confusing since it is not immediately clear that it warns about a 0 value for bytesused. Just drop the WARN_ON as it serves no purpose. Signed-off-by: Hans Verkuil Acked-by: Ezequiel Garcia Signed-off-by: Mauro Carvalho Chehab Cc: Matthias Maennich Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-v4l2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c index 0c0669976bdc..69ca8debb711 100644 --- a/drivers/media/v4l2-core/videobuf2-v4l2.c +++ b/drivers/media/v4l2-core/videobuf2-v4l2.c @@ -145,7 +145,6 @@ static void vb2_warn_zero_bytesused(struct vb2_buffer *vb) return; check_once = true; - WARN_ON(1); pr_warn("use of bytesused == 0 is deprecated and will be removed in the future,\n"); if (vb->vb2_queue->allow_zero_bytesused) From e713b0f4b480d671c495f542d769c8c6725c2dbd Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 21 Feb 2019 14:19:17 +0200 Subject: [PATCH 2342/2608] stm class: Prevent division by zero commit bf7cbaae0831252b416f375ca9b1027ecd4642dd upstream. Using STP_POLICY_ID_SET ioctl command with dummy_stm device, or any STM device that supplies zero mmio channel size, will trigger a division by zero bug in the kernel. Prevent this by disallowing channel widths other than 1 for such devices. Signed-off-by: Alexander Shishkin Fixes: 7bd1d4093c2f ("stm class: Introduce an abstraction for System Trace Module devices") CC: stable@vger.kernel.org # v4.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 88a79b45b80c..41724d18e712 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -561,7 +561,7 @@ static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg) { struct stm_device *stm = stmf->stm; struct stp_policy_id *id; - int ret = -EINVAL; + int ret = -EINVAL, wlimit = 1; u32 size; if (stmf->output.nr_chans) @@ -589,8 +589,10 @@ static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg) if (id->__reserved_0 || id->__reserved_1) goto err_free; - if (id->width < 1 || - id->width > PAGE_SIZE / stm->data->sw_mmiosz) + if (stm->data->sw_mmiosz) + wlimit = PAGE_SIZE / stm->data->sw_mmiosz; + + if (id->width < 1 || id->width > wlimit) goto err_free; ret = stm_file_assign(stmf, id->id, id->width); From 86dd0423d6b62684b3c346577029fecc4f98f92a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Jan 2019 10:47:00 -0800 Subject: [PATCH 2343/2608] libnvdimm/label: Clear 'updating' flag after label-set update commit 966d23a006ca7b44ac8cf4d0c96b19785e0c3da0 upstream. The UEFI 2.7 specification sets expectations that the 'updating' flag is eventually cleared. To date, the libnvdimm core has never adhered to that protocol. The policy of the core matches the policy of other multi-device info-block formats like MD-Software-RAID that expect administrator intervention on inconsistent info-blocks, not automatic invalidation. However, some pre-boot environments may unfortunately attempt to "clean up" the labels and invalidate a set when it fails to find at least one "non-updating" label in the set. Clear the updating flag after set updates to minimize the window of vulnerability to aggressive pre-boot environments. Ideally implementations would not write to the label area outside of creating namespaces. Note that this only minimizes the window, it does not close it as the system can still crash while clearing the flag and the set can be subsequently deleted / invalidated by the pre-boot environment. Fixes: f524bf271a5c ("libnvdimm: write pmem label set") Cc: Cc: Kelly Couch Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/label.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index de66c02f6140..184149a49b02 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -616,7 +616,7 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class, static int __pmem_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, - int pos) + int pos, unsigned long flags) { struct nd_namespace_common *ndns = &nspm->nsio.common; struct nd_interleave_set *nd_set = nd_region->nd_set; @@ -657,7 +657,7 @@ static int __pmem_label_update(struct nd_region *nd_region, memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); if (nspm->alt_name) memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); - nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING); + nd_label->flags = __cpu_to_le32(flags); nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings); nd_label->position = __cpu_to_le16(pos); nd_label->isetcookie = __cpu_to_le64(cookie); @@ -1111,13 +1111,13 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) int nd_pmem_namespace_label_update(struct nd_region *nd_region, struct nd_namespace_pmem *nspm, resource_size_t size) { - int i; + int i, rc; for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct resource *res; - int rc, count = 0; + int count = 0; if (size == 0) { rc = del_labels(nd_mapping, nspm->uuid); @@ -1135,7 +1135,20 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region, if (rc < 0) return rc; - rc = __pmem_label_update(nd_region, nd_mapping, nspm, i); + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i, + NSLABEL_FLAG_UPDATING); + if (rc) + return rc; + } + + if (size == 0) + return 0; + + /* Clear the UPDATING flag per UEFI 2.7 expectations */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i, 0); if (rc) return rc; } From a3211ba18b10d26e827a7fde380f95985e6f118e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 22 Jan 2019 10:48:09 +0800 Subject: [PATCH 2344/2608] libnvdimm, pfn: Fix over-trim in trim_pfn_device() commit f101ada7da6551127d192c2f1742c1e9e0f62799 upstream. When trying to see whether current nd_region intersects with others, trim_pfn_device() has already calculated the *size* to be expanded to SECTION size. Do not double append 'adjust' to 'size' when calculating whether the end of a region collides with the next pmem region. Fixes: ae86cbfef381 "libnvdimm, pfn: Pad pfn namespaces relative to other regions" Cc: Signed-off-by: Wei Yang Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/pfn_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 6d38191ff0da..c657a9871483 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -618,7 +618,7 @@ static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trun if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) == REGION_MIXED || !IS_ALIGNED(end, nd_pfn->align) - || nd_region_conflict(nd_region, start, size + adjust)) + || nd_region_conflict(nd_region, start, size)) *end_trunc = end - phys_pmem_align_down(nd_pfn, end); } From 044bfdb953cd62d497582c069975abb92308c74e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 24 Jan 2019 17:33:06 -0800 Subject: [PATCH 2345/2608] libnvdimm/pmem: Honor force_raw for legacy pmem regions commit fa7d2e639cd90442d868dfc6ca1d4cc9d8bf206e upstream. For recovery, where non-dax access is needed to a given physical address range, and testing, allow the 'force_raw' attribute to override the default establishment of a dev_pagemap. Otherwise without this capability it is possible to end up with a namespace that can not be activated due to corrupted info-block, and one that can not be repaired due to a section collision. Cc: Fixes: 004f1afbe199 ("libnvdimm, pmem: direct map legacy pmem by default") Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/namespace_devs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 228bafa4d322..50b01d3eadd9 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -138,6 +138,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid) bool pmem_should_map_pages(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns = to_ndns(dev); struct nd_namespace_io *nsio; if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) @@ -149,6 +150,9 @@ bool pmem_should_map_pages(struct device *dev) if (is_nd_pfn(dev) || is_nd_btt(dev)) return false; + if (ndns->force_raw) + return false; + nsio = to_nd_namespace_io(dev); if (region_intersects(nsio->res.start, resource_size(&nsio->res), IORESOURCE_SYSTEM_RAM, From 6426aaa60c346b237081bf2aaf30bba098bfc881 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 6 Feb 2019 13:04:53 +1100 Subject: [PATCH 2346/2608] libnvdimm: Fix altmap reservation size calculation commit 07464e88365e9236febaca9ed1a2e2006d8bc952 upstream. Libnvdimm reserves the first 8K of pfn and devicedax namespaces to store a superblock describing the namespace. This 8K reservation is contained within the altmap area which the kernel uses for the vmemmap backing for the pages within the namespace. The altmap allows for some pages at the start of the altmap area to be reserved and that mechanism is used to protect the superblock from being re-used as vmemmap backing. The number of PFNs to reserve is calculated using: PHYS_PFN(SZ_8K) Which is implemented as: #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) So on systems where PAGE_SIZE is greater than 8K the reservation size is truncated to zero and the superblock area is re-used as vmemmap backing. As a result all the namespace information stored in the superblock (i.e. if it's a PFN or DAX namespace) is lost and the namespace needs to be re-created to get access to the contents. This patch fixes this by using PFN_UP() rather than PHYS_PFN() to ensure that at least one page is reserved. On systems with a 4K pages size this patch should have no effect. Cc: stable@vger.kernel.org Cc: Dan Williams Fixes: ac515c084be9 ("libnvdimm, pmem, pfn: move pfn setup to the core") Signed-off-by: Oliver O'Halloran Reviewed-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/pfn_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index c657a9871483..b9dad88b8ea3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -535,7 +535,7 @@ static unsigned long init_altmap_base(resource_size_t base) static unsigned long init_altmap_reserve(resource_size_t base) { - unsigned long reserve = PHYS_PFN(SZ_8K); + unsigned long reserve = PFN_UP(SZ_8K); unsigned long base_pfn = PHYS_PFN(base); reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); From 8f94a9388accd55e39028e56b9f020fca7ebad4d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jan 2019 11:41:29 -0500 Subject: [PATCH 2347/2608] fix cgroup_do_mount() handling of failure exits commit 399504e21a10be16dd1408ba0147367d9d82a10c upstream. same story as with last May fixes in sysfs (7b745a4e4051 "unfuck sysfs_mount()"); new_sb is left uninitialized in case of early errors in kernfs_mount_ns() and papering over it by treating any error from kernfs_mount_ns() as equivalent to !new_ns ends up conflating the cases when objects had never been transferred to a superblock with ones when that has happened and resulting new superblock had been dropped. Easily fixed (same way as in sysfs case). Additionally, there's a superblock leak on kernfs_node_dentry() failure *and* a dentry leak inside kernfs_node_dentry() itself - the latter on probably impossible errors, but the former not impossible to trigger (as the matter of fact, injecting allocation failures at that point *does* trigger it). Cc: stable@kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 8 ++++++-- kernel/cgroup/cgroup.c | 9 ++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 95a7c88baed9..5019058e0f6a 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -196,8 +196,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, return dentry; knparent = find_next_ancestor(kn, NULL); - if (WARN_ON(!knparent)) + if (WARN_ON(!knparent)) { + dput(dentry); return ERR_PTR(-EINVAL); + } do { struct dentry *dtmp; @@ -206,8 +208,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, if (kn == knparent) return dentry; kntmp = find_next_ancestor(kn, knparent); - if (WARN_ON(!kntmp)) + if (WARN_ON(!kntmp)) { + dput(dentry); return ERR_PTR(-EINVAL); + } dtmp = lookup_one_len_unlocked(kntmp->name, dentry, strlen(kntmp->name)); dput(dentry); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 21bbfc09e395..7e79358b4473 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1942,7 +1942,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, struct cgroup_namespace *ns) { struct dentry *dentry; - bool new_sb; + bool new_sb = false; dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); @@ -1952,6 +1952,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, */ if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { struct dentry *nsdentry; + struct super_block *sb = dentry->d_sb; struct cgroup *cgrp; mutex_lock(&cgroup_mutex); @@ -1962,12 +1963,14 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); - nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); + nsdentry = kernfs_node_dentry(cgrp->kn, sb); dput(dentry); + if (IS_ERR(nsdentry)) + deactivate_locked_super(sb); dentry = nsdentry; } - if (IS_ERR(dentry) || !new_sb) + if (!new_sb) cgroup_put(&root->cgrp); return dentry; From e605d572c3ca00698ed0d05dc343c3a195196fdb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 27 Jan 2019 10:16:52 +0100 Subject: [PATCH 2348/2608] crypto: arm/crct10dif - revert to C code for short inputs commit 62fecf295e3c48be1b5f17c440b93875b9adb4d6 upstream. The SIMD routine ported from x86 used to have a special code path for inputs < 16 bytes, which got lost somewhere along the way. Instead, the current glue code aligns the input pointer to permit the NEON routine to use special versions of the vld1 instructions that assume 16 byte alignment, but this could result in inputs of less than 16 bytes to be passed in. This not only fails the new extended tests that Eric has implemented, it also results in the code reading past the end of the input, which could potentially result in crashes when dealing with less than 16 bytes of input at the end of a page which is followed by an unmapped page. So update the glue code to only invoke the NEON routine if the input is at least 16 bytes. Reported-by: Eric Biggers Reviewed-by: Eric Biggers Fixes: 1d481f1cd892 ("crypto: arm/crct10dif - port x86 SSE implementation to ARM") Cc: # v4.10+ Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm/crypto/crct10dif-ce-core.S | 14 +++++++------- arch/arm/crypto/crct10dif-ce-glue.c | 23 ++++++----------------- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S index ce45ba0c0687..16019b5961e7 100644 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ b/arch/arm/crypto/crct10dif-ce-core.S @@ -124,10 +124,10 @@ ENTRY(crc_t10dif_pmull) vext.8 q10, qzr, q0, #4 // receive the initial 64B data, xor the initial crc value - vld1.64 {q0-q1}, [arg2, :128]! - vld1.64 {q2-q3}, [arg2, :128]! - vld1.64 {q4-q5}, [arg2, :128]! - vld1.64 {q6-q7}, [arg2, :128]! + vld1.64 {q0-q1}, [arg2]! + vld1.64 {q2-q3}, [arg2]! + vld1.64 {q4-q5}, [arg2]! + vld1.64 {q6-q7}, [arg2]! CPU_LE( vrev64.8 q0, q0 ) CPU_LE( vrev64.8 q1, q1 ) CPU_LE( vrev64.8 q2, q2 ) @@ -167,7 +167,7 @@ CPU_LE( vrev64.8 q7, q7 ) _fold_64_B_loop: .macro fold64, reg1, reg2 - vld1.64 {q11-q12}, [arg2, :128]! + vld1.64 {q11-q12}, [arg2]! vmull.p64 q8, \reg1\()h, d21 vmull.p64 \reg1, \reg1\()l, d20 @@ -238,7 +238,7 @@ _16B_reduction_loop: vmull.p64 q7, d15, d21 veor.8 q7, q7, q8 - vld1.64 {q0}, [arg2, :128]! + vld1.64 {q0}, [arg2]! CPU_LE( vrev64.8 q0, q0 ) vswp d0, d1 veor.8 q7, q7, q0 @@ -335,7 +335,7 @@ _less_than_128: vmov.i8 q0, #0 vmov s3, arg1_low32 // get the initial crc value - vld1.64 {q7}, [arg2, :128]! + vld1.64 {q7}, [arg2]! CPU_LE( vrev64.8 q7, q7 ) vswp d14, d15 veor.8 q7, q7, q0 diff --git a/arch/arm/crypto/crct10dif-ce-glue.c b/arch/arm/crypto/crct10dif-ce-glue.c index d428355cf38d..14c19c70a841 100644 --- a/arch/arm/crypto/crct10dif-ce-glue.c +++ b/arch/arm/crypto/crct10dif-ce-glue.c @@ -35,26 +35,15 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data, unsigned int length) { u16 *crc = shash_desc_ctx(desc); - unsigned int l; - if (!may_use_simd()) { - *crc = crc_t10dif_generic(*crc, data, length); + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) { + kernel_neon_begin(); + *crc = crc_t10dif_pmull(*crc, data, length); + kernel_neon_end(); } else { - if (unlikely((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) { - l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE - - ((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)); - - *crc = crc_t10dif_generic(*crc, data, l); - - length -= l; - data += l; - } - if (length > 0) { - kernel_neon_begin(); - *crc = crc_t10dif_pmull(*crc, data, length); - kernel_neon_end(); - } + *crc = crc_t10dif_generic(*crc, data, length); } + return 0; } From 5b9728268e817ebb7b1e4c4e1bb36abcd1537938 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 27 Jan 2019 10:16:53 +0100 Subject: [PATCH 2349/2608] crypto: arm64/crct10dif - revert to C code for short inputs commit d72b9d4acd548251f55b16843fc7a05dc5c80de8 upstream. The SIMD routine ported from x86 used to have a special code path for inputs < 16 bytes, which got lost somewhere along the way. Instead, the current glue code aligns the input pointer to 16 bytes, which is not really necessary on this architecture (although it could be beneficial to performance to expose aligned data to the the NEON routine), but this could result in inputs of less than 16 bytes to be passed in. This not only fails the new extended tests that Eric has implemented, it also results in the code reading past the end of the input, which could potentially result in crashes when dealing with less than 16 bytes of input at the end of a page which is followed by an unmapped page. So update the glue code to only invoke the NEON routine if the input is at least 16 bytes. Reported-by: Eric Biggers Reviewed-by: Eric Biggers Fixes: 6ef5737f3931 ("crypto: arm64/crct10dif - port x86 SSE implementation to arm64") Cc: # v4.10+ Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/crct10dif-ce-glue.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c index 96f0cae4a022..617bcfc1b080 100644 --- a/arch/arm64/crypto/crct10dif-ce-glue.c +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -36,26 +36,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data, unsigned int length) { u16 *crc = shash_desc_ctx(desc); - unsigned int l; - if (unlikely((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) { - l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE - - ((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)); - - *crc = crc_t10dif_generic(*crc, data, l); - - length -= l; - data += l; - } - - if (length > 0) { - if (may_use_simd()) { - kernel_neon_begin(); - *crc = crc_t10dif_pmull(*crc, data, length); - kernel_neon_end(); - } else { - *crc = crc_t10dif_generic(*crc, data, length); - } + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) { + kernel_neon_begin(); + *crc = crc_t10dif_pmull(*crc, data, length); + kernel_neon_end(); + } else { + *crc = crc_t10dif_generic(*crc, data, length); } return 0; From 3a48ec7084ed70f0d1877ded8bc2b2844a5beb29 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Jan 2019 18:47:42 -0800 Subject: [PATCH 2350/2608] crypto: hash - set CRYPTO_TFM_NEED_KEY if ->setkey() fails commit ba7d7433a0e998c902132bd47330e355a1eaa894 upstream. Some algorithms have a ->setkey() method that is not atomic, in the sense that setting a key can fail after changes were already made to the tfm context. In this case, if a key was already set the tfm can end up in a state that corresponds to neither the old key nor the new key. It's not feasible to make all ->setkey() methods atomic, especially ones that have to key multiple sub-tfms. Therefore, make the crypto API set CRYPTO_TFM_NEED_KEY if ->setkey() fails and the algorithm requires a key, to prevent the tfm from being used until a new key is set. Note: we can't set CRYPTO_TFM_NEED_KEY for OPTIONAL_KEY algorithms, so ->setkey() for those must nevertheless be atomic. That's fine for now since only the crc32 and crc32c algorithms set OPTIONAL_KEY, and it's not intended that OPTIONAL_KEY be used much. [Cc stable mainly because when introducing the NEED_KEY flag I changed AF_ALG to rely on it; and unlike in-kernel crypto API users, AF_ALG previously didn't have this problem. So these "incompletely keyed" states became theoretically accessible via AF_ALG -- though, the opportunities for causing real mischief seem pretty limited.] Fixes: 9fa68f620041 ("crypto: hash - prevent using keyed hashes without setting key") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 28 +++++++++++++++++++--------- crypto/shash.c | 18 +++++++++++++----- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 5bec16ebbf8b..5a9fa1a867f9 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -190,6 +190,21 @@ static int ahash_setkey_unaligned(struct crypto_ahash *tfm, const u8 *key, return ret; } +static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + return -ENOSYS; +} + +static void ahash_set_needkey(struct crypto_ahash *tfm) +{ + const struct hash_alg_common *alg = crypto_hash_alg_common(tfm); + + if (tfm->setkey != ahash_nosetkey && + !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) + crypto_ahash_set_flags(tfm, CRYPTO_TFM_NEED_KEY); +} + int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { @@ -201,20 +216,16 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, else err = tfm->setkey(tfm, key, keylen); - if (err) + if (unlikely(err)) { + ahash_set_needkey(tfm); return err; + } crypto_ahash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); return 0; } EXPORT_SYMBOL_GPL(crypto_ahash_setkey); -static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, - unsigned int keylen) -{ - return -ENOSYS; -} - static inline unsigned int ahash_align_buffer_size(unsigned len, unsigned long mask) { @@ -483,8 +494,7 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) if (alg->setkey) { hash->setkey = alg->setkey; - if (!(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) - crypto_ahash_set_flags(hash, CRYPTO_TFM_NEED_KEY); + ahash_set_needkey(hash); } if (alg->export) hash->export = alg->export; diff --git a/crypto/shash.c b/crypto/shash.c index 5d732c6bb4b2..a04145e5306a 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -53,6 +53,13 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, return err; } +static void shash_set_needkey(struct crypto_shash *tfm, struct shash_alg *alg) +{ + if (crypto_shash_alg_has_setkey(alg) && + !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) + crypto_shash_set_flags(tfm, CRYPTO_TFM_NEED_KEY); +} + int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { @@ -65,8 +72,10 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, else err = shash->setkey(tfm, key, keylen); - if (err) + if (unlikely(err)) { + shash_set_needkey(tfm, shash); return err; + } crypto_shash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); return 0; @@ -368,7 +377,8 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->final = shash_async_final; crt->finup = shash_async_finup; crt->digest = shash_async_digest; - crt->setkey = shash_async_setkey; + if (crypto_shash_alg_has_setkey(alg)) + crt->setkey = shash_async_setkey; crypto_ahash_set_flags(crt, crypto_shash_get_flags(shash) & CRYPTO_TFM_NEED_KEY); @@ -390,9 +400,7 @@ static int crypto_shash_init_tfm(struct crypto_tfm *tfm) hash->descsize = alg->descsize; - if (crypto_shash_alg_has_setkey(alg) && - !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) - crypto_shash_set_flags(hash, CRYPTO_TFM_NEED_KEY); + shash_set_needkey(hash, alg); return 0; } From 29ef5d0ff32ea27be5170efe57e352686e92b9b7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Jan 2019 20:57:35 -0800 Subject: [PATCH 2351/2608] crypto: testmgr - skip crc32c context test for ahash algorithms commit eb5e6730db98fcc4b51148b4a819fa4bf864ae54 upstream. Instantiating "cryptd(crc32c)" causes a crypto self-test failure because the crypto_alloc_shash() in alg_test_crc32c() fails. This is because cryptd(crc32c) is an ahash algorithm, not a shash algorithm; so it can only be accessed through the ahash API, unlike shash algorithms which can be accessed through both the ahash and shash APIs. As the test is testing the shash descriptor format which is only applicable to shash algorithms, skip it for ahash algorithms. (Note that it's still important to fix crypto self-test failures even for weird algorithm instantiations like cryptd(crc32c) that no one would really use; in fips_enabled mode unprivileged users can use them to panic the kernel, and also they prevent treating a crypto self-test failure as a bug when fuzzing the kernel.) Fixes: 8e3ee85e68c5 ("crypto: crc32c - Test descriptor context format") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/testmgr.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 7125ba3880af..d91278c01ea8 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1839,14 +1839,21 @@ static int alg_test_crc32c(const struct alg_test_desc *desc, err = alg_test_hash(desc, driver, type, mask); if (err) - goto out; + return err; tfm = crypto_alloc_shash(driver, type, mask); if (IS_ERR(tfm)) { + if (PTR_ERR(tfm) == -ENOENT) { + /* + * This crc32c implementation is only available through + * ahash API, not the shash API, so the remaining part + * of the test is not applicable to it. + */ + return 0; + } printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(tfm)); - err = PTR_ERR(tfm); - goto out; + return PTR_ERR(tfm); } do { @@ -1873,7 +1880,6 @@ static int alg_test_crc32c(const struct alg_test_desc *desc, crypto_free_shash(tfm); -out: return err; } From 9ad9f6f9d209c12f2df1ca370dd93afe0063d1fd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 24 Jan 2019 17:33:45 +0100 Subject: [PATCH 2352/2608] crypto: arm64/aes-ccm - fix logical bug in AAD MAC handling commit eaf46edf6ea89675bd36245369c8de5063a0272c upstream. The NEON MAC calculation routine fails to handle the case correctly where there is some data in the buffer, and the input fills it up exactly. In this case, we enter the loop at the end with w8 == 0, while a negative value is assumed, and so the loop carries on until the increment of the 32-bit counter wraps around, which is quite obviously wrong. So omit the loop altogether in this case, and exit right away. Reported-by: Eric Biggers Fixes: a3fd82105b9d1 ("arm64/crypto: AES in CCM mode using ARMv8 Crypto ...") Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-ce-ccm-core.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index e3a375c4cb83..1b151442dac1 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -74,12 +74,13 @@ ENTRY(ce_aes_ccm_auth_data) beq 10f ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ b 7b -8: mov w7, w8 +8: cbz w8, 91f + mov w7, w8 add w8, w8, #16 9: ext v1.16b, v1.16b, v1.16b, #1 adds w7, w7, #1 bne 9b - eor v0.16b, v0.16b, v1.16b +91: eor v0.16b, v0.16b, v1.16b st1 {v0.16b}, [x0] 10: str w8, [x3] ret From 7976a8bf674e29084e659155b5791eec88e59542 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 24 Jan 2019 17:33:46 +0100 Subject: [PATCH 2353/2608] crypto: arm64/aes-ccm - fix bugs in non-NEON fallback routine commit 969e2f59d589c15f6aaf306e590dde16f12ea4b3 upstream. Commit 5092fcf34908 ("crypto: arm64/aes-ce-ccm: add non-SIMD generic fallback") introduced C fallback code to replace the NEON routines when invoked from a context where the NEON is not available (i.e., from the context of a softirq taken while the NEON is already being used in kernel process context) Fix two logical flaws in the MAC calculation of the associated data. Reported-by: Eric Biggers Fixes: 5092fcf34908 ("crypto: arm64/aes-ce-ccm: add non-SIMD generic fallback") Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-ce-ccm-glue.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index a1254036f2b1..ae0d26705851 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -123,7 +123,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], abytes -= added; } - while (abytes > AES_BLOCK_SIZE) { + while (abytes >= AES_BLOCK_SIZE) { __aes_arm64_encrypt(key->key_enc, mac, mac, num_rounds(key)); crypto_xor(mac, in, AES_BLOCK_SIZE); @@ -137,8 +137,6 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], num_rounds(key)); crypto_xor(mac, in, abytes); *macp = abytes; - } else { - *macp = 0; } } } From fe0c218ef467433f908ed185474b30f969f2b425 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 13 Feb 2019 15:43:08 -0800 Subject: [PATCH 2354/2608] CIFS: Do not reset lease state to NONE on lease break commit 7b9b9edb49ad377b1e06abf14354c227e9ac4b06 upstream. Currently on lease break the client sets a caching level twice: when oplock is detected and when oplock is processed. While the 1st attempt sets the level to the value provided by the server, the 2nd one resets the level to None unconditionally. This happens because the oplock/lease processing code was changed to avoid races between page cache flushes and oplock breaks. The commit c11f1df5003d534 ("cifs: Wait for writebacks to complete before attempting write.") fixed the races for oplocks but didn't apply the same changes for leases resulting in overwriting the server granted value to None. Fix this by properly processing lease breaks. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2misc.c | 17 ++++++++++++++--- fs/cifs/smb2ops.c | 15 ++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index efdfdb47a7dd..a97a0e0b1a74 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -479,7 +479,6 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, __u8 lease_state; struct list_head *tmp; struct cifsFileInfo *cfile; - struct TCP_Server_Info *server = tcon->ses->server; struct cifs_pending_open *open; struct cifsInodeInfo *cinode; int ack_req = le32_to_cpu(rsp->Flags & @@ -499,13 +498,25 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, cifs_dbg(FYI, "lease key match, lease break 0x%x\n", le32_to_cpu(rsp->NewLeaseState)); - server->ops->set_oplock_level(cinode, lease_state, 0, NULL); - if (ack_req) cfile->oplock_break_cancelled = false; else cfile->oplock_break_cancelled = true; + set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags); + + /* + * Set or clear flags depending on the lease state being READ. + * HANDLE caching flag should be added when the client starts + * to defer closing remote file handles with HANDLE leases. + */ + if (lease_state & SMB2_LEASE_READ_CACHING_HE) + set_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, + &cinode->flags); + else + clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, + &cinode->flags); + queue_work(cifsoplockd_wq, &cfile->oplock_break); kfree(lw); return true; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index fb1c65f93114..418062c7f040 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1932,6 +1932,15 @@ smb2_downgrade_oplock(struct TCP_Server_Info *server, server->ops->set_oplock_level(cinode, 0, 0, NULL); } +static void +smb21_downgrade_oplock(struct TCP_Server_Info *server, + struct cifsInodeInfo *cinode, bool set_level2) +{ + server->ops->set_oplock_level(cinode, + set_level2 ? SMB2_LEASE_READ_CACHING_HE : + 0, 0, NULL); +} + static void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, unsigned int epoch, bool *purge_cache) @@ -2917,7 +2926,7 @@ struct smb_version_operations smb21_operations = { .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, .handle_cancelled_mid = smb2_handle_cancelled_mid, - .downgrade_oplock = smb2_downgrade_oplock, + .downgrade_oplock = smb21_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, .negotiate_wsize = smb2_negotiate_wsize, @@ -3012,7 +3021,7 @@ struct smb_version_operations smb30_operations = { .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, .handle_cancelled_mid = smb2_handle_cancelled_mid, - .downgrade_oplock = smb2_downgrade_oplock, + .downgrade_oplock = smb21_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, .negotiate_wsize = smb2_negotiate_wsize, @@ -3117,7 +3126,7 @@ struct smb_version_operations smb311_operations = { .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, .handle_cancelled_mid = smb2_handle_cancelled_mid, - .downgrade_oplock = smb2_downgrade_oplock, + .downgrade_oplock = smb21_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, .negotiate_wsize = smb2_negotiate_wsize, From d77d3f94ce1511c4958dde109bbb2aa0d506d1d6 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 4 Mar 2019 17:48:01 -0800 Subject: [PATCH 2355/2608] CIFS: Fix read after write for files with read caching commit 6dfbd84684700cb58b34e8602c01c12f3d2595c8 upstream. When we have a READ lease for a file and have just issued a write operation to the server we need to purge the cache and set oplock/lease level to NONE to avoid reading stale data. Currently we do that only if a write operation succedeed thus not covering cases when a request was sent to the server but a negative error code was returned later for some other reasons (e.g. -EIOCBQUEUED or -EINTR). Fix this by turning off caching regardless of the error code being returned. The patches fixes generic tests 075 and 112 from the xfs-tests. Cc: Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 852d7d1dcbbd..72d6f4db9bdc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2889,14 +2889,16 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) * these pages but not on the region from pos to ppos+len-1. */ written = cifs_user_writev(iocb, from); - if (written > 0 && CIFS_CACHE_READ(cinode)) { + if (CIFS_CACHE_READ(cinode)) { /* - * Windows 7 server can delay breaking level2 oplock if a write - * request comes - break it on the client to prevent reading - * an old data. + * We have read level caching and we have just sent a write + * request to the server thus making data in the cache stale. + * Zap the cache and set oplock/lease level to NONE to avoid + * reading stale data from the cache. All subsequent read + * operations will read new data from the server. */ cifs_zap_mapping(inode); - cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n", + cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", inode); cinode->oplock = 0; } From 39a979c6e23a27087f2e707c7a25568c240e8e2c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 4 Feb 2019 15:07:24 -0600 Subject: [PATCH 2356/2608] tracing: Use strncpy instead of memcpy for string keys in hist triggers commit 9f0bbf3115ca9f91f43b7c74e9ac7d79f47fc6c2 upstream. Because there may be random garbage beyond a string's null terminator, it's not correct to copy the the complete character array for use as a hist trigger key. This results in multiple histogram entries for the 'same' string key. So, in the case of a string key, use strncpy instead of memcpy to avoid copying in the extra bytes. Before, using the gdbus entries in the following hist trigger as an example: # echo 'hist:key=comm' > /sys/kernel/debug/tracing/events/sched/sched_waking/trigger # cat /sys/kernel/debug/tracing/events/sched/sched_waking/hist ... { comm: ImgDecoder #4 } hitcount: 203 { comm: gmain } hitcount: 213 { comm: gmain } hitcount: 216 { comm: StreamTrans #73 } hitcount: 221 { comm: mozStorage #3 } hitcount: 230 { comm: gdbus } hitcount: 233 { comm: StyleThread#5 } hitcount: 253 { comm: gdbus } hitcount: 256 { comm: gdbus } hitcount: 260 { comm: StyleThread#4 } hitcount: 271 ... # cat /sys/kernel/debug/tracing/events/sched/sched_waking/hist | egrep gdbus | wc -l 51 After: # cat /sys/kernel/debug/tracing/events/sched/sched_waking/hist | egrep gdbus | wc -l 1 Link: http://lkml.kernel.org/r/50c35ae1267d64eee975b8125e151e600071d4dc.1549309756.git.tom.zanussi@linux.intel.com Cc: Namhyung Kim Cc: stable@vger.kernel.org Fixes: 79e577cbce4c4 ("tracing: Support string type key properly") Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 7eb975a2d0e1..e8c9eba9b1e7 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -872,9 +872,10 @@ static inline void add_to_key(char *compound_key, void *key, /* ensure NULL-termination */ if (size > key_field->size - 1) size = key_field->size - 1; - } - memcpy(compound_key + key_field->offset, key, size); + strncpy(compound_key + key_field->offset, (char *)key, size); + } else + memcpy(compound_key + key_field->offset, key, size); } static void event_hist_trigger(struct event_trigger_data *data, void *rec) From 219397048238608f9fafe8d160063fa72ecdaeff Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 13 Feb 2019 20:29:06 +0800 Subject: [PATCH 2357/2608] tracing: Do not free iter->trace in fail path of tracing_open_pipe() commit e7f0c424d0806b05d6f47be9f202b037eb701707 upstream. Commit d716ff71dd12 ("tracing: Remove taking of trace_types_lock in pipe files") use the current tracer instead of the copy in tracing_open_pipe(), but it forget to remove the freeing sentence in the error path. There's an error path that can call kfree(iter->trace) after the iter->trace was assigned to tr->current_trace, which would be bad to free. Link: http://lkml.kernel.org/r/1550060946-45984-1-git-send-email-yi.zhang@huawei.com Cc: stable@vger.kernel.org Fixes: d716ff71dd12 ("tracing: Remove taking of trace_types_lock in pipe files") Signed-off-by: zhangyi (F) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bd6e6142473f..287e61aba57c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5604,7 +5604,6 @@ out: return ret; fail: - kfree(iter->trace); kfree(iter); __trace_array_put(tr); mutex_unlock(&trace_types_lock); From cee9e1b3d61987cfa76f3fcf5c970355f71a06aa Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 7 Mar 2019 10:11:19 +0100 Subject: [PATCH 2358/2608] xen: fix dom0 boot on huge systems commit 01bd2ac2f55a1916d81dace12fa8d7ae1c79b5ea upstream. Commit f7c90c2aa40048 ("x86/xen: don't write ptes directly in 32-bit PV guests") introduced a regression for booting dom0 on huge systems with lots of RAM (in the TB range). Reason is that on those hosts the p2m list needs to be moved early in the boot process and this requires temporary page tables to be created. Said commit modified xen_set_pte_init() to use a hypercall for writing a PTE, but this requires the page table being in the direct mapped area, which is not the case for the temporary page tables used in xen_relocate_p2m(). As the page tables are completely written before being linked to the actual address space instead of set_pte() a plain write to memory can be used in xen_relocate_p2m(). Fixes: f7c90c2aa40048 ("x86/xen: don't write ptes directly in 32-bit PV guests") Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/mmu_pv.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 7631e6130d44..44b1f1334ef8 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2080,10 +2080,10 @@ void __init xen_relocate_p2m(void) pt = early_memremap(pt_phys, PAGE_SIZE); clear_page(pt); for (idx_pte = 0; - idx_pte < min(n_pte, PTRS_PER_PTE); - idx_pte++) { - set_pte(pt + idx_pte, - pfn_pte(p2m_pfn, PAGE_KERNEL)); + idx_pte < min(n_pte, PTRS_PER_PTE); + idx_pte++) { + pt[idx_pte] = pfn_pte(p2m_pfn, + PAGE_KERNEL); p2m_pfn++; } n_pte -= PTRS_PER_PTE; @@ -2091,8 +2091,7 @@ void __init xen_relocate_p2m(void) make_lowmem_page_readonly(__va(pt_phys)); pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, PFN_DOWN(pt_phys)); - set_pmd(pmd + idx_pt, - __pmd(_PAGE_TABLE | pt_phys)); + pmd[idx_pt] = __pmd(_PAGE_TABLE | pt_phys); pt_phys += PAGE_SIZE; } n_pt -= PTRS_PER_PMD; @@ -2100,7 +2099,7 @@ void __init xen_relocate_p2m(void) make_lowmem_page_readonly(__va(pmd_phys)); pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, PFN_DOWN(pmd_phys)); - set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); + pud[idx_pmd] = __pud(_PAGE_TABLE | pmd_phys); pmd_phys += PAGE_SIZE; } n_pmd -= PTRS_PER_PUD; From 5b25ec3acf1769142d6a244bdde3bbd35ea3b3e3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 11 Mar 2019 18:41:03 +0200 Subject: [PATCH 2359/2608] ACPI / device_sysfs: Avoid OF modalias creation for removed device commit f16eb8a4b096514ac06fb25bf599dcc792899b3d upstream. If SSDT overlay is loaded via ConfigFS and then unloaded the device, we would like to have OF modalias for, already gone. Thus, acpi_get_name() returns no allocated buffer for such case and kernel crashes afterwards: ACPI: Host-directed Dynamic ACPI Table Unload ads7950 spi-PRP0001:00: Dropping the link to regulator.0 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 80000000070d6067 P4D 80000000070d6067 PUD 70d0067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 40 Comm: kworker/u4:2 Not tainted 5.0.0+ #96 Hardware name: Intel Corporation Merrifield/BODEGA BAY, BIOS 542 2015.01.21:18.19.48 Workqueue: kacpi_hotplug acpi_device_del_work_fn RIP: 0010:create_of_modalias.isra.1+0x4c/0x150 Code: 00 00 48 89 44 24 18 31 c0 48 8d 54 24 08 48 c7 44 24 10 00 00 00 00 48 c7 44 24 08 ff ff ff ff e8 7a b0 03 00 48 8b 4c 24 10 <0f> b6 01 84 c0 74 27 48 c7 c7 00 09 f4 a5 0f b6 f0 8d 50 20 f6 04 RSP: 0000:ffffa51040297c10 EFLAGS: 00010246 RAX: 0000000000001001 RBX: 0000000000000785 RCX: 0000000000000000 RDX: 0000000000001001 RSI: 0000000000000286 RDI: ffffa2163dc042e0 RBP: ffffa216062b1196 R08: 0000000000001001 R09: ffffa21639873000 R10: ffffffffa606761d R11: 0000000000000001 R12: ffffa21639873218 R13: ffffa2163deb5060 R14: ffffa216063d1010 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffffa2163e000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000007114000 CR4: 00000000001006f0 Call Trace: __acpi_device_uevent_modalias+0xb0/0x100 spi_uevent+0xd/0x40 ... In order to fix above let create_of_modalias() check the status returned by acpi_get_name() and bail out in case of failure. Fixes: 8765c5ba1949 ("ACPI / scan: Rework modalias creation when "compatible" is present") Link: https://bugzilla.kernel.org/show_bug.cgi?id=201381 Reported-by: Ferry Toth Tested-by: Ferry Toth Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Cc: 4.1+ # 4.1+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_sysfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index a041689e5701..012aa86d4b16 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -202,11 +202,15 @@ static int create_of_modalias(struct acpi_device *acpi_dev, char *modalias, { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; const union acpi_object *of_compatible, *obj; + acpi_status status; int len, count; int i, nval; char *c; - acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); + status = acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); + if (ACPI_FAILURE(status)) + return -ENODEV; + /* DT strings are all in lower case */ for (c = buf.pointer; *c != '\0'; c++) *c = tolower(*c); From 9db739994e0317e6870ccb609c5ee636541d5277 Mon Sep 17 00:00:00 2001 From: BOUGH CHEN Date: Thu, 27 Dec 2018 11:20:24 +0000 Subject: [PATCH 2360/2608] mmc: sdhci-esdhc-imx: fix HS400 timing issue commit de0a0decf2edfc5b0c782915f4120cf990a9bd13 upstream. Now tuning reset will be done when the timing is MMC_TIMING_LEGACY/ MMC_TIMING_MMC_HS/MMC_TIMING_SD_HS. But for timing MMC_TIMING_MMC_HS, we can not do tuning reset, otherwise HS400 timing is not right. Here is the process of init HS400, first finish tuning in HS200 mode, then switch to HS mode and 8 bit DDR mode, finally switch to HS400 mode. If we do tuning reset in HS mode, this will cause HS400 mode lost the tuning setting, which will cause CRC error. Signed-off-by: Haibo Chen Cc: stable@vger.kernel.org # v4.12+ Acked-by: Adrian Hunter Fixes: d9370424c948 ("mmc: sdhci-esdhc-imx: reset tuning circuit when power on mmc card") Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 59041f07b53c..ff5c4ad37a3a 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -961,6 +961,7 @@ static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing) case MMC_TIMING_UHS_SDR25: case MMC_TIMING_UHS_SDR50: case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_MMC_HS: case MMC_TIMING_MMC_HS200: writel(m, host->ioaddr + ESDHC_MIX_CTRL); break; From e265265ad82652fdbfd1e0ac7bd1b9406e78f13c Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Tue, 29 Jan 2019 13:14:22 +0530 Subject: [PATCH 2361/2608] spi: ti-qspi: Fix mmap read when more than one CS in use commit 673c865efbdc5fec3cc525c46d71844d42c60072 upstream. Commit 4dea6c9b0b64 ("spi: spi-ti-qspi: add mmap mode read support") has has got order of parameter wrong when calling regmap_update_bits() to select CS for mmap access. Mask and value arguments are interchanged. Code will work on a system with single slave, but fails when more than one CS is in use. Fix this by correcting the order of parameters when calling regmap_update_bits(). Fixes: 4dea6c9b0b64 ("spi: spi-ti-qspi: add mmap mode read support") Cc: stable@vger.kernel.org Signed-off-by: Vignesh R Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ti-qspi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index c24d9b45a27c..d0ea62d151c0 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -490,8 +490,8 @@ static void ti_qspi_enable_memory_map(struct spi_device *spi) ti_qspi_write(qspi, MM_SWITCH, QSPI_SPI_SWITCH_REG); if (qspi->ctrl_base) { regmap_update_bits(qspi->ctrl_base, qspi->ctrl_reg, - MEM_CS_EN(spi->chip_select), - MEM_CS_MASK); + MEM_CS_MASK, + MEM_CS_EN(spi->chip_select)); } qspi->mmap_enabled = true; } @@ -503,7 +503,7 @@ static void ti_qspi_disable_memory_map(struct spi_device *spi) ti_qspi_write(qspi, 0, QSPI_SPI_SWITCH_REG); if (qspi->ctrl_base) regmap_update_bits(qspi->ctrl_base, qspi->ctrl_reg, - 0, MEM_CS_MASK); + MEM_CS_MASK, 0); qspi->mmap_enabled = false; } From 008a9cb95b7ad262e5385dd35ae4e1759a71373c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Feb 2019 23:21:28 +0300 Subject: [PATCH 2362/2608] spi: pxa2xx: Setup maximum supported DMA transfer length commit ef070b4e4aa25bb5f8632ad196644026c11903bf upstream. When the commit b6ced294fb61 ("spi: pxa2xx: Switch to SPI core DMA mapping functionality") switches to SPI core provided DMA helpers, it missed to setup maximum supported DMA transfer length for the controller and thus users mistakenly try to send more data than supported with the following warning: ili9341 spi-PRP0001:01: DMA disabled for transfer length 153600 greater than 65536 Setup maximum supported DMA transfer length in order to make users know the limit. Fixes: b6ced294fb61 ("spi: pxa2xx: Switch to SPI core DMA mapping functionality") Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 3a2e46e49405..c0e915d8da5d 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1698,6 +1698,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) platform_info->enable_dma = false; } else { master->can_dma = pxa2xx_spi_can_dma; + master->max_dma_len = MAX_DMA_LEN; } } From 7bb85c8339750102cbdf45e1c86acf976168ecaa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 9 Feb 2019 18:14:14 +0100 Subject: [PATCH 2363/2608] regulator: s2mps11: Fix steps for buck7, buck8 and LDO35 commit 56b5d4ea778c1b0989c5cdb5406d4a488144c416 upstream. LDO35 uses 25 mV step, not 50 mV. Bucks 7 and 8 use 12.5 mV step instead of 6.25 mV. Wrong step caused over-voltage (LDO35) or under-voltage (buck7 and 8) if regulators were used (e.g. on Exynos5420 Arndale Octa board). Cc: Fixes: cb74685ecb39 ("regulator: s2mps11: Add samsung s2mps11 regulator driver") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/s2mps11.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index 7726b874e539..17a816656b92 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -376,7 +376,7 @@ static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_ldo(32, STEP_50_MV), regulator_desc_s2mps11_ldo(33, STEP_50_MV), regulator_desc_s2mps11_ldo(34, STEP_50_MV), - regulator_desc_s2mps11_ldo(35, STEP_50_MV), + regulator_desc_s2mps11_ldo(35, STEP_25_MV), regulator_desc_s2mps11_ldo(36, STEP_50_MV), regulator_desc_s2mps11_ldo(37, STEP_50_MV), regulator_desc_s2mps11_ldo(38, STEP_50_MV), @@ -386,8 +386,8 @@ static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_buck1_4(4), regulator_desc_s2mps11_buck5, regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_12_5_MV), + regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_12_5_MV), regulator_desc_s2mps11_buck9, regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV), }; From 3b233f440d70023b292d525864ac08961e3b1622 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 10 Jan 2019 12:11:16 +0800 Subject: [PATCH 2364/2608] regulator: max77620: Initialize values for DT properties commit 0ab66b3c326ef8f77dae9f528118966365757c0c upstream. If regulator DT node doesn't exist, its of_parse_cb callback function isn't called. Then all values for DT properties are filled with zero. This leads to wrong register update for FPS and POK settings. Signed-off-by: Jinyoung Park Signed-off-by: Mark Zhang Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/max77620-regulator.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c index b94e3a721721..cd93cf53e23c 100644 --- a/drivers/regulator/max77620-regulator.c +++ b/drivers/regulator/max77620-regulator.c @@ -1,7 +1,7 @@ /* * Maxim MAX77620 Regulator driver * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. * * Author: Mallikarjun Kasoju * Laxman Dewangan @@ -803,6 +803,14 @@ static int max77620_regulator_probe(struct platform_device *pdev) rdesc = &rinfo[id].desc; pmic->rinfo[id] = &max77620_regs_info[id]; pmic->enable_power_mode[id] = MAX77620_POWER_MODE_NORMAL; + pmic->reg_pdata[id].active_fps_src = -1; + pmic->reg_pdata[id].active_fps_pd_slot = -1; + pmic->reg_pdata[id].active_fps_pu_slot = -1; + pmic->reg_pdata[id].suspend_fps_src = -1; + pmic->reg_pdata[id].suspend_fps_pd_slot = -1; + pmic->reg_pdata[id].suspend_fps_pu_slot = -1; + pmic->reg_pdata[id].power_ok = -1; + pmic->reg_pdata[id].ramp_rate_setting = -1; ret = max77620_read_slew_rate(pmic, id); if (ret < 0) From f5d71a9ef580f3a8ae9f9ed598fecdc7d5566f16 Mon Sep 17 00:00:00 2001 From: Stuart Menefy Date: Tue, 12 Feb 2019 21:51:18 +0000 Subject: [PATCH 2365/2608] regulator: s2mpa01: Fix step values for some LDOs commit 28c4f730d2a44f2591cb104091da29a38dac49fe upstream. The step values for some of the LDOs appears to be incorrect, resulting in incorrect voltages (or at least, ones which are different from the Samsung 3.4 vendor kernel). Signed-off-by: Stuart Menefy Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/s2mpa01.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/s2mpa01.c b/drivers/regulator/s2mpa01.c index 48f0ca90743c..076735a3c85a 100644 --- a/drivers/regulator/s2mpa01.c +++ b/drivers/regulator/s2mpa01.c @@ -304,13 +304,13 @@ static const struct regulator_desc regulators[] = { regulator_desc_ldo(2, STEP_50_MV), regulator_desc_ldo(3, STEP_50_MV), regulator_desc_ldo(4, STEP_50_MV), - regulator_desc_ldo(5, STEP_50_MV), + regulator_desc_ldo(5, STEP_25_MV), regulator_desc_ldo(6, STEP_25_MV), regulator_desc_ldo(7, STEP_50_MV), regulator_desc_ldo(8, STEP_50_MV), regulator_desc_ldo(9, STEP_50_MV), regulator_desc_ldo(10, STEP_50_MV), - regulator_desc_ldo(11, STEP_25_MV), + regulator_desc_ldo(11, STEP_50_MV), regulator_desc_ldo(12, STEP_50_MV), regulator_desc_ldo(13, STEP_50_MV), regulator_desc_ldo(14, STEP_50_MV), @@ -321,11 +321,11 @@ static const struct regulator_desc regulators[] = { regulator_desc_ldo(19, STEP_50_MV), regulator_desc_ldo(20, STEP_50_MV), regulator_desc_ldo(21, STEP_50_MV), - regulator_desc_ldo(22, STEP_25_MV), - regulator_desc_ldo(23, STEP_25_MV), + regulator_desc_ldo(22, STEP_50_MV), + regulator_desc_ldo(23, STEP_50_MV), regulator_desc_ldo(24, STEP_50_MV), regulator_desc_ldo(25, STEP_50_MV), - regulator_desc_ldo(26, STEP_50_MV), + regulator_desc_ldo(26, STEP_25_MV), regulator_desc_buck1_4(1), regulator_desc_buck1_4(2), regulator_desc_buck1_4(3), From d265666e879c3e10701fe28106ff6d45419f8f3a Mon Sep 17 00:00:00 2001 From: Stuart Menefy Date: Sun, 10 Feb 2019 22:51:13 +0000 Subject: [PATCH 2366/2608] clocksource/drivers/exynos_mct: Move one-shot check from tick clear to ISR commit a5719a40aef956ba704f2aa1c7b977224d60fa96 upstream. When a timer tick occurs and the clock is in one-shot mode, the timer needs to be stopped to prevent it triggering subsequent interrupts. Currently this code is in exynos4_mct_tick_clear(), but as it is only needed when an ISR occurs move it into exynos4_mct_tick_isr(), leaving exynos4_mct_tick_clear() just doing what its name suggests it should. Signed-off-by: Stuart Menefy Reviewed-by: Krzysztof Kozlowski Tested-by: Marek Szyprowski Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/exynos_mct.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 7a244b681876..1e325f89d408 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -388,6 +388,13 @@ static void exynos4_mct_tick_start(unsigned long cycles, exynos4_mct_write(tmp, mevt->base + MCT_L_TCON_OFFSET); } +static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) +{ + /* Clear the MCT tick interrupt */ + if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1) + exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET); +} + static int exynos4_tick_set_next_event(unsigned long cycles, struct clock_event_device *evt) { @@ -420,8 +427,11 @@ static int set_state_periodic(struct clock_event_device *evt) return 0; } -static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) +static irqreturn_t exynos4_mct_tick_isr(int irq, void *dev_id) { + struct mct_clock_event_device *mevt = dev_id; + struct clock_event_device *evt = &mevt->evt; + /* * This is for supporting oneshot mode. * Mct would generate interrupt periodically @@ -430,16 +440,6 @@ static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) if (!clockevent_state_periodic(&mevt->evt)) exynos4_mct_tick_stop(mevt); - /* Clear the MCT tick interrupt */ - if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1) - exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET); -} - -static irqreturn_t exynos4_mct_tick_isr(int irq, void *dev_id) -{ - struct mct_clock_event_device *mevt = dev_id; - struct clock_event_device *evt = &mevt->evt; - exynos4_mct_tick_clear(mevt); evt->event_handler(evt); From fddab0355a57d193eb82139dc6ae2215cb64ee43 Mon Sep 17 00:00:00 2001 From: Stuart Menefy Date: Sun, 10 Feb 2019 22:51:14 +0000 Subject: [PATCH 2367/2608] clocksource/drivers/exynos_mct: Clear timer interrupt when shutdown commit d2f276c8d3c224d5b493c42b6cf006ae4e64fb1c upstream. When shutting down the timer, ensure that after we have stopped the timer any pending interrupts are cleared. This fixes a problem when suspending, as interrupts are disabled before the timer is stopped, so the timer interrupt may still be asserted, preventing the system entering a low power state when the wfi is executed. Signed-off-by: Stuart Menefy Reviewed-by: Krzysztof Kozlowski Tested-by: Marek Szyprowski Cc: # v4.3+ Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/exynos_mct.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 1e325f89d408..d55c30f6981d 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -411,6 +411,7 @@ static int set_state_shutdown(struct clock_event_device *evt) mevt = container_of(evt, struct mct_clock_event_device, evt); exynos4_mct_tick_stop(mevt); + exynos4_mct_tick_clear(mevt); return 0; } From 6f87f879251d214ed8cd1ef8a6b7df5a612395c7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 14 Feb 2019 15:40:56 +0100 Subject: [PATCH 2368/2608] s390/setup: fix early warning messages commit 8727638426b0aea59d7f904ad8ddf483f9234f88 upstream. The setup_lowcore() function creates a new prefix page for the boot CPU. The PSW mask for the system_call, external interrupt, i/o interrupt and the program check handler have the DAT bit set in this new prefix page. At the time setup_lowcore is called the system still runs without virtual address translation, the paging_init() function creates the kernel page table and loads the CR13 with the kernel ASCE. Any code between setup_lowcore() and the end of paging_init() that has a BUG or WARN statement will create a program check that can not be handled correctly as there is no kernel page table yet. To allow early WARN statements initially setup the lowcore with DAT off and set the DAT bit only after paging_init() has completed. Cc: stable@vger.kernel.org Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/setup.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3cb71fc94995..a9f5323f2f51 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -300,7 +300,7 @@ early_param("vmalloc", parse_vmalloc); void *restart_stack __section(.data); -static void __init setup_lowcore(void) +static void __init setup_lowcore_dat_off(void) { struct lowcore *lc; @@ -311,19 +311,16 @@ static void __init setup_lowcore(void) lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc)); lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = (unsigned long) restart_int_handler; - lc->external_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->external_new_psw.addr = (unsigned long) ext_int_handler; lc->svc_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = (unsigned long) system_call; - lc->program_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; lc->kernel_stack = ((unsigned long) &init_thread_union) @@ -391,6 +388,17 @@ static void __init setup_lowcore(void) lowcore_ptr[0] = lc; } +static void __init setup_lowcore_dat_on(void) +{ + struct lowcore *lc; + + lc = lowcore_ptr[0]; + lc->external_new_psw.mask |= PSW_MASK_DAT; + lc->svc_new_psw.mask |= PSW_MASK_DAT; + lc->program_new_psw.mask |= PSW_MASK_DAT; + lc->io_new_psw.mask |= PSW_MASK_DAT; +} + static struct resource code_resource = { .name = "Kernel code", .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, @@ -948,7 +956,7 @@ void __init setup_arch(char **cmdline_p) #endif setup_resources(); - setup_lowcore(); + setup_lowcore_dat_off(); smp_fill_possible_mask(); cpu_detect_mhz_feature(); cpu_init(); @@ -961,6 +969,12 @@ void __init setup_arch(char **cmdline_p) */ paging_init(); + /* + * After paging_init created the kernel page table, the new PSWs + * in lowcore can now run with DAT enabled. + */ + setup_lowcore_dat_on(); + /* Setup default console */ conmode_default(); set_preferred_console(); From 79fa67b96f559a5af47bc1163d047672400ce8d1 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 21 Jan 2019 13:19:43 +0100 Subject: [PATCH 2369/2608] s390/virtio: handle find on invalid queue gracefully commit 3438b2c039b4bf26881786a1f3450f016d66ad11 upstream. A queue with a capacity of zero is clearly not a valid virtio queue. Some emulators report zero queue size if queried with an invalid queue index. Instead of crashing in this case let us just return -ENOENT. To make that work properly, let us fix the notifier cleanup logic as well. Cc: stable@vger.kernel.org Signed-off-by: Halil Pasic Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/virtio/virtio_ccw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 0847d05e138b..f9cf676a0469 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -275,6 +275,8 @@ static void virtio_ccw_drop_indicators(struct virtio_ccw_device *vcdev) { struct virtio_ccw_vq_info *info; + if (!vcdev->airq_info) + return; list_for_each_entry(info, &vcdev->virtqueues, node) drop_airq_indicator(info->vq, vcdev->airq_info); } @@ -416,7 +418,7 @@ static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); if (ret) return ret; - return vcdev->config_block->num; + return vcdev->config_block->num ?: -ENOENT; } static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw) From bd69e827186e54ed51f4cc423e5603033248afa1 Mon Sep 17 00:00:00 2001 From: Felipe Franciosi Date: Wed, 27 Feb 2019 16:10:34 +0000 Subject: [PATCH 2370/2608] scsi: virtio_scsi: don't send sc payload with tmfs commit 3722e6a52174d7c3a00e6f5efd006ca093f346c1 upstream. The virtio scsi spec defines struct virtio_scsi_ctrl_tmf as a set of device-readable records and a single device-writable response entry: struct virtio_scsi_ctrl_tmf { // Device-readable part le32 type; le32 subtype; u8 lun[8]; le64 id; // Device-writable part u8 response; } The above should be organised as two descriptor entries (or potentially more if using VIRTIO_F_ANY_LAYOUT), but without any extra data after "le64 id" or after "u8 response". The Linux driver doesn't respect that, with virtscsi_abort() and virtscsi_device_reset() setting cmd->sc before calling virtscsi_tmf(). It results in the original scsi command payload (or writable buffers) added to the tmf. This fixes the problem by leaving cmd->sc zeroed out, which makes virtscsi_kick_cmd() add the tmf to the control vq without any payload. Cc: stable@vger.kernel.org Signed-off-by: Felipe Franciosi Reviewed-by: Paolo Bonzini Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/virtio_scsi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 54e3a0f6844c..1f4bd7d0154d 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -638,7 +638,6 @@ static int virtscsi_device_reset(struct scsi_cmnd *sc) return FAILED; memset(cmd, 0, sizeof(*cmd)); - cmd->sc = sc; cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){ .type = VIRTIO_SCSI_T_TMF, .subtype = cpu_to_virtio32(vscsi->vdev, @@ -697,7 +696,6 @@ static int virtscsi_abort(struct scsi_cmnd *sc) return FAILED; memset(cmd, 0, sizeof(*cmd)); - cmd->sc = sc; cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){ .type = VIRTIO_SCSI_T_TMF, .subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK, From 02a29f3c1ff08a77432fac2bf581faeeada12589 Mon Sep 17 00:00:00 2001 From: Sagar Biradar Date: Thu, 7 Mar 2019 23:26:41 -0800 Subject: [PATCH 2371/2608] scsi: aacraid: Fix performance issue on logical drives commit 0015437cc046e5ec2b57b00ff8312b8d432eac7c upstream. Fix performance issue where the queue depth for SmartIOC logical volumes is set to 1, and allow the usual logical volume code to be executed Fixes: a052865fe287 (aacraid: Set correct Queue Depth for HBA1000 RAW disks) Cc: stable@vger.kernel.org Signed-off-by: Sagar Biradar Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/linit.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 4917649cacd5..053a31c5485f 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -413,13 +413,16 @@ static int aac_slave_configure(struct scsi_device *sdev) if (chn < AAC_MAX_BUSES && tid < AAC_MAX_TARGETS && aac->sa_firmware) { devtype = aac->hba_map[chn][tid].devtype; - if (devtype == AAC_DEVTYPE_NATIVE_RAW) + if (devtype == AAC_DEVTYPE_NATIVE_RAW) { depth = aac->hba_map[chn][tid].qd_limit; - else if (devtype == AAC_DEVTYPE_ARC_RAW) + set_timeout = 1; + goto common_config; + } + if (devtype == AAC_DEVTYPE_ARC_RAW) { set_qd_dev_type = true; - - set_timeout = 1; - goto common_config; + set_timeout = 1; + goto common_config; + } } if (aac->jbod && (sdev->type == TYPE_DISK)) From 910da5aafac4a1d0da7495935fb325eaf9cade56 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 12 Feb 2019 16:21:05 -0500 Subject: [PATCH 2372/2608] scsi: sd: Optimal I/O size should be a multiple of physical block size commit a83da8a4509d3ebfe03bb7fffce022e4d5d4764f upstream. It was reported that some devices report an OPTIMAL TRANSFER LENGTH of 0xFFFF blocks. That looks bogus, especially for a device with a 4096-byte physical block size. Ignore OPTIMAL TRANSFER LENGTH if it is not a multiple of the device's reported physical block size. To make the sanity checking conditionals more readable--and to facilitate printing warnings--relocate the checking to a helper function. No functional change aside from the printks. Cc: Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199759 Reported-by: Christoph Anton Mitterer Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 59 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 048fccc72e03..d0cc8fb40f63 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3077,6 +3077,55 @@ static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer) sdkp->security = 1; } +/* + * Determine the device's preferred I/O size for reads and writes + * unless the reported value is unreasonably small, large, not a + * multiple of the physical block size, or simply garbage. + */ +static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, + unsigned int dev_max) +{ + struct scsi_device *sdp = sdkp->device; + unsigned int opt_xfer_bytes = + logical_to_bytes(sdp, sdkp->opt_xfer_blocks); + + if (sdkp->opt_xfer_blocks > dev_max) { + sd_first_printk(KERN_WARNING, sdkp, + "Optimal transfer size %u logical blocks " \ + "> dev_max (%u logical blocks)\n", + sdkp->opt_xfer_blocks, dev_max); + return false; + } + + if (sdkp->opt_xfer_blocks > SD_DEF_XFER_BLOCKS) { + sd_first_printk(KERN_WARNING, sdkp, + "Optimal transfer size %u logical blocks " \ + "> sd driver limit (%u logical blocks)\n", + sdkp->opt_xfer_blocks, SD_DEF_XFER_BLOCKS); + return false; + } + + if (opt_xfer_bytes < PAGE_SIZE) { + sd_first_printk(KERN_WARNING, sdkp, + "Optimal transfer size %u bytes < " \ + "PAGE_SIZE (%u bytes)\n", + opt_xfer_bytes, (unsigned int)PAGE_SIZE); + return false; + } + + if (opt_xfer_bytes & (sdkp->physical_block_size - 1)) { + sd_first_printk(KERN_WARNING, sdkp, + "Optimal transfer size %u bytes not a " \ + "multiple of physical block size (%u bytes)\n", + opt_xfer_bytes, sdkp->physical_block_size); + return false; + } + + sd_first_printk(KERN_INFO, sdkp, "Optimal transfer size %u bytes\n", + opt_xfer_bytes); + return true; +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3146,15 +3195,7 @@ static int sd_revalidate_disk(struct gendisk *disk) dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max); - /* - * Determine the device's preferred I/O size for reads and writes - * unless the reported value is unreasonably small, large, or - * garbage. - */ - if (sdkp->opt_xfer_blocks && - sdkp->opt_xfer_blocks <= dev_max && - sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS && - logical_to_bytes(sdp, sdkp->opt_xfer_blocks) >= PAGE_SIZE) { + if (sd_validate_opt_xfer_size(sdkp, dev_max)) { q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks); } else From 8ec3bcb579a10a8ca1be4e5e12b271011d392f27 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Jan 2019 10:34:56 -0800 Subject: [PATCH 2373/2608] scsi: target/iscsi: Avoid iscsit_release_commands_from_conn() deadlock commit 32e36bfbcf31452a854263e7c7f32fbefc4b44d8 upstream. When using SCSI passthrough in combination with the iSCSI target driver then cmd->t_state_lock may be obtained from interrupt context. Hence, all code that obtains cmd->t_state_lock from thread context must disable interrupts first. This patch avoids that lockdep reports the following: WARNING: inconsistent lock state 4.18.0-dbg+ #1 Not tainted -------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. iscsi_ttx/1800 [HC1[1]:SC0[2]:HE0:SE0] takes: 000000006e7b0ceb (&(&cmd->t_state_lock)->rlock){?...}, at: target_complete_cmd+0x47/0x2c0 [target_core_mod] {HARDIRQ-ON-W} state was registered at: lock_acquire+0xd2/0x260 _raw_spin_lock+0x32/0x50 iscsit_close_connection+0x97e/0x1020 [iscsi_target_mod] iscsit_take_action_for_connection_exit+0x108/0x200 [iscsi_target_mod] iscsi_target_rx_thread+0x180/0x190 [iscsi_target_mod] kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 irq event stamp: 1281 hardirqs last enabled at (1279): [] __local_bh_enable_ip+0xa9/0x160 hardirqs last disabled at (1281): [] interrupt_entry+0xb5/0xd0 softirqs last enabled at (1278): [] lock_sock_nested+0x51/0xc0 softirqs last disabled at (1280): [] ip6_finish_output2+0x124/0xe40 [ipv6] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&cmd->t_state_lock)->rlock); lock(&(&cmd->t_state_lock)->rlock); --- drivers/target/iscsi/iscsi_target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index d2cafdae8317..fb7bd422e2e1 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4077,9 +4077,9 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) struct se_cmd *se_cmd = &cmd->se_cmd; if (se_cmd->se_tfo != NULL) { - spin_lock(&se_cmd->t_state_lock); + spin_lock_irq(&se_cmd->t_state_lock); se_cmd->transport_state |= CMD_T_FABRIC_STOP; - spin_unlock(&se_cmd->t_state_lock); + spin_unlock_irq(&se_cmd->t_state_lock); } } spin_unlock_bh(&conn->cmd_lock); From a8dddf8b08b943bd32cd9581e75e4747cec5f754 Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Thu, 24 Jan 2019 14:03:06 +0100 Subject: [PATCH 2374/2608] fs/devpts: always delete dcache dentry-s in dput() commit 73052b0daee0b750b39af18460dfec683e4f5887 upstream. d_delete only unhashes an entry if it is reached with dentry->d_lockref.count != 1. Prior to commit 8ead9dd54716 ("devpts: more pty driver interface cleanups"), d_delete was called on a dentry from devpts_pty_kill with two references held, which would trigger the unhashing, and the subsequent dputs would release it. Commit 8ead9dd54716 reworked devpts_pty_kill to stop acquiring the second reference from d_find_alias, and the d_delete call left the dentries still on the hashed list without actually ever being dropped from dcache before explicit cleanup. This causes the number of negative dentries for devpts to pile up, and an `ls /dev/pts` invocation can take seconds to return. Provide always_delete_dentry() from simple_dentry_operations as .d_delete for devpts, to make the dentry be dropped from dcache. Without this cleanup, the number of dentries in /dev/pts/ can be grown arbitrarily as: `python -c 'import pty; pty.spawn(["ls", "/dev/pts"])'` A systemtap probe on dcache_readdir to count d_subdirs shows this count to increase with each pty spawn invocation above: probe kernel.function("dcache_readdir") { subdirs = &@cast($file->f_path->dentry, "dentry")->d_subdirs; p = subdirs; p = @cast(p, "list_head")->next; i = 0 while (p != subdirs) { p = @cast(p, "list_head")->next; i = i+1; } printf("number of dentries: %d\n", i); } Fixes: 8ead9dd54716 ("devpts: more pty driver interface cleanups") Signed-off-by: Varad Gautam Reported-by: Zheng Wang Reported-by: Brandon Schwartz Root-caused-by: Maximilian Heyne Root-caused-by: Nicolas Pernas Maradei CC: David Woodhouse CC: Maximilian Heyne CC: Stefan Nuernberger CC: Amit Shah CC: Linus Torvalds CC: Greg Kroah-Hartman CC: Al Viro CC: Christian Brauner CC: Eric W. Biederman CC: Matthew Wilcox CC: Eric Biggers CC: # 4.9+ Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/devpts/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 542364bf923e..32f6f1c683d9 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -439,6 +439,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent) s->s_blocksize_bits = 10; s->s_magic = DEVPTS_SUPER_MAGIC; s->s_op = &devpts_sops; + s->s_d_op = &simple_dentry_operations; s->s_time_gran = 1; error = -ENOMEM; From 1bdc347e64e359e4383a08fdc8ed9f9a3ccb4a8d Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 23 Jan 2019 15:19:17 +0100 Subject: [PATCH 2375/2608] splice: don't merge into linked buffers commit a0ce2f0aa6ad97c3d4927bf2ca54bcebdf062d55 upstream. Before this patch, it was possible for two pipes to affect each other after data had been transferred between them with tee(): ============ $ cat tee_test.c int main(void) { int pipe_a[2]; if (pipe(pipe_a)) err(1, "pipe"); int pipe_b[2]; if (pipe(pipe_b)) err(1, "pipe"); if (write(pipe_a[1], "abcd", 4) != 4) err(1, "write"); if (tee(pipe_a[0], pipe_b[1], 2, 0) != 2) err(1, "tee"); if (write(pipe_b[1], "xx", 2) != 2) err(1, "write"); char buf[5]; if (read(pipe_a[0], buf, 4) != 4) err(1, "read"); buf[4] = 0; printf("got back: '%s'\n", buf); } $ gcc -o tee_test tee_test.c $ ./tee_test got back: 'abxx' $ ============ As suggested by Al Viro, fix it by creating a separate type for non-mergeable pipe buffers, then changing the types of buffers in splice_pipe_to_pipe() and link_pipe(). Cc: Fixes: 7c77f0b3f920 ("splice: implement pipe to pipe splicing") Fixes: 70524490ee2e ("[PATCH] splice: add support for sys_tee()") Suggested-by: Al Viro Signed-off-by: Jann Horn Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 14 ++++++++++++++ fs/splice.c | 4 ++++ include/linux/pipe_fs_i.h | 1 + 3 files changed, 19 insertions(+) diff --git a/fs/pipe.c b/fs/pipe.c index 8ef7d7bef775..8f9628494981 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -239,6 +239,14 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = { + .can_merge = 0, + .confirm = generic_pipe_buf_confirm, + .release = anon_pipe_buf_release, + .steal = anon_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + static const struct pipe_buf_operations packet_pipe_buf_ops = { .can_merge = 0, .confirm = generic_pipe_buf_confirm, @@ -247,6 +255,12 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +void pipe_buf_mark_unmergeable(struct pipe_buffer *buf) +{ + if (buf->ops == &anon_pipe_buf_ops) + buf->ops = &anon_pipe_buf_nomerge_ops; +} + static ssize_t pipe_read(struct kiocb *iocb, struct iov_iter *to) { diff --git a/fs/splice.c b/fs/splice.c index f3084cce0ea6..00d2f142dcf9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1580,6 +1580,8 @@ retry: */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + pipe_buf_mark_unmergeable(obuf); + obuf->len = len; opipe->nrbufs++; ibuf->offset += obuf->len; @@ -1654,6 +1656,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + pipe_buf_mark_unmergeable(obuf); + if (obuf->len > len) obuf->len = len; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 6a80cfc63e0c..befdcd304b3d 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -183,6 +183,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); +void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; From 1255e28103afa7284cee26b278ea46e2c171d6fd Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 16 Jan 2019 16:23:24 +1100 Subject: [PATCH 2376/2608] m68k: Add -ffreestanding to CFLAGS commit 28713169d879b67be2ef2f84dcf54905de238294 upstream. This patch fixes a build failure when using GCC 8.1: /usr/bin/ld: block/partitions/ldm.o: in function `ldm_parse_tocblock': block/partitions/ldm.c:153: undefined reference to `strcmp' This is caused by a new optimization which effectively replaces a strncmp() call with a strcmp() call. This affects a number of strncmp() call sites in the kernel. The entire class of optimizations is avoided with -fno-builtin, which gets enabled by -ffreestanding. This may avoid possible future build failures in case new optimizations appear in future compilers. I haven't done any performance measurements with this patch but I did count the function calls in a defconfig build. For example, there are now 23 more sprintf() calls and 39 fewer strcpy() calls. The effect on the other libc functions is smaller. If this harms performance we can tackle that regression by optimizing the call sites, ideally using semantic patches. That way, clang and ICC builds might benfit too. Cc: stable@vger.kernel.org Reference: https://marc.info/?l=linux-m68k&m=154514816222244&w=2 Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- arch/m68k/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index f0dd9fc84002..a229d28e14cc 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -58,7 +58,10 @@ cpuflags-$(CONFIG_M5206e) := $(call cc-option,-mcpu=5206e,-m5200) cpuflags-$(CONFIG_M5206) := $(call cc-option,-mcpu=5206,-m5200) KBUILD_AFLAGS += $(cpuflags-y) -KBUILD_CFLAGS += $(cpuflags-y) -pipe +KBUILD_CFLAGS += $(cpuflags-y) + +KBUILD_CFLAGS += -pipe -ffreestanding + ifdef CONFIG_MMU # without -fno-strength-reduce the 53c7xx.c driver fails ;-( KBUILD_CFLAGS += -fno-strength-reduce -ffixed-a2 From bc726ae072b256230915ff341cdc93a51259686c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 13 Dec 2018 21:16:56 +0000 Subject: [PATCH 2377/2608] Btrfs: setup a nofs context for memory allocation at __btrfs_set_acl commit a0873490660246db587849a9e172f2b7b21fa88a upstream. We are holding a transaction handle when setting an acl, therefore we can not allocate the xattr value buffer using GFP_KERNEL, as we could deadlock if reclaim is triggered by the allocation, therefore setup a nofs context. Fixes: 39a27ec1004e8 ("btrfs: use GFP_KERNEL for xattr and acl allocations") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/acl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 1ba49ebe67da..1c42d9f1d6c8 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "ctree.h" @@ -89,8 +90,16 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans, } if (acl) { + unsigned int nofs_flag; + size = posix_acl_xattr_size(acl->a_count); + /* + * We're holding a transaction handle, so use a NOFS memory + * allocation context to avoid deadlock if reclaim happens. + */ + nofs_flag = memalloc_nofs_save(); value = kmalloc(size, GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); if (!value) { ret = -ENOMEM; goto out; From 904bc9a1ce0a1037f3c2d09fe2e84de679255bb0 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 18 Feb 2019 11:28:37 +0100 Subject: [PATCH 2378/2608] btrfs: ensure that a DUP or RAID1 block group has exactly two stripes commit 349ae63f40638a28c6fce52e8447c2d14b84cc0c upstream. We recently had a customer issue with a corrupted filesystem. When trying to mount this image btrfs panicked with a division by zero in calc_stripe_length(). The corrupt chunk had a 'num_stripes' value of 1. calc_stripe_length() takes this value and divides it by the number of copies the RAID profile is expected to have to calculate the amount of data stripes. As a DUP profile is expected to have 2 copies this division resulted in 1/2 = 0. Later then the 'data_stripes' variable is used as a divisor in the stripe length calculation which results in a division by 0 and thus a kernel panic. When encountering a filesystem with a DUP block group and a 'num_stripes' value unequal to 2, refuse mounting as the image is corrupted and will lead to unexpected behaviour. Code inspection showed a RAID1 block group has the same issues. Fixes: e06cd3dd7cea ("Btrfs: add validadtion checks for chunk loading") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9663b6aa2a56..38ed8e259e00 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6420,10 +6420,10 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, } if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || - (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) || (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || - (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) { btrfs_err(fs_info, From bc8815ce058d2a215fca83aa2416899d82670f42 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 14 Feb 2019 15:17:20 +0000 Subject: [PATCH 2379/2608] Btrfs: fix corruption reading shared and compressed extents after hole punching commit 8e928218780e2f1cf2f5891c7575e8f0b284fcce upstream. In the past we had data corruption when reading compressed extents that are shared within the same file and they are consecutive, this got fixed by commit 005efedf2c7d0 ("Btrfs: fix read corruption of compressed and shared extents") and by commit 808f80b46790f ("Btrfs: update fix for read corruption of compressed and shared extents"). However there was a case that was missing in those fixes, which is when the shared and compressed extents are referenced with a non-zero offset. The following shell script creates a reproducer for this issue: #!/bin/bash mkfs.btrfs -f /dev/sdc &> /dev/null mount -o compress /dev/sdc /mnt/sdc # Create a file with 3 consecutive compressed extents, each has an # uncompressed size of 128Kb and a compressed size of 4Kb. for ((i = 1; i <= 3; i++)); do head -c 4096 /dev/zero for ((j = 1; j <= 31; j++)); do head -c 4096 /dev/zero | tr '\0' "\377" done done > /mnt/sdc/foobar sync echo "Digest after file creation: $(md5sum /mnt/sdc/foobar)" # Clone the first extent into offsets 128K and 256K. xfs_io -c "reflink /mnt/sdc/foobar 0 128K 128K" /mnt/sdc/foobar xfs_io -c "reflink /mnt/sdc/foobar 0 256K 128K" /mnt/sdc/foobar sync echo "Digest after cloning: $(md5sum /mnt/sdc/foobar)" # Punch holes into the regions that are already full of zeroes. xfs_io -c "fpunch 0 4K" /mnt/sdc/foobar xfs_io -c "fpunch 128K 4K" /mnt/sdc/foobar xfs_io -c "fpunch 256K 4K" /mnt/sdc/foobar sync echo "Digest after hole punching: $(md5sum /mnt/sdc/foobar)" echo "Dropping page cache..." sysctl -q vm.drop_caches=1 echo "Digest after hole punching: $(md5sum /mnt/sdc/foobar)" umount /dev/sdc When running the script we get the following output: Digest after file creation: 5a0888d80d7ab1fd31c229f83a3bbcc8 /mnt/sdc/foobar linked 131072/131072 bytes at offset 131072 128 KiB, 1 ops; 0.0033 sec (36.960 MiB/sec and 295.6830 ops/sec) linked 131072/131072 bytes at offset 262144 128 KiB, 1 ops; 0.0015 sec (78.567 MiB/sec and 628.5355 ops/sec) Digest after cloning: 5a0888d80d7ab1fd31c229f83a3bbcc8 /mnt/sdc/foobar Digest after hole punching: 5a0888d80d7ab1fd31c229f83a3bbcc8 /mnt/sdc/foobar Dropping page cache... Digest after hole punching: fba694ae8664ed0c2e9ff8937e7f1484 /mnt/sdc/foobar This happens because after reading all the pages of the extent in the range from 128K to 256K for example, we read the hole at offset 256K and then when reading the page at offset 260K we don't submit the existing bio, which is responsible for filling all the page in the range 128K to 256K only, therefore adding the pages from range 260K to 384K to the existing bio and submitting it after iterating over the entire range. Once the bio completes, the uncompressed data fills only the pages in the range 128K to 256K because there's no more data read from disk, leaving the pages in the range 260K to 384K unfilled. It is just a slightly different variant of what was solved by commit 005efedf2c7d0 ("Btrfs: fix read corruption of compressed and shared extents"). Fix this by forcing a bio submit, during readpages(), whenever we find a compressed extent map for a page that is different from the extent map for the previous page or has a different starting offset (in case it's the same compressed extent), instead of the extent map's original start offset. A test case for fstests follows soon. Reported-by: Zygo Blaxell Fixes: 808f80b46790f ("Btrfs: update fix for read corruption of compressed and shared extents") Fixes: 005efedf2c7d0 ("Btrfs: fix read corruption of compressed and shared extents") Cc: stable@vger.kernel.org # 4.3+ Tested-by: Zygo Blaxell Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5b62e06567a3..4cc534584665 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3014,11 +3014,11 @@ static int __do_readpage(struct extent_io_tree *tree, */ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && prev_em_start && *prev_em_start != (u64)-1 && - *prev_em_start != em->orig_start) + *prev_em_start != em->start) force_bio_submit = true; if (prev_em_start) - *prev_em_start = em->orig_start; + *prev_em_start = em->start; free_extent_map(em); em = NULL; From 05c0283950485d3b5d1d35200b2e679f11ae081b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 3 Jan 2019 20:16:13 -0800 Subject: [PATCH 2380/2608] crypto: pcbc - remove bogus memcpy()s with src == dest commit 251b7aea34ba3c4d4fdfa9447695642eb8b8b098 upstream. The memcpy()s in the PCBC implementation use walk->iv as both the source and destination, which has undefined behavior. These memcpy()'s are actually unneeded, because walk->iv is already used to hold the previous plaintext block XOR'd with the previous ciphertext block. Thus, walk->iv is already updated to its final value. So remove the broken and unnecessary memcpy()s. Fixes: 91652be5d1b9 ("[CRYPTO] pcbc: Add Propagated CBC template") Cc: # v2.6.21+ Cc: David Howells Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Maxim Zhukov Signed-off-by: Greg Kroah-Hartman --- crypto/pcbc.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/crypto/pcbc.c b/crypto/pcbc.c index d9e45a958720..67009a532201 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -50,7 +50,7 @@ static int crypto_pcbc_encrypt_segment(struct skcipher_request *req, unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; - u8 *iv = walk->iv; + u8 * const iv = walk->iv; do { crypto_xor(iv, src, bsize); @@ -71,7 +71,7 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req, int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; - u8 *iv = walk->iv; + u8 * const iv = walk->iv; u8 tmpbuf[bsize]; do { @@ -83,8 +83,6 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req, src += bsize; } while ((nbytes -= bsize) >= bsize); - memcpy(walk->iv, iv, bsize); - return nbytes; } @@ -120,7 +118,7 @@ static int crypto_pcbc_decrypt_segment(struct skcipher_request *req, unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; - u8 *iv = walk->iv; + u8 * const iv = walk->iv; do { crypto_cipher_decrypt_one(tfm, dst, src); @@ -131,8 +129,6 @@ static int crypto_pcbc_decrypt_segment(struct skcipher_request *req, dst += bsize; } while ((nbytes -= bsize) >= bsize); - memcpy(walk->iv, iv, bsize); - return nbytes; } @@ -143,7 +139,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; - u8 *iv = walk->iv; + u8 * const iv = walk->iv; u8 tmpbuf[bsize] __aligned(__alignof__(u32)); do { @@ -155,8 +151,6 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, src += bsize; } while ((nbytes -= bsize) >= bsize); - memcpy(walk->iv, iv, bsize); - return nbytes; } From 04e9b13e566d54917591143e0ea477c5808a8383 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sun, 10 Feb 2019 20:47:49 +0100 Subject: [PATCH 2381/2608] libertas_tf: don't set URB_ZERO_PACKET on IN USB transfer commit 607076a904c435f2677fadaadd4af546279db68b upstream. It doesn't make sense and the USB core warns on each submit of such URB, easily flooding the message buffer with tracebacks. Analogous issue was fixed in regular libertas driver in commit 6528d8804780 ("libertas: don't set URB_ZERO_PACKET on IN USB transfer"). Cc: stable@vger.kernel.org Signed-off-by: Lubomir Rintel Reviewed-by: Steve deRosier Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/libertas_tf/if_usb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index e9104eca327b..cae95362efd5 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -433,8 +433,6 @@ static int __if_usb_submit_rx_urb(struct if_usb_card *cardp, skb_tail_pointer(skb), MRVDRV_ETH_RX_PACKET_BUFFER_SIZE, callbackfn, cardp); - cardp->rx_urb->transfer_flags |= URB_ZERO_PACKET; - lbtf_deb_usb2(&cardp->udev->dev, "Pointer for rx_urb %p\n", cardp->rx_urb); ret = usb_submit_urb(cardp->rx_urb, GFP_ATOMIC); From b4ea62123624ca194f9d217aecd9d91a4ae28e72 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sun, 10 Feb 2019 05:24:10 +0000 Subject: [PATCH 2382/2608] irqchip/gic-v3-its: Avoid parsing _indirect_ twice for Device table commit 8d565748b6035eeda18895c213396a4c9fac6a4c upstream. In current logic, its_parse_indirect_baser() will be invoked twice when allocating Device tables. Add a *break* to omit the unnecessary and annoying (might be ...) invoking. Fixes: 32bd44dc19de ("irqchip/gic-v3-its: Fix the incorrect parsing of VCPU table size") Cc: stable@vger.kernel.org Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d8ecc90ed1b5..121fb552f873 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1708,6 +1708,8 @@ static int its_alloc_tables(struct its_node *its) indirect = its_parse_indirect_baser(its, baser, psz, &order, its->device_ids); + break; + case GITS_BASER_TYPE_VCPU: indirect = its_parse_indirect_baser(its, baser, psz, &order, From 0bdd16a5bf3892c4dbe1466102effa2c5b0c1fdb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 13 Feb 2019 01:11:19 +0900 Subject: [PATCH 2383/2608] x86/kprobes: Prohibit probing on optprobe template code commit 0192e6535ebe9af68614198ced4fd6d37b778ebf upstream. Prohibit probing on optprobe template code, since it is not a code but a template instruction sequence. If we modify this template, copied template must be broken. Signed-off-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Andrea Righi Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 9326638cbee2 ("kprobes, x86: Use NOKPROBE_SYMBOL() instead of __kprobes annotation") Link: http://lkml.kernel.org/r/154998787911.31052.15274376330136234452.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/opt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3668f28cf5fc..f4e4db0cbd59 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -141,6 +141,11 @@ asm ( void optprobe_template_func(void); STACK_FRAME_NON_STANDARD(optprobe_template_func); +NOKPROBE_SYMBOL(optprobe_template_func); +NOKPROBE_SYMBOL(optprobe_template_entry); +NOKPROBE_SYMBOL(optprobe_template_val); +NOKPROBE_SYMBOL(optprobe_template_call); +NOKPROBE_SYMBOL(optprobe_template_end); #define TMPL_MOVE_IDX \ ((long)&optprobe_template_val - (long)&optprobe_template_entry) From ba43f06f7ae2345c7b905748dba35f53454e832c Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 4 Feb 2019 02:48:54 -0500 Subject: [PATCH 2384/2608] cpufreq: tegra124: add missing of_node_put() commit 446fae2bb5395f3028d8e3aae1508737e5a72ea1 upstream. of_cpu_device_node_get() will increase the refcount of device_node, it is necessary to call of_node_put() at the end to release the refcount. Fixes: 9eb15dbbfa1a2 ("cpufreq: Add cpufreq driver for Tegra124") Cc: # 4.4+ Signed-off-by: Yangtao Li Acked-by: Thierry Reding Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/tegra124-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c index 43530254201a..4bb154f6c54c 100644 --- a/drivers/cpufreq/tegra124-cpufreq.c +++ b/drivers/cpufreq/tegra124-cpufreq.c @@ -134,6 +134,8 @@ static int tegra124_cpufreq_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); + of_node_put(np); + return 0; out_switch_to_pllx: From dd5bee64db1213f90b80aa2328183d1a74d3f267 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 11:22:41 +0100 Subject: [PATCH 2385/2608] cpufreq: pxa2xx: remove incorrect __init annotation commit 9505b98ccddc454008ca7efff90044e3e857c827 upstream. pxa_cpufreq_init_voltages() is marked __init but usually inlined into the non-__init pxa_cpufreq_init() function. When building with clang, it can stay as a standalone function in a discarded section, and produce this warning: WARNING: vmlinux.o(.text+0x616a00): Section mismatch in reference from the function pxa_cpufreq_init() to the function .init.text:pxa_cpufreq_init_voltages() The function pxa_cpufreq_init() references the function __init pxa_cpufreq_init_voltages(). This is often because pxa_cpufreq_init lacks a __init annotation or the annotation of pxa_cpufreq_init_voltages is wrong. Fixes: 50e77fcd790e ("ARM: pxa: remove __init from cpufreq_driver->init()") Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Reviewed-by: Nathan Chancellor Acked-by: Robert Jarzmik Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/pxa2xx-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c index ce345bf34d5d..a24e9f037865 100644 --- a/drivers/cpufreq/pxa2xx-cpufreq.c +++ b/drivers/cpufreq/pxa2xx-cpufreq.c @@ -192,7 +192,7 @@ static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq) return ret; } -static void __init pxa_cpufreq_init_voltages(void) +static void pxa_cpufreq_init_voltages(void) { vcc_core = regulator_get(NULL, "vcc_core"); if (IS_ERR(vcc_core)) { @@ -208,7 +208,7 @@ static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq) return 0; } -static void __init pxa_cpufreq_init_voltages(void) { } +static void pxa_cpufreq_init_voltages(void) { } #endif static void find_freq_tables(struct cpufreq_frequency_table **freq_table, From 80988933648438dbb3ec9ba58cf026090c03d540 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Mon, 11 Feb 2019 00:35:06 -0500 Subject: [PATCH 2386/2608] ext4: add mask of ext4 flags to swap commit abdc644e8cbac2e9b19763680e5a7cf9bab2bee7 upstream. The reason is that while swapping two inode, we swap the flags too. Some flags such as EXT4_JOURNAL_DATA_FL can really confuse the things since we're not resetting the address operations structure. The simplest way to keep things sane is to restrict the flags that can be swapped. Signed-off-by: yangerkun Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 +++ fs/ext4/ioctl.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 02970a2e86a3..95ef26b39e69 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -426,6 +426,9 @@ struct flex_groups { /* Flags that are appropriate for non-directories/regular files. */ #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) +/* The only flags that should be swapped */ +#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL) + /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) { diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index b2a47058e04c..7917cc89ab21 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -61,6 +61,7 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) loff_t isize; struct ext4_inode_info *ei1; struct ext4_inode_info *ei2; + unsigned long tmp; ei1 = EXT4_I(inode1); ei2 = EXT4_I(inode2); @@ -73,7 +74,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) swap(inode1->i_mtime, inode2->i_mtime); memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); - swap(ei1->i_flags, ei2->i_flags); + tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP; + ei1->i_flags = (ei2->i_flags & EXT4_FL_SHOULD_SWAP) | + (ei1->i_flags & ~EXT4_FL_SHOULD_SWAP); + ei2->i_flags = tmp | (ei2->i_flags & ~EXT4_FL_SHOULD_SWAP); swap(ei1->i_disksize, ei2->i_disksize); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); From 803b8b94731378be23cbada2186883834ab2bebb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 11 Feb 2019 13:30:32 -0500 Subject: [PATCH 2387/2608] ext4: fix crash during online resizing commit f96c3ac8dfc24b4e38fc4c2eba5fea2107b929d1 upstream. When computing maximum size of filesystem possible with given number of group descriptor blocks, we forget to include s_first_data_block into the number of blocks. Thus for filesystems with non-zero s_first_data_block it can happen that computed maximum filesystem size is actually lower than current filesystem size which confuses the code and eventually leads to a BUG_ON in ext4_alloc_group_tables() hitting on flex_gd->count == 0. The problem can be reproduced like: truncate -s 100g /tmp/image mkfs.ext4 -b 1024 -E resize=262144 /tmp/image 32768 mount -t ext4 -o loop /tmp/image /mnt resize2fs /dev/loop0 262145 resize2fs /dev/loop0 300000 Fix the problem by properly including s_first_data_block into the computed number of filesystem blocks. Fixes: 1c6bd7173d66 "ext4: convert file system to meta_bg if needed..." Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 703b516366fd..6f0acfe31418 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1930,7 +1930,8 @@ retry: le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); n_blocks_count = (ext4_fsblk_t)n_group * - EXT4_BLOCKS_PER_GROUP(sb); + EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(es->s_first_data_block); n_group--; /* set to last group number */ } From 5e89ae3d9ab7afd5332945255e79e2e2f3275a89 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Feb 2019 08:45:35 -0800 Subject: [PATCH 2388/2608] IB/hfi1: Close race condition on user context disable and close commit bc5add09764c123f58942a37c8335247e683d234 upstream. When disabling and removing a receive context, it is possible for an asynchronous event (i.e IRQ) to occur. Because of this, there is a race between cleaning up the context, and the context being used by the asynchronous event. cpu 0 (context cleanup) rc->ref_count-- (ref_count == 0) hfi1_rcd_free() cpu 1 (IRQ (with rcd index)) rcd_get_by_index() lock ref_count+++ <-- reference count race (WARNING) return rcd unlock cpu 0 hfi1_free_ctxtdata() <-- incorrect free location lock remove rcd from array unlock free rcd This race will cause the following WARNING trace: WARNING: CPU: 0 PID: 175027 at include/linux/kref.h:52 hfi1_rcd_get_by_index+0x84/0xa0 [hfi1] CPU: 0 PID: 175027 Comm: IMB-MPI1 Kdump: loaded Tainted: G OE ------------ 3.10.0-957.el7.x86_64 #1 Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.11.01.0076.C4.111920150602 11/19/2015 Call Trace: dump_stack+0x19/0x1b __warn+0xd8/0x100 warn_slowpath_null+0x1d/0x20 hfi1_rcd_get_by_index+0x84/0xa0 [hfi1] is_rcv_urgent_int+0x24/0x90 [hfi1] general_interrupt+0x1b6/0x210 [hfi1] __handle_irq_event_percpu+0x44/0x1c0 handle_irq_event_percpu+0x32/0x80 handle_irq_event+0x3c/0x60 handle_edge_irq+0x7f/0x150 handle_irq+0xe4/0x1a0 do_IRQ+0x4d/0xf0 common_interrupt+0x162/0x162 The race can also lead to a use after free which could be similar to: general protection fault: 0000 1 SMP CPU: 71 PID: 177147 Comm: IMB-MPI1 Kdump: loaded Tainted: G W OE ------------ 3.10.0-957.el7.x86_64 #1 Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.11.01.0076.C4.111920150602 11/19/2015 task: ffff9962a8098000 ti: ffff99717a508000 task.ti: ffff99717a508000 __kmalloc+0x94/0x230 Call Trace: ? hfi1_user_sdma_process_request+0x9c8/0x1250 [hfi1] hfi1_user_sdma_process_request+0x9c8/0x1250 [hfi1] hfi1_aio_write+0xba/0x110 [hfi1] do_sync_readv_writev+0x7b/0xd0 do_readv_writev+0xce/0x260 ? handle_mm_fault+0x39d/0x9b0 ? pick_next_task_fair+0x5f/0x1b0 ? sched_clock_cpu+0x85/0xc0 ? __schedule+0x13a/0x890 vfs_writev+0x35/0x60 SyS_writev+0x7f/0x110 system_call_fastpath+0x22/0x27 Use the appropriate kref API to verify access. Reorder context cleanup to ensure context removal before cleanup occurs correctly. Cc: stable@vger.kernel.org # v4.14.0+ Fixes: f683c80ca68e ("IB/hfi1: Resolve kernel panics by reference counting receive contexts") Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/init.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ee2859dcceab..af550c1767e3 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1398,7 +1398,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); -void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); +int hfi1_rcd_get(struct hfi1_ctxtdata *rcd); struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index ee5cbdfeb3ab..b7481701542e 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -215,12 +215,12 @@ static void hfi1_rcd_free(struct kref *kref) struct hfi1_ctxtdata *rcd = container_of(kref, struct hfi1_ctxtdata, kref); - hfi1_free_ctxtdata(rcd->dd, rcd); - spin_lock_irqsave(&rcd->dd->uctxt_lock, flags); rcd->dd->rcd[rcd->ctxt] = NULL; spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags); + hfi1_free_ctxtdata(rcd->dd, rcd); + kfree(rcd); } @@ -243,10 +243,13 @@ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) * @rcd: pointer to an initialized rcd data structure * * Use this to get a reference after the init. + * + * Return : reflect kref_get_unless_zero(), which returns non-zero on + * increment, otherwise 0. */ -void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) +int hfi1_rcd_get(struct hfi1_ctxtdata *rcd) { - kref_get(&rcd->kref); + return kref_get_unless_zero(&rcd->kref); } /** @@ -305,7 +308,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt) spin_lock_irqsave(&dd->uctxt_lock, flags); if (dd->rcd[ctxt]) { rcd = dd->rcd[ctxt]; - hfi1_rcd_get(rcd); + if (!hfi1_rcd_get(rcd)) + rcd = NULL; } spin_unlock_irqrestore(&dd->uctxt_lock, flags); From b22d9f07fc559c4a9674d7d0f893899042416467 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 29 Jan 2019 16:36:18 +0530 Subject: [PATCH 2389/2608] cxl: Wrap iterations over afu slices inside 'afu_list_lock' commit edeb304f659792fb5bab90d7d6f3408b4c7301fb upstream. Within cxl module, iteration over array 'adapter->afu' may be racy at few points as it might be simultaneously read during an EEH and its contents being set to NULL while driver is being unloaded or unbound from the adapter. This might result in a NULL pointer to 'struct afu' being de-referenced during an EEH thereby causing a kernel oops. This patch fixes this by making sure that all access to the array 'adapter->afu' is wrapped within the context of spin-lock 'adapter->afu_list_lock'. Fixes: 9e8df8a21963 ("cxl: EEH support") Cc: stable@vger.kernel.org # v4.3+ Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Acked-by: Christophe Lombard Signed-off-by: Vaibhav Jain Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/guest.c | 2 ++ drivers/misc/cxl/pci.c | 39 ++++++++++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index f58b4b6c79f2..1a64eb185cfd 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -267,6 +267,7 @@ static int guest_reset(struct cxl *adapter) int i, rc; pr_devel("Adapter reset request\n"); + spin_lock(&adapter->afu_list_lock); for (i = 0; i < adapter->slices; i++) { if ((afu = adapter->afu[i])) { pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, @@ -283,6 +284,7 @@ static int guest_reset(struct cxl *adapter) pci_error_handlers(afu, CXL_RESUME_EVENT, 0); } } + spin_unlock(&adapter->afu_list_lock); return rc; } diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 2b3fd0a51701..cf069e11d2d2 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -2050,7 +2050,7 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, /* There should only be one entry, but go through the list * anyway */ - if (afu->phb == NULL) + if (afu == NULL || afu->phb == NULL) return result; list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { @@ -2077,7 +2077,8 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, { struct cxl *adapter = pci_get_drvdata(pdev); struct cxl_afu *afu; - pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET, afu_result; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; int i; /* At this point, we could still have an interrupt pending. @@ -2088,6 +2089,7 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, /* If we're permanently dead, give up. */ if (state == pci_channel_io_perm_failure) { + spin_lock(&adapter->afu_list_lock); for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; /* @@ -2096,6 +2098,7 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, */ cxl_vphb_error_detected(afu, state); } + spin_unlock(&adapter->afu_list_lock); return PCI_ERS_RESULT_DISCONNECT; } @@ -2177,11 +2180,17 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, * * In slot_reset, free the old resources and allocate new ones. * * In resume, clear the flag to allow things to start. */ + + /* Make sure no one else changes the afu list */ + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - afu_result = cxl_vphb_error_detected(afu, state); + if (afu == NULL) + continue; + afu_result = cxl_vphb_error_detected(afu, state); cxl_context_detach_all(afu); cxl_ops->afu_deactivate_mode(afu, afu->current_mode); pci_deconfigure_afu(afu); @@ -2193,6 +2202,7 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, (result == PCI_ERS_RESULT_NEED_RESET)) result = PCI_ERS_RESULT_NONE; } + spin_unlock(&adapter->afu_list_lock); /* should take the context lock here */ if (cxl_adapter_context_lock(adapter) != 0) @@ -2225,14 +2235,18 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) */ cxl_adapter_context_unlock(adapter); + spin_lock(&adapter->afu_list_lock); for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; + if (afu == NULL) + continue; + if (pci_configure_afu(afu, adapter, pdev)) - goto err; + goto err_unlock; if (cxl_afu_select_best_mode(afu)) - goto err; + goto err_unlock; if (afu->phb == NULL) continue; @@ -2244,16 +2258,16 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) ctx = cxl_get_context(afu_dev); if (ctx && cxl_release_context(ctx)) - goto err; + goto err_unlock; ctx = cxl_dev_context_init(afu_dev); if (IS_ERR(ctx)) - goto err; + goto err_unlock; afu_dev->dev.archdata.cxl_ctx = ctx; if (cxl_ops->afu_check_and_enable(afu)) - goto err; + goto err_unlock; afu_dev->error_state = pci_channel_io_normal; @@ -2274,8 +2288,13 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) result = PCI_ERS_RESULT_DISCONNECT; } } + + spin_unlock(&adapter->afu_list_lock); return result; +err_unlock: + spin_unlock(&adapter->afu_list_lock); + err: /* All the bits that happen in both error_detected and cxl_remove * should be idempotent, so we don't need to worry about leaving a mix @@ -2296,10 +2315,11 @@ static void cxl_pci_resume(struct pci_dev *pdev) * This is not the place to be checking if everything came back up * properly, because there's no return value: do that in slot_reset. */ + spin_lock(&adapter->afu_list_lock); for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - if (afu->phb == NULL) + if (afu == NULL || afu->phb == NULL) continue; list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { @@ -2308,6 +2328,7 @@ static void cxl_pci_resume(struct pci_dev *pdev) afu_dev->driver->err_handler->resume(afu_dev); } } + spin_unlock(&adapter->afu_list_lock); } static const struct pci_error_handlers cxl_err_handler = { From 7412f34ff819f530461bfe57773f83e1d4419421 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 29 Jan 2019 17:17:24 +0100 Subject: [PATCH 2390/2608] ext2: Fix underflow in ext2_max_size() commit 1c2d14212b15a60300a2d4f6364753e87394c521 upstream. When ext2 filesystem is created with 64k block size, ext2_max_size() will return value less than 0. Also, we cannot write any file in this fs since the sb->maxbytes is less than 0. The core of the problem is that the size of block index tree for such large block size is more than i_blocks can carry. So fix the computation to count with this possibility. File size limits computed with the new function for the full range of possible block sizes look like: bits file_size 10 17247252480 11 275415851008 12 2196873666560 13 2197948973056 14 2198486220800 15 2198754754560 16 2198888906752 CC: stable@vger.kernel.org Reported-by: yangerkun Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/super.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 726e680a3368..13f470636672 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -754,7 +754,8 @@ static loff_t ext2_max_size(int bits) { loff_t res = EXT2_NDIR_BLOCKS; int meta_blocks; - loff_t upper_limit; + unsigned int upper_limit; + unsigned int ppb = 1 << (bits-2); /* This is calculated to be the largest file size for a * dense, file such that the total number of @@ -768,24 +769,34 @@ static loff_t ext2_max_size(int bits) /* total blocks in file system block size */ upper_limit >>= (bits - 9); - - /* indirect blocks */ - meta_blocks = 1; - /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; - + /* Compute how many blocks we can address by block tree */ res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); res += 1LL << (3*(bits-2)); - res <<= bits; - if (res > upper_limit) - res = upper_limit; + /* Does block tree limit file size? */ + if (res < upper_limit) + goto check_lfs; + res = upper_limit; + /* How many metadata blocks are needed for addressing upper_limit? */ + upper_limit -= EXT2_NDIR_BLOCKS; + /* indirect blocks */ + meta_blocks = 1; + upper_limit -= ppb; + /* double indirect blocks */ + if (upper_limit < ppb * ppb) { + meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb); + res -= meta_blocks; + goto check_lfs; + } + meta_blocks += 1 + ppb; + upper_limit -= ppb * ppb; + /* tripple indirect blocks for the rest */ + meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb) + + DIV_ROUND_UP(upper_limit, ppb*ppb); + res -= meta_blocks; +check_lfs: + res <<= bits; if (res > MAX_LFS_FILESIZE) res = MAX_LFS_FILESIZE; From 9c105991036f1e32e3900029e98c639281818596 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Fri, 8 Feb 2019 11:25:23 +0900 Subject: [PATCH 2391/2608] clk: uniphier: Fix update register for CPU-gear commit 521282237b9d78b9bff423ec818becd4c95841c2 upstream. Need to set the update bit in UNIPHIER_CLK_CPUGEAR_UPD to update the CPU-gear value. Fixes: d08f1f0d596c ("clk: uniphier: add CPU-gear change (cpufreq) support") Cc: linux-stable@vger.kernel.org Signed-off-by: Kunihiko Hayashi Acked-by: Masahiro Yamada Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/uniphier/clk-uniphier-cpugear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/uniphier/clk-uniphier-cpugear.c b/drivers/clk/uniphier/clk-uniphier-cpugear.c index ec11f55594ad..5d2d42b7e182 100644 --- a/drivers/clk/uniphier/clk-uniphier-cpugear.c +++ b/drivers/clk/uniphier/clk-uniphier-cpugear.c @@ -47,7 +47,7 @@ static int uniphier_clk_cpugear_set_parent(struct clk_hw *hw, u8 index) return ret; ret = regmap_write_bits(gear->regmap, - gear->regbase + UNIPHIER_CLK_CPUGEAR_SET, + gear->regbase + UNIPHIER_CLK_CPUGEAR_UPD, UNIPHIER_CLK_CPUGEAR_UPD_BIT, UNIPHIER_CLK_CPUGEAR_UPD_BIT); if (ret) From 55e62bb2c13c3e05f3ef1cbba96be1db358dd7ce Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 11 Feb 2019 14:59:07 -0800 Subject: [PATCH 2392/2608] clk: clk-twl6040: Fix imprecise external abort for pdmclk commit 5ae51d67aec95f6f9386aa8dd5db424964895575 upstream. I noticed that modprobe clk-twl6040 can fail after a cold boot with: abe_cm:clk:0010:0: failed to enable ... Unhandled fault: imprecise external abort (0x1406) at 0xbe896b20 WARNING: CPU: 1 PID: 29 at drivers/clk/clk.c:828 clk_core_disable_lock+0x18/0x24 ... (clk_core_disable_lock) from [] (_disable_clocks+0x18/0x90) (_disable_clocks) from [] (_idle+0x17c/0x244) (_idle) from [] (omap_hwmod_idle+0x24/0x44) (omap_hwmod_idle) from [] (sysc_runtime_suspend+0x48/0x108) (sysc_runtime_suspend) from [] (__rpm_callback+0x144/0x1d8) (__rpm_callback) from [] (rpm_callback+0x20/0x80) (rpm_callback) from [] (rpm_suspend+0x120/0x694) (rpm_suspend) from [] (__pm_runtime_idle+0x60/0x84) (__pm_runtime_idle) from [] (sysc_probe+0x874/0xf2c) (sysc_probe) from [] (platform_drv_probe+0x48/0x98) After searching around for a similar issue, I came across an earlier fix that never got merged upstream in the Android tree for glass-omap-xrr02. There is patch "MFD: twl6040-codec: Implement PDMCLK cold temp errata" by Misael Lopez Cruz . Based on my observations, this fix is also needed when cold booting devices, and not just for deeper idle modes. Since we now have a clock driver for pdmclk, let's fix the issue in twl6040_pdmclk_prepare(). Cc: Misael Lopez Cruz Cc: Peter Ujfalusi Signed-off-by: Tony Lindgren Acked-by: Peter Ujfalusi Cc: Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-twl6040.c | 53 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk-twl6040.c b/drivers/clk/clk-twl6040.c index 7b222a5db931..82d615fe2947 100644 --- a/drivers/clk/clk-twl6040.c +++ b/drivers/clk/clk-twl6040.c @@ -41,6 +41,43 @@ static int twl6040_pdmclk_is_prepared(struct clk_hw *hw) return pdmclk->enabled; } +static int twl6040_pdmclk_reset_one_clock(struct twl6040_pdmclk *pdmclk, + unsigned int reg) +{ + const u8 reset_mask = TWL6040_HPLLRST; /* Same for HPPLL and LPPLL */ + int ret; + + ret = twl6040_set_bits(pdmclk->twl6040, reg, reset_mask); + if (ret < 0) + return ret; + + ret = twl6040_clear_bits(pdmclk->twl6040, reg, reset_mask); + if (ret < 0) + return ret; + + return 0; +} + +/* + * TWL6040A2 Phoenix Audio IC erratum #6: "PDM Clock Generation Issue At + * Cold Temperature". This affects cold boot and deeper idle states it + * seems. The workaround consists of resetting HPPLL and LPPLL. + */ +static int twl6040_pdmclk_quirk_reset_clocks(struct twl6040_pdmclk *pdmclk) +{ + int ret; + + ret = twl6040_pdmclk_reset_one_clock(pdmclk, TWL6040_REG_HPPLLCTL); + if (ret) + return ret; + + ret = twl6040_pdmclk_reset_one_clock(pdmclk, TWL6040_REG_LPPLLCTL); + if (ret) + return ret; + + return 0; +} + static int twl6040_pdmclk_prepare(struct clk_hw *hw) { struct twl6040_pdmclk *pdmclk = container_of(hw, struct twl6040_pdmclk, @@ -48,8 +85,20 @@ static int twl6040_pdmclk_prepare(struct clk_hw *hw) int ret; ret = twl6040_power(pdmclk->twl6040, 1); - if (!ret) - pdmclk->enabled = 1; + if (ret) + return ret; + + ret = twl6040_pdmclk_quirk_reset_clocks(pdmclk); + if (ret) + goto out_err; + + pdmclk->enabled = 1; + + return 0; + +out_err: + dev_err(pdmclk->dev, "%s: error %i\n", __func__, ret); + twl6040_power(pdmclk->twl6040, 0); return ret; } From 3243d364a876ca3e9e12f2ef113a63f803fdd4f2 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 27 Jan 2019 23:09:20 -0300 Subject: [PATCH 2393/2608] clk: ingenic: Fix round_rate misbehaving with non-integer dividers commit bc5d922c93491878c44c9216e9d227c7eeb81d7f upstream. Take a parent rate of 180 MHz, and a requested rate of 4.285715 MHz. This results in a theorical divider of 41.999993 which is then rounded up to 42. The .round_rate function would then return (180 MHz / 42) as the clock, rounded down, so 4.285714 MHz. Calling clk_set_rate on 4.285714 MHz would round the rate again, and give a theorical divider of 42,0000028, now rounded up to 43, and the rate returned would be (180 MHz / 43) which is 4.186046 MHz, aka. not what we requested. Fix this by rounding up the divisions. Signed-off-by: Paul Cercueil Tested-by: Maarten ter Huurne Cc: Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/ingenic/cgu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c index ab393637f7b0..a6b4b90ff227 100644 --- a/drivers/clk/ingenic/cgu.c +++ b/drivers/clk/ingenic/cgu.c @@ -364,16 +364,16 @@ ingenic_clk_round_rate(struct clk_hw *hw, unsigned long req_rate, struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); struct ingenic_cgu *cgu = ingenic_clk->cgu; const struct ingenic_cgu_clk_info *clk_info; - long rate = *parent_rate; + unsigned int div = 1; clk_info = &cgu->clock_info[ingenic_clk->idx]; if (clk_info->type & CGU_CLK_DIV) - rate /= ingenic_clk_calc_div(clk_info, *parent_rate, req_rate); + div = ingenic_clk_calc_div(clk_info, *parent_rate, req_rate); else if (clk_info->type & CGU_CLK_FIXDIV) - rate /= clk_info->fixdiv.div; + div = clk_info->fixdiv.div; - return rate; + return DIV_ROUND_UP(*parent_rate, div); } static int @@ -393,7 +393,7 @@ ingenic_clk_set_rate(struct clk_hw *hw, unsigned long req_rate, if (clk_info->type & CGU_CLK_DIV) { div = ingenic_clk_calc_div(clk_info, parent_rate, req_rate); - rate = parent_rate / div; + rate = DIV_ROUND_UP(parent_rate, div); if (rate != req_rate) return -EINVAL; From bc434ccf9a7bcf6f7b17ef0374758b958b5e7f49 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 27 Jan 2019 23:09:21 -0300 Subject: [PATCH 2394/2608] clk: ingenic: Fix doc of ingenic_cgu_div_info commit 7ca4c922aad2e3c46767a12f80d01c6b25337b59 upstream. The 'div' field does not represent a number of bits used to divide (understand: right-shift) the divider, but a number itself used to divide the divider. Signed-off-by: Paul Cercueil Signed-off-by: Maarten ter Huurne Cc: Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/ingenic/cgu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/ingenic/cgu.h b/drivers/clk/ingenic/cgu.h index e78b586536ea..74ed385309a5 100644 --- a/drivers/clk/ingenic/cgu.h +++ b/drivers/clk/ingenic/cgu.h @@ -78,7 +78,7 @@ struct ingenic_cgu_mux_info { * @reg: offset of the divider control register within the CGU * @shift: number of bits to left shift the divide value by (ie. the index of * the lowest bit of the divide value within its control register) - * @div: number of bits to divide the divider value by (i.e. if the + * @div: number to divide the divider value by (i.e. if the * effective divider value is the value written to the register * multiplied by some constant) * @bits: the size of the divide value in bits From 3d137dfc2f5ad6be6426c2e8bfe62045d00df4f8 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 24 Feb 2019 18:36:22 +0300 Subject: [PATCH 2395/2608] usb: chipidea: tegra: Fix missed ci_hdrc_remove_device() commit 563b9372f7ec57e44e8f9a8600c5107d7ffdd166 upstream. The ChipIdea's platform device need to be unregistered on Tegra's driver module removal. Fixes: dfebb5f43a78827a ("usb: chipidea: Add support for Tegra20/30/114/124") Signed-off-by: Dmitry Osipenko Acked-by: Peter Chen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_tegra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/chipidea/ci_hdrc_tegra.c b/drivers/usb/chipidea/ci_hdrc_tegra.c index bfcee2702d50..5cf62fa33762 100644 --- a/drivers/usb/chipidea/ci_hdrc_tegra.c +++ b/drivers/usb/chipidea/ci_hdrc_tegra.c @@ -133,6 +133,7 @@ static int tegra_udc_remove(struct platform_device *pdev) { struct tegra_udc *udc = platform_get_drvdata(pdev); + ci_hdrc_remove_device(udc->dev); usb_phy_set_suspend(udc->phy, 1); clk_disable_unprepare(udc->clk); From fe3174e3b44ab274df6cfd38169f8bc5bbbbcf4d Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 30 Jan 2019 01:23:01 +0000 Subject: [PATCH 2396/2608] nfit: acpi_nfit_ctl(): Check out_obj->type in the right place commit 43f89877f26671c6309cd87d7364b1a3e66e71cf upstream. In the case of ND_CMD_CALL, we should also check out_obj->type. The patch uses out_obj->type, which is a short alias to out_obj->package.type. Fixes: 31eca76ba2fc ("nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism") Cc: Signed-off-by: Dexuan Cui Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 4a6c5e7b6835..05fb821c2558 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -329,6 +329,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, return -EINVAL; } + if (out_obj->type != ACPI_TYPE_BUFFER) { + dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n", + dimm_name, cmd_name, out_obj->type); + rc = -EINVAL; + goto out; + } + if (call_pkg) { call_pkg->nd_fw_size = out_obj->buffer.length; memcpy(call_pkg->nd_payload + call_pkg->nd_size_in, @@ -347,13 +354,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, return 0; } - if (out_obj->package.type != ACPI_TYPE_BUFFER) { - dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n", - __func__, dimm_name, cmd_name, out_obj->type); - rc = -EINVAL; - goto out; - } - dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name, cmd_name, out_obj->buffer.length); print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, From c02fd076842636fb7506d4ca8a2bb3f91faf1af3 Mon Sep 17 00:00:00 2001 From: zhongjiang Date: Tue, 5 Mar 2019 15:41:16 -0800 Subject: [PATCH 2397/2608] mm: hwpoison: fix thp split handing in soft_offline_in_use_page() commit 46612b751c4941c5c0472ddf04027e877ae5990f upstream. When soft_offline_in_use_page() runs on a thp tail page after pmd is split, we trigger the following VM_BUG_ON_PAGE(): Memory failure: 0x3755ff: non anonymous thp __get_any_page: 0x3755ff: unknown zero refcount page type 2fffff80000000 Soft offlining pfn 0x34d805 at process virtual address 0x20fff000 page:ffffea000d360140 count:0 mapcount:0 mapping:0000000000000000 index:0x1 flags: 0x2fffff80000000() raw: 002fffff80000000 ffffea000d360108 ffffea000d360188 0000000000000000 raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0) ------------[ cut here ]------------ kernel BUG at ./include/linux/mm.h:519! soft_offline_in_use_page() passed refcount and page lock from tail page to head page, which is not needed because we can pass any subpage to split_huge_page(). Naoya had fixed a similar issue in c3901e722b29 ("mm: hwpoison: fix thp split handling in memory_failure()"). But he missed fixing soft offline. Link: http://lkml.kernel.org/r/1551452476-24000-1-git-send-email-zhongjiang@huawei.com Fixes: 61f5d698cc97 ("mm: re-enable THP") Signed-off-by: zhongjiang Acked-by: Naoya Horiguchi Cc: Michal Hocko Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ef080fa682a6..001b6bfccbfb 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1701,19 +1701,17 @@ static int soft_offline_in_use_page(struct page *page, int flags) struct page *hpage = compound_head(page); if (!PageHuge(page) && PageTransHuge(hpage)) { - lock_page(hpage); - if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) { - unlock_page(hpage); - if (!PageAnon(hpage)) + lock_page(page); + if (!PageAnon(page) || unlikely(split_huge_page(page))) { + unlock_page(page); + if (!PageAnon(page)) pr_info("soft offline: %#lx: non anonymous thp\n", page_to_pfn(page)); else pr_info("soft offline: %#lx: thp split failed\n", page_to_pfn(page)); - put_hwpoison_page(hpage); + put_hwpoison_page(page); return -EBUSY; } - unlock_page(hpage); - get_hwpoison_page(page); - put_hwpoison_page(hpage); + unlock_page(page); } if (PageHuge(page)) From c877818ada28daa194026068232116e52427d34d Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Tue, 5 Mar 2019 15:43:20 -0800 Subject: [PATCH 2398/2608] mm/vmalloc: fix size check for remap_vmalloc_range_partial() commit 401592d2e095947344e10ec0623adbcd58934dd4 upstream. When VM_NO_GUARD is not set area->size includes adjacent guard page, thus for correct size checking get_vm_area_size() should be used, but not area->size. This fixes possible kernel oops when userspace tries to mmap an area on 1 page bigger than was allocated by vmalloc_user() call: the size check inside remap_vmalloc_range_partial() accounts non-existing guard page also, so check successfully passes but vmalloc_to_page() returns NULL (guard page does not physically exist). The following code pattern example should trigger an oops: static int oops_mmap(struct file *file, struct vm_area_struct *vma) { void *mem; mem = vmalloc_user(4096); BUG_ON(!mem); /* Do not care about mem leak */ return remap_vmalloc_range(vma, mem, 0); } And userspace simply mmaps size + PAGE_SIZE: mmap(NULL, 8192, PROT_WRITE|PROT_READ, MAP_PRIVATE, fd, 0); Possible candidates for oops which do not have any explicit size checks: *** drivers/media/usb/stkwebcam/stk-webcam.c: v4l_stk_mmap[789] ret = remap_vmalloc_range(vma, sbuf->buffer, 0); Or the following one: *** drivers/video/fbdev/core/fbmem.c static int fb_mmap(struct file *file, struct vm_area_struct * vma) ... res = fb->fb_mmap(info, vma); Where fb_mmap callback calls remap_vmalloc_range() directly without any explicit checks: *** drivers/video/fbdev/vfb.c static int vfb_mmap(struct fb_info *info, struct vm_area_struct *vma) { return remap_vmalloc_range(vma, (void *)info->fix.smem_start, vma->vm_pgoff); } Link: http://lkml.kernel.org/r/20190103145954.16942-2-rpenyaev@suse.de Signed-off-by: Roman Penyaev Acked-by: Michal Hocko Cc: Andrey Ryabinin Cc: Joe Perches Cc: "Luis R. Rodriguez" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9ff21a12ea00..8d9f636d0c98 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2262,7 +2262,7 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, if (!(area->flags & VM_USERMAP)) return -EINVAL; - if (kaddr + size > area->addr + area->size) + if (kaddr + size > area->addr + get_vm_area_size(area)) return -EINVAL; do { From 73a79d1bb2d969e7f05fcb724098073ce661a867 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Mon, 11 Mar 2019 23:28:02 -0700 Subject: [PATCH 2399/2608] kernel/sysctl.c: add missing range check in do_proc_dointvec_minmax_conv commit 8cf7630b29701d364f8df4a50e4f1f5e752b2778 upstream. This bug has apparently existed since the introduction of this function in the pre-git era (4500e91754d3 in Thomas Gleixner's history.git, "[NET]: Add proc_dointvec_userhz_jiffies, use it for proper handling of neighbour sysctls."). As a minimal fix we can simply duplicate the corresponding check in do_proc_dointvec_conv(). Link: http://lkml.kernel.org/r/20190207123426.9202-3-zev@bewilderbeest.net Signed-off-by: Zev Weiss Cc: Brendan Higgins Cc: Iurii Zaikin Cc: Kees Cook Cc: Luis Chamberlain Cc: [2.6.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3ad00bf90b3d..a7acb058b776 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2530,7 +2530,16 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, { struct do_proc_dointvec_minmax_conv_param *param = data; if (write) { - int val = *negp ? -*lvalp : *lvalp; + int val; + if (*negp) { + if (*lvalp > (unsigned long) INT_MAX + 1) + return -EINVAL; + val = -*lvalp; + } else { + if (*lvalp > (unsigned long) INT_MAX) + return -EINVAL; + val = *lvalp; + } if ((param->min && *param->min > val) || (param->max && *param->max < val)) return -EINVAL; From dfdac22666d229538f499c5d56d38c05b4859f1e Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 23 Jan 2019 17:44:16 +0300 Subject: [PATCH 2400/2608] device property: Fix the length used in PROPERTY_ENTRY_STRING() commit 2b6e492467c78183bb629bb0a100ea3509b615a5 upstream. With string type property entries we need to use sizeof(const char *) instead of the number of characters as the length of the entry. If the string was shorter then sizeof(const char *), attempts to read it would have failed with -EOVERFLOW. The problem has been hidden because all build-in string properties have had a string longer then 8 characters until now. Fixes: a85f42047533 ("device property: helper macros for property entry creation") Cc: 4.5+ # 4.5+ Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/property.h b/include/linux/property.h index 89d94b349912..45777ec1c524 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -252,7 +252,7 @@ struct property_entry { #define PROPERTY_ENTRY_STRING(_name_, _val_) \ (struct property_entry) { \ .name = _name_, \ - .length = sizeof(_val_), \ + .length = sizeof(const char *), \ .is_string = true, \ { .value = { .str = _val_ } }, \ } From c31c530c051db1e807f4b37cfd7dc179b9af3d09 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 24 Jan 2019 15:11:53 +0200 Subject: [PATCH 2401/2608] intel_th: Don't reference unassigned outputs commit 9ed3f22223c33347ed963e7c7019cf2956dd4e37 upstream. When an output port driver is removed, also remove references to it from any masters. Failing to do this causes a NULL ptr dereference when configuring another output port: > BUG: unable to handle kernel NULL pointer dereference at 000000000000000d > RIP: 0010:master_attr_store+0x9d/0x160 [intel_th_gth] > Call Trace: > dev_attr_store+0x1b/0x30 > sysfs_kf_write+0x3c/0x50 > kernfs_fop_write+0x125/0x1a0 > __vfs_write+0x3a/0x190 > ? __vfs_write+0x5/0x190 > ? _cond_resched+0x1a/0x50 > ? rcu_all_qs+0x5/0xb0 > ? __vfs_write+0x5/0x190 > vfs_write+0xb8/0x1b0 > ksys_write+0x55/0xc0 > __x64_sys_write+0x1a/0x20 > do_syscall_64+0x5a/0x140 > entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Alexander Shishkin Fixes: b27a6a3f97b9 ("intel_th: Add Global Trace Hub driver") CC: stable@vger.kernel.org # v4.4+ Reported-by: Ammy Yi Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/gth.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c index 018678ec3c13..bb27a3150563 100644 --- a/drivers/hwtracing/intel_th/gth.c +++ b/drivers/hwtracing/intel_th/gth.c @@ -615,6 +615,7 @@ static void intel_th_gth_unassign(struct intel_th_device *thdev, { struct gth_device *gth = dev_get_drvdata(&thdev->dev); int port = othdev->output.port; + int master; if (thdev->host_mode) return; @@ -623,6 +624,9 @@ static void intel_th_gth_unassign(struct intel_th_device *thdev, othdev->output.port = -1; othdev->output.active = false; gth->output[port].output = NULL; + for (master = 0; master < TH_CONFIGURABLE_MASTERS; master++) + if (gth->master[master] == port) + gth->master[master] = -1; spin_unlock(>h->gth_lock); } From 89541fbb582889cfe2c1ee9524b5fe12ed0a61a7 Mon Sep 17 00:00:00 2001 From: QiaoChong Date: Sat, 9 Feb 2019 20:59:07 +0000 Subject: [PATCH 2402/2608] parport_pc: fix find_superio io compare code, should use equal test. commit 21698fd57984cd28207d841dbdaa026d6061bceb upstream. In the original code before 181bf1e815a2 the loop was continuing until it finds the first matching superios[i].io and p->base. But after 181bf1e815a2 the logic changed and the loop now returns the pointer to the first mismatched array element which is then used in get_superio_dma() and get_superio_irq() and thus returning the wrong value. Fix the condition so that it now returns the correct pointer. Fixes: 181bf1e815a2 ("parport_pc: clean up the modified while loops using for") Cc: Alan Cox Cc: stable@vger.kernel.org Signed-off-by: QiaoChong [rewrite the commit message] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 380916bff9e0..dee5b9e35ffd 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -1377,7 +1377,7 @@ static struct superio_struct *find_superio(struct parport *p) { int i; for (i = 0; i < NR_SUPERIOS; i++) - if (superios[i].io != p->base) + if (superios[i].io == p->base) return &superios[i]; return NULL; } From ab9b91dc416ce2f27f5c089134d2016f2f7eeee9 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Tue, 12 Feb 2019 11:06:44 -0800 Subject: [PATCH 2403/2608] i2c: tegra: fix maximum transfer size commit f4e3f4ae1d9c9330de355f432b69952e8cef650c upstream. Tegra186 and prior supports maximum 4K bytes per packet transfer including 12 bytes of packet header. This patch fixes max write length limit to account packet header size for transfers. Cc: stable@vger.kernel.org # 4.4+ Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index ec2d11af6c78..b90f1512f59c 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -794,7 +794,7 @@ static const struct i2c_algorithm tegra_i2c_algo = { /* payload size is only 12 bit */ static const struct i2c_adapter_quirks tegra_i2c_quirks = { .max_read_len = 4096, - .max_write_len = 4096, + .max_write_len = 4096 - 12, }; static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { From 8b8a3be516679ee805e5f6122853606ba6f933a4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jan 2019 23:51:42 -0800 Subject: [PATCH 2404/2608] crypto: arm64/aes-neonbs - fix returning final keystream block commit 12455e320e19e9cc7ad97f4ab89c280fe297387c upstream. The arm64 NEON bit-sliced implementation of AES-CTR fails the improved skcipher tests because it sometimes produces the wrong ciphertext. The bug is that the final keystream block isn't returned from the assembly code when the number of non-final blocks is zero. This can happen if the input data ends a few bytes after a page boundary. In this case the last bytes get "encrypted" by XOR'ing them with uninitialized memory. Fix the assembly code to return the final keystream block when needed. Fixes: 88a3f582bea9 ("crypto: arm64/aes - don't use IV buffer to return final keystream block") Cc: # v4.11+ Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-neonbs-core.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index ca0472500433..3b18e3e79531 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -940,7 +940,7 @@ CPU_LE( rev x8, x8 ) 8: next_ctr v0 cbnz x4, 99b -0: st1 {v0.16b}, [x5] + st1 {v0.16b}, [x5] ldp x29, x30, [sp], #16 ret @@ -948,6 +948,9 @@ CPU_LE( rev x8, x8 ) * If we are handling the tail of the input (x6 != NULL), return the * final keystream block back to the caller. */ +0: cbz x6, 8b + st1 {v0.16b}, [x6] + b 8b 1: cbz x6, 8b st1 {v1.16b}, [x6] b 8b From c44d8741258d577e0d4d59d1ce3bcf7d4c03bc95 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Mar 2019 11:04:08 +0000 Subject: [PATCH 2405/2608] drm/i915: Relax mmap VMA check [ Upstream commit ca22f32a6296cbfa29de56328c8505560a18cfa8 ] Legacy behaviour was to allow non-page-aligned mmap requests, as does the linux mmap(2) implementation by virtue of automatically rounding up for the caller. To avoid breaking legacy userspace relax the newly introduced fix. Signed-off-by: Tvrtko Ursulin Fixes: 5c4604e757ba ("drm/i915: Prevent a race during I915_GEM_MMAP ioctl with WC set") Reported-by: Guenter Roeck Cc: Adam Zabrocki Cc: Joonas Lahtinen Cc: # v4.0+ Cc: Akash Goel Cc: Chris Wilson Cc: Jani Nikula Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190305110409.28633-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit a90e1948efb648f567444f87f3c19b2a0787affd) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5d8a67c65141..727018a16cca 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1640,7 +1640,8 @@ __vma_matches(struct vm_area_struct *vma, struct file *filp, if (vma->vm_file != filp) return false; - return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size; + return vma->vm_start == addr && + (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size); } /** From 3b2a3fe67bf8884a97910f8e80fcc58ac2a582dc Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Fri, 15 Feb 2019 18:45:08 +0200 Subject: [PATCH 2406/2608] serial: uartps: Fix stuck ISR if RX disabled with non-empty FIFO commit 7abab1605139bc41442864c18f9573440f7ca105 upstream. If RX is disabled while there are still unprocessed bytes in RX FIFO, cdns_uart_handle_rx() called from interrupt handler will get stuck in the receive loop as read bytes will not get removed from the RX FIFO and CDNS_UART_SR_RXEMPTY bit will never get set. Avoid the stuck handler by checking first if RX is disabled. port->lock protects against race with RX-disabling functions. This HW behavior was mentioned by Nathan Rossi in 43e98facc4a3 ("tty: xuartps: Fix RX hang, and TX corruption in termios call") which fixed a similar issue in cdns_uart_set_termios(). The behavior can also be easily verified by e.g. setting CDNS_UART_CR_RX_DIS at the beginning of cdns_uart_handle_rx() - the following loop will then get stuck. Resetting the FIFO using RXRST would not set RXEMPTY either so simply issuing a reset after RX-disable would not work. I observe this frequently on a ZynqMP board during heavy RX load at 1M baudrate when the reader process exits and thus RX gets disabled. Fixes: 61ec9016988f ("tty/serial: add support for Xilinx PS UART") Signed-off-by: Anssi Hannula Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 217686cb4cd3..f438a2158006 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -366,7 +366,13 @@ static irqreturn_t cdns_uart_isr(int irq, void *dev_id) cdns_uart_handle_tx(dev_id); isrstatus &= ~CDNS_UART_IXR_TXEMPTY; } - if (isrstatus & CDNS_UART_IXR_RXMASK) + + /* + * Skip RX processing if RX is disabled as RXEMPTY will never be set + * as read bytes will not be removed from the FIFO. + */ + if (isrstatus & CDNS_UART_IXR_RXMASK && + !(readl(port->membase + CDNS_UART_CR) & CDNS_UART_CR_RX_DIS)) cdns_uart_handle_rx(dev_id, isrstatus); spin_unlock(&port->lock); From afb6cf5eb55bb49b110e9826a03b777f428ce2be Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sun, 24 Feb 2019 13:00:53 +0100 Subject: [PATCH 2407/2608] serial: 8250_of: assume reg-shift of 2 for mrvl,mmp-uart commit f4817843e39ce78aace0195a57d4e8500a65a898 upstream. There are two other drivers that bind to mrvl,mmp-uart and both of them assume register shift of 2 bits. There are device trees that lack the property and rely on that assumption. If this driver wins the race to bind to those devices, it should behave the same as the older deprecated driver. Signed-off-by: Lubomir Rintel Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_of.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 3613a6aabfb3..ec510e342e06 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -105,6 +105,10 @@ static int of_platform_serial_setup(struct platform_device *ofdev, if (of_property_read_u32(np, "reg-offset", &prop) == 0) port->mapbase += prop; + /* Compatibility with the deprecated pxa driver and 8250_pxa drivers. */ + if (of_device_is_compatible(np, "mrvl,mmp-uart")) + port->regshift = 2; + /* Check for registers offset within the devices address range */ if (of_property_read_u32(np, "reg-shift", &prop) == 0) port->regshift = prop; From bad49519b26aa493ee78420cd9d687e90178b00c Mon Sep 17 00:00:00 2001 From: Jay Dolan Date: Tue, 12 Feb 2019 21:43:11 -0800 Subject: [PATCH 2408/2608] serial: 8250_pci: Fix number of ports for ACCES serial cards commit b896b03bc7fce43a07012cc6bf5e2ab2fddf3364 upstream. Have the correct number of ports created for ACCES serial cards. Two port cards show up as four ports, and four port cards show up as eight. Fixes: c8d192428f52 ("serial: 8250: added acces i/o products quad and octal serial cards") Signed-off-by: Jay Dolan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 790375b5eeb2..6b437713564d 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4580,10 +4580,10 @@ static const struct pci_device_id serial_pci_tbl[] = { */ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -4592,10 +4592,10 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -4604,10 +4604,10 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -4616,13 +4616,13 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7951 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -4631,16 +4631,16 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -4649,13 +4649,13 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7958 }, @@ -4664,19 +4664,19 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7958 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7958 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7958 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) */ From 05d787ebe2f02b734e32b61d3ebfc024ae996683 Mon Sep 17 00:00:00 2001 From: Jay Dolan Date: Tue, 12 Feb 2019 21:43:12 -0800 Subject: [PATCH 2409/2608] serial: 8250_pci: Have ACCES cards that use the four port Pericom PI7C9X7954 chip use the pci_pericom_setup() commit 78d3820b9bd39028727c6aab7297b63c093db343 upstream. The four port Pericom chips have the fourth port at the wrong address. Make use of quirk to fix it. Fixes: c8d192428f52 ("serial: 8250: added acces i/o products quad and octal serial cards") Cc: stable Signed-off-by: Jay Dolan Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 105 +++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 6b437713564d..b31fed7f1679 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2033,6 +2033,111 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .setup = pci_default_setup, .exit = pci_plx9050_exit, }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, /* * SBS Technologies, Inc., PMC-OCTALPRO 232 */ From 8c87c8633163f92f141b66556473332eb49e78f0 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sun, 10 Feb 2019 23:23:04 -0500 Subject: [PATCH 2410/2608] jbd2: clear dirty flag when revoking a buffer from an older transaction commit 904cdbd41d749a476863a0ca41f6f396774f26e4 upstream. Now, we capture a data corruption problem on ext4 while we're truncating an extent index block. Imaging that if we are revoking a buffer which has been journaled by the committing transaction, the buffer's jbddirty flag will not be cleared in jbd2_journal_forget(), so the commit code will set the buffer dirty flag again after refile the buffer. fsx kjournald2 jbd2_journal_commit_transaction jbd2_journal_revoke commit phase 1~5... jbd2_journal_forget belongs to older transaction commit phase 6 jbddirty not clear __jbd2_journal_refile_buffer __jbd2_journal_unfile_buffer test_clear_buffer_jbddirty mark_buffer_dirty Finally, if the freed extent index block was allocated again as data block by some other files, it may corrupt the file data after writing cached pages later, such as during unmount time. (In general, clean_bdev_aliases() related helpers should be invoked after re-allocation to prevent the above corruption, but unfortunately we missed it when zeroout the head of extra extent blocks in ext4_ext_handle_unwritten_extents()). This patch mark buffer as freed and set j_next_transaction to the new transaction when it already belongs to the committing transaction in jbd2_journal_forget(), so that commit code knows it should clear dirty bits when it is done with the buffer. This problem can be reproduced by xfstests generic/455 easily with seeds (3246 3247 3248 3249). Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e42736c1fdc8..9a835da3dda9 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1581,14 +1581,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) /* However, if the buffer is still owned by a prior * (committing) transaction, we can't drop it yet... */ JBUFFER_TRACE(jh, "belongs to older transaction"); - /* ... but we CAN drop it from the new transaction if we - * have also modified it since the original commit. */ + /* ... but we CAN drop it from the new transaction through + * marking the buffer as freed and set j_next_transaction to + * the new transaction, so that not only the commit code + * knows it should clear dirty bits when it is done with the + * buffer, but also the buffer can be checkpointed only + * after the new transaction commits. */ - if (jh->b_next_transaction) { - J_ASSERT(jh->b_next_transaction == transaction); + set_buffer_freed(bh); + + if (!jh->b_next_transaction) { spin_lock(&journal->j_list_lock); - jh->b_next_transaction = NULL; + jh->b_next_transaction = transaction; spin_unlock(&journal->j_list_lock); + } else { + J_ASSERT(jh->b_next_transaction == transaction); /* * only drop a reference if this transaction modified From e1e3101a0c142c2234f7fded816685e3e4a437df Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Thu, 21 Feb 2019 11:24:09 -0500 Subject: [PATCH 2411/2608] jbd2: fix compile warning when using JBUFFER_TRACE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 01215d3edb0f384ddeaa5e4a22c1ae5ff634149f upstream. The jh pointer may be used uninitialized in the two cases below and the compiler complain about it when enabling JBUFFER_TRACE macro, fix them. In file included from fs/jbd2/transaction.c:19:0: fs/jbd2/transaction.c: In function ‘jbd2_journal_get_undo_access’: ./include/linux/jbd2.h:1637:38: warning: ‘jh’ is used uninitialized in this function [-Wuninitialized] #define JBUFFER_TRACE(jh, info) do { printk("%s: %d\n", __func__, jh->b_jcount);} while (0) ^ fs/jbd2/transaction.c:1219:23: note: ‘jh’ was declared here struct journal_head *jh; ^ In file included from fs/jbd2/transaction.c:19:0: fs/jbd2/transaction.c: In function ‘jbd2_journal_dirty_metadata’: ./include/linux/jbd2.h:1637:38: warning: ‘jh’ may be used uninitialized in this function [-Wmaybe-uninitialized] #define JBUFFER_TRACE(jh, info) do { printk("%s: %d\n", __func__, jh->b_jcount);} while (0) ^ fs/jbd2/transaction.c:1332:23: note: ‘jh’ was declared here struct journal_head *jh; ^ Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 9a835da3dda9..650927f0a2dc 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1224,11 +1224,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; char *committed_data = NULL; - JBUFFER_TRACE(jh, "entry"); if (jbd2_write_access_granted(handle, bh, true)) return 0; jh = jbd2_journal_add_journal_head(bh); + JBUFFER_TRACE(jh, "entry"); + /* * Do this first --- it can drop the journal lock, so we want to * make sure that obtaining the committed_data is done @@ -1339,15 +1340,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) if (is_handle_aborted(handle)) return -EROFS; - if (!buffer_jbd(bh)) { - ret = -EUCLEAN; - goto out; - } + if (!buffer_jbd(bh)) + return -EUCLEAN; + /* * We don't grab jh reference here since the buffer must be part * of the running transaction. */ jh = bh2jh(bh); + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); + /* * This and the following assertions are unreliable since we may see jh * in inconsistent state unless we grab bh_state lock. But this is @@ -1381,9 +1384,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) } journal = transaction->t_journal; - jbd_debug(5, "journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); - jbd_lock_bh_state(bh); if (jh->b_modified == 0) { From af9e57baf5210fe0263f41bfa20394ecb355be67 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 5 Mar 2019 16:17:58 -0500 Subject: [PATCH 2412/2608] security/selinux: fix SECURITY_LSM_NATIVE_LABELS on reused superblock commit 3815a245b50124f0865415dcb606a034e97494d4 upstream. In the case when we're reusing a superblock, selinux_sb_clone_mnt_opts() fails to set set_kern_flags, with the result that nfs_clone_sb_security() incorrectly clears NFS_CAP_SECURITY_LABEL. The result is that if you mount the same NFS filesystem twice, NFS security labels are turned off, even if they would work fine if you mounted the filesystem only once. ("fixes" may be not exactly the right tag, it may be more like "fixed-other-cases-but-missed-this-one".) Cc: Scott Mayhew Cc: stable@vger.kernel.org Fixes: 0b4d3452b8b4 "security/selinux: allow security_sb_clone_mnt_opts..." Signed-off-by: J. Bruce Fields Acked-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d6b9ed34ceae..a5d9c0146ac3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1000,8 +1000,11 @@ static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, BUG_ON(!(oldsbsec->flags & SE_SBINITIALIZED)); /* if fs is reusing a sb, make sure that the contexts match */ - if (newsbsec->flags & SE_SBINITIALIZED) + if (newsbsec->flags & SE_SBINITIALIZED) { + if ((kern_flags & SECURITY_LSM_NATIVE_LABELS) && !set_context) + *set_kern_flags |= SECURITY_LSM_NATIVE_LABELS; return selinux_cmp_sb_context(oldsb, newsb); + } mutex_lock(&newsbsec->lock); From 3725c7cc05f0a6ab73e3ecc915df3378d6b34862 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 27 Feb 2019 11:45:30 +0000 Subject: [PATCH 2413/2608] powerpc/32: Clear on-stack exception marker upon exception return commit 9580b71b5a7863c24a9bd18bcd2ad759b86b1eff upstream. Clear the on-stack STACK_FRAME_REGS_MARKER on exception exit in order to avoid confusing stacktrace like the one below. Call Trace: [c0e9dca0] [c01c42a0] print_address_description+0x64/0x2bc (unreliable) [c0e9dcd0] [c01c4684] kasan_report+0xfc/0x180 [c0e9dd10] [c0895130] memchr+0x24/0x74 [c0e9dd30] [c00a9e38] msg_print_text+0x124/0x574 [c0e9dde0] [c00ab710] console_unlock+0x114/0x4f8 [c0e9de40] [c00adc60] vprintk_emit+0x188/0x1c4 --- interrupt: c0e9df00 at 0x400f330 LR = init_stack+0x1f00/0x2000 [c0e9de80] [c00ae3c4] printk+0xa8/0xcc (unreliable) [c0e9df20] [c0c27e44] early_irq_init+0x38/0x108 [c0e9df50] [c0c15434] start_kernel+0x310/0x488 [c0e9dff0] [00003484] 0x3484 With this patch the trace becomes: Call Trace: [c0e9dca0] [c01c42c0] print_address_description+0x64/0x2bc (unreliable) [c0e9dcd0] [c01c46a4] kasan_report+0xfc/0x180 [c0e9dd10] [c0895150] memchr+0x24/0x74 [c0e9dd30] [c00a9e58] msg_print_text+0x124/0x574 [c0e9dde0] [c00ab730] console_unlock+0x114/0x4f8 [c0e9de40] [c00adc80] vprintk_emit+0x188/0x1c4 [c0e9de80] [c00ae3e4] printk+0xa8/0xcc [c0e9df20] [c0c27e44] early_irq_init+0x38/0x108 [c0e9df50] [c0c15434] start_kernel+0x310/0x488 [c0e9dff0] [00003484] 0x3484 Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_32.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e780e1fbf6c2..4ae464b9d490 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -726,6 +726,9 @@ fast_exception_return: mtcr r10 lwz r10,_LINK(r11) mtlr r10 + /* Clear the exception_marker on the stack to avoid confusing stacktrace */ + li r10, 0 + stw r10, 8(r11) REST_GPR(10, r11) #ifdef CONFIG_PPC_8xx_PERF_EVENT mtspr SPRN_NRI, r0 @@ -963,6 +966,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtcrf 0xFF,r10 mtlr r11 + /* Clear the exception_marker on the stack to avoid confusing stacktrace */ + li r10, 0 + stw r10, 8(r1) /* * Once we put values in SRR0 and SRR1, we are in a state * where exceptions are not recoverable, since taking an @@ -1002,6 +1008,9 @@ exc_exit_restart_end: mtlr r11 lwz r10,_CCR(r1) mtcrf 0xff,r10 + /* Clear the exception_marker on the stack to avoid confusing stacktrace */ + li r10, 0 + stw r10, 8(r1) REST_2GPRS(9, r1) .globl exc_exit_restart exc_exit_restart: From 39394decd8f4d48ab558726f86589bf5c7d3e4da Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 19:08:37 +0000 Subject: [PATCH 2414/2608] powerpc/wii: properly disable use of BATs when requested. commit 6d183ca8baec983dc4208ca45ece3c36763df912 upstream. 'nobats' kernel parameter or some options like CONFIG_DEBUG_PAGEALLOC deny the use of BATS for mapping memory. This patch makes sure that the specific wii RAM mapping function takes it into account as well. Fixes: de32400dd26e ("wii: use both mem1 and mem2 as ram") Cc: stable@vger.kernel.org Reviewed-by: Jonathan Neuschafer Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/embedded6xx/wii.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 3fd683e40bc9..2914529c0695 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -104,6 +104,10 @@ unsigned long __init wii_mmu_mapin_mem2(unsigned long top) /* MEM2 64MB@0x10000000 */ delta = wii_hole_start + wii_hole_size; size = top - delta; + + if (__map_without_bats) + return delta; + for (bl = 128<<10; bl < max_size; bl <<= 1) { if (bl * 2 > size) break; From b49eb6061584f9a715c0353b1455e5803269138a Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 27 Feb 2019 14:02:29 +1100 Subject: [PATCH 2415/2608] powerpc/powernv: Make opal log only readable by root commit 7b62f9bd2246b7d3d086e571397c14ba52645ef1 upstream. Currently the opal log is globally readable. It is kernel policy to limit the visibility of physical addresses / kernel pointers to root. Given this and the fact the opal log may contain this information it would be better to limit the readability to root. Fixes: bfc36894a48b ("powerpc/powernv: Add OPAL message log interface") Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Jordan Niethe Reviewed-by: Stewart Smith Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal-msglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 7a9cde0cfbd1..2ee7af22138e 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -98,7 +98,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, } static struct bin_attribute opal_msglog_attr = { - .attr = {.name = "msglog", .mode = 0444}, + .attr = {.name = "msglog", .mode = 0400}, .read = opal_msglog_read }; From 4214580444f0a17a56e676f34dc0da0eb0ddd83b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Jan 2019 12:03:55 +0000 Subject: [PATCH 2416/2608] powerpc/83xx: Also save/restore SPRG4-7 during suspend commit 36da5ff0bea2dc67298150ead8d8471575c54c7d upstream. The 83xx has 8 SPRG registers and uses at least SPRG4 for DTLB handling LRU. Fixes: 2319f1239592 ("powerpc/mm: e300c2/c3/c4 TLB errata workaround") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/83xx/suspend-asm.S | 34 ++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S index 3d1ecd211776..8137f77abad5 100644 --- a/arch/powerpc/platforms/83xx/suspend-asm.S +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -26,13 +26,13 @@ #define SS_MSR 0x74 #define SS_SDR1 0x78 #define SS_LR 0x7c -#define SS_SPRG 0x80 /* 4 SPRGs */ -#define SS_DBAT 0x90 /* 8 DBATs */ -#define SS_IBAT 0xd0 /* 8 IBATs */ -#define SS_TB 0x110 -#define SS_CR 0x118 -#define SS_GPREG 0x11c /* r12-r31 */ -#define STATE_SAVE_SIZE 0x16c +#define SS_SPRG 0x80 /* 8 SPRGs */ +#define SS_DBAT 0xa0 /* 8 DBATs */ +#define SS_IBAT 0xe0 /* 8 IBATs */ +#define SS_TB 0x120 +#define SS_CR 0x128 +#define SS_GPREG 0x12c /* r12-r31 */ +#define STATE_SAVE_SIZE 0x17c .section .data .align 5 @@ -103,6 +103,16 @@ _GLOBAL(mpc83xx_enter_deep_sleep) stw r7, SS_SPRG+12(r3) stw r8, SS_SDR1(r3) + mfspr r4, SPRN_SPRG4 + mfspr r5, SPRN_SPRG5 + mfspr r6, SPRN_SPRG6 + mfspr r7, SPRN_SPRG7 + + stw r4, SS_SPRG+16(r3) + stw r5, SS_SPRG+20(r3) + stw r6, SS_SPRG+24(r3) + stw r7, SS_SPRG+28(r3) + mfspr r4, SPRN_DBAT0U mfspr r5, SPRN_DBAT0L mfspr r6, SPRN_DBAT1U @@ -493,6 +503,16 @@ mpc83xx_deep_resume: mtspr SPRN_IBAT7U, r6 mtspr SPRN_IBAT7L, r7 + lwz r4, SS_SPRG+16(r3) + lwz r5, SS_SPRG+20(r3) + lwz r6, SS_SPRG+24(r3) + lwz r7, SS_SPRG+28(r3) + + mtspr SPRN_SPRG4, r4 + mtspr SPRN_SPRG5, r5 + mtspr SPRN_SPRG6, r6 + mtspr SPRN_SPRG7, r7 + lwz r4, SS_SPRG+0(r3) lwz r5, SS_SPRG+4(r3) lwz r6, SS_SPRG+8(r3) From 20913ee182e1edb944e58148061c36b369d6bcb8 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Fri, 8 Feb 2019 14:33:19 +0000 Subject: [PATCH 2417/2608] powerpc: Fix 32-bit KVM-PR lockup and host crash with MacOS guest commit fe1ef6bcdb4fca33434256a802a3ed6aacf0bd2f upstream. Commit 8792468da5e1 "powerpc: Add the ability to save FPU without giving it up" unexpectedly removed the MSR_FE0 and MSR_FE1 bits from the bitmask used to update the MSR of the previous thread in __giveup_fpu() causing a KVM-PR MacOS guest to lockup and panic the host kernel. Leaving FE0/1 enabled means unrelated processes might receive FPEs when they're not expecting them and crash. In particular if this happens to init the host will then panic. eg (transcribed): qemu-system-ppc[837]: unhandled signal 8 at 12cc9ce4 nip 12cc9ce4 lr 12cc9ca4 code 0 systemd[1]: unhandled signal 8 at 202f02e0 nip 202f02e0 lr 001003d4 code 0 Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Reinstate these bits to the MSR bitmask to enable MacOS guests to run under 32-bit KVM-PR once again without issue. Fixes: 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Mark Cave-Ayland Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a0c74bbf3454..b10531372d7f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -156,7 +156,7 @@ void __giveup_fpu(struct task_struct *tsk) save_fpu(tsk); msr = tsk->thread.regs->msr; - msr &= ~MSR_FP; + msr &= ~(MSR_FP|MSR_FE0|MSR_FE1); #ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; From 1f9c3f20a3327358e8cbfcfd30178627fa90d382 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Feb 2019 11:08:29 +1100 Subject: [PATCH 2418/2608] powerpc/ptrace: Simplify vr_get/set() to avoid GCC warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ca6d5149d2ad0a8d2f9c28cbe379802260a0a5e0 upstream. GCC 8 warns about the logic in vr_get/set(), which with -Werror breaks the build: In function ‘user_regset_copyin’, inlined from ‘vr_set’ at arch/powerpc/kernel/ptrace.c:628:9: include/linux/regset.h:295:4: error: ‘memcpy’ offset [-527, -529] is out of the bounds [0, 16] of object ‘vrsave’ with type ‘union ’ [-Werror=array-bounds] arch/powerpc/kernel/ptrace.c: In function ‘vr_set’: arch/powerpc/kernel/ptrace.c:623:5: note: ‘vrsave’ declared here } vrsave; This has been identified as a regression in GCC, see GCC bug 88273. However we can avoid the warning and also simplify the logic and make it more robust. Currently we pass -1 as end_pos to user_regset_copyout(). This says "copy up to the end of the regset". The definition of the regset is: [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), .active = vr_active, .get = vr_get, .set = vr_set }, The end is calculated as (n * size), ie. 34 * sizeof(vector128). In vr_get/set() we pass start_pos as 33 * sizeof(vector128), meaning we can copy up to sizeof(vector128) into/out-of vrsave. The on-stack vrsave is defined as: union { elf_vrreg_t reg; u32 word; } vrsave; And elf_vrreg_t is: typedef __vector128 elf_vrreg_t; So there is no bug, but we rely on all those sizes lining up, otherwise we would have a kernel stack exposure/overwrite on our hands. Rather than relying on that we can pass an explict end_pos based on the sizeof(vrsave). The result should be exactly the same but it's more obviously not over-reading/writing the stack and it avoids the compiler warning. Reported-by: Meelis Roos Reported-by: Mathieu Malaterre Cc: stable@vger.kernel.org Tested-by: Mathieu Malaterre Tested-by: Meelis Roos Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/ptrace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 81750d9624ab..bfc5f59d9f1b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -547,6 +547,7 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, /* * Copy out only the low-order word of vrsave. */ + int start, end; union { elf_vrreg_t reg; u32 word; @@ -555,8 +556,10 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, vrsave.word = target->thread.vrsave; + start = 33 * sizeof(vector128); + end = start + sizeof(vrsave); ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); + start, end); } return ret; @@ -594,6 +597,7 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, /* * We use only the first word of vrsave. */ + int start, end; union { elf_vrreg_t reg; u32 word; @@ -602,8 +606,10 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, vrsave.word = target->thread.vrsave; + start = 33 * sizeof(vector128); + end = start + sizeof(vrsave); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); + start, end); if (!ret) target->thread.vrsave = vrsave.word; } From a22b34c99a67d2222cc2df86211d63fe1b352a15 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 22 Feb 2019 22:55:31 +0530 Subject: [PATCH 2419/2608] powerpc/hugetlb: Don't do runtime allocation of 16G pages in LPAR configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 35f2806b481f5b9207f25e1886cba5d1c4d12cc7 upstream. We added runtime allocation of 16G pages in commit 4ae279c2c96a ("powerpc/mm/hugetlb: Allow runtime allocation of 16G.") That was done to enable 16G allocation on PowerNV and KVM config. In case of KVM config, we mostly would have the entire guest RAM backed by 16G hugetlb pages for this to work. PAPR do support partial backing of guest RAM with hugepages via ibm,expected#pages node of memory node in the device tree. This means rest of the guest RAM won't be backed by 16G contiguous pages in the host and hence a hash page table insertion can fail in such case. An example error message will look like hash-mmu: mm: Hashing failure ! EA=0x7efc00000000 access=0x8000000000000006 current=readback hash-mmu: trap=0x300 vsid=0x67af789 ssize=1 base psize=14 psize 14 pte=0xc000000400000386 readback[12260]: unhandled signal 7 at 00007efc00000000 nip 00000000100012d0 lr 000000001000127c code 2 This patch address that by preventing runtime allocation of 16G hugepages in LPAR config. To allocate 16G hugetlb one need to kernel command line hugepagesz=16G hugepages= With radix translation mode we don't run into this issue. This change will prevent runtime allocation of 16G hugetlb pages on kvm with hash translation mode. However, with the current upstream it was observed that 16G hugetlbfs backed guest doesn't boot at all. We observe boot failure with the below message: [131354.647546] KVM: map_vrma at 0 failed, ret=-4 That means this patch is not resulting in an observable regression. Once we fix the boot issue with 16G hugetlb backed memory, we need to use ibm,expected#pages memory node attribute to indicate 16G page reservation to the guest. This will also enable partial backing of guest RAM with 16G pages. Fixes: 4ae279c2c96a ("powerpc/mm/hugetlb: Allow runtime allocation of 16G.") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index c459f937d484..8438df443540 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -55,6 +55,14 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static inline bool gigantic_page_supported(void) { + /* + * We used gigantic page reservation with hypervisor assist in some case. + * We cannot use runtime allocation of gigantic pages in those platforms + * This is hash translation mode LPARs. + */ + if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()) + return false; + return true; } #endif From bfb576fa7c56fabae9ac645e30d7cd7db6fee4cf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Jan 2019 14:11:24 +0000 Subject: [PATCH 2420/2608] powerpc/traps: fix recoverability of machine check handling on book3s/32 commit 0bbea75c476b77fa7d7811d6be911cc7583e640f upstream. Looks like book3s/32 doesn't set RI on machine check, so checking RI before calling die() will always be fatal allthought this is not an issue in most cases. Fixes: b96672dd840f ("powerpc: Machine check interrupt is a non-maskable interrupt") Fixes: daf00ae71dad ("powerpc/traps: restore recoverability of machine_check interrupts") Signed-off-by: Christophe Leroy Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/traps.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a5f2b7593976..0322d5689461 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -694,15 +694,15 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable Machine check"); - if (!nested) nmi_exit(); die("Machine check", regs, SIGBUS); + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + nmi_panic(regs, "Unrecoverable Machine check"); + return; bail: From a8cae6144a25054875d0ab6e31f550af7e531500 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Jan 2019 16:37:55 +0000 Subject: [PATCH 2421/2608] powerpc/traps: Fix the message printed when stack overflows commit 9bf3d3c4e4fd82c7174f4856df372ab2a71005b9 upstream. Today's message is useless: [ 42.253267] Kernel stack overflow in process (ptrval), r1=c65500b0 This patch fixes it: [ 66.905235] Kernel stack overflow in process sh[356], r1=c65560b0 Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Christophe Leroy [mpe: Use task_pid_nr()] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0322d5689461..3c9457420aee 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1292,8 +1292,8 @@ void slb_miss_bad_addr(struct pt_regs *regs) void StackOverflow(struct pt_regs *regs) { - printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", - current, regs->gpr[1]); + pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n", + current->comm, task_pid_nr(current), regs->gpr[1]); debugger(regs); show_regs(regs); panic("kernel stack overflow"); From 6f419070c83cb27dc1ad4085181938712a583d2f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 3 Jan 2019 14:14:08 -0600 Subject: [PATCH 2422/2608] ARM: s3c24xx: Fix boolean expressions in osiris_dvs_notify commit e2477233145f2156434afb799583bccd878f3e9f upstream. Fix boolean expressions by using logical AND operator '&&' instead of bitwise operator '&'. This issue was detected with the help of Coccinelle. Fixes: 4fa084af28ca ("ARM: OSIRIS: DVS (Dynamic Voltage Scaling) supoort.") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva [krzk: Fix -Wparentheses warning] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-s3c24xx/mach-osiris-dvs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-s3c24xx/mach-osiris-dvs.c b/arch/arm/mach-s3c24xx/mach-osiris-dvs.c index 6cac7da15e2b..2e8ad83beda8 100644 --- a/arch/arm/mach-s3c24xx/mach-osiris-dvs.c +++ b/arch/arm/mach-s3c24xx/mach-osiris-dvs.c @@ -70,16 +70,16 @@ static int osiris_dvs_notify(struct notifier_block *nb, switch (val) { case CPUFREQ_PRECHANGE: - if (old_dvs & !new_dvs || - cur_dvs & !new_dvs) { + if ((old_dvs && !new_dvs) || + (cur_dvs && !new_dvs)) { pr_debug("%s: exiting dvs\n", __func__); cur_dvs = false; gpio_set_value(OSIRIS_GPIO_DVS, 1); } break; case CPUFREQ_POSTCHANGE: - if (!old_dvs & new_dvs || - !cur_dvs & new_dvs) { + if ((!old_dvs && new_dvs) || + (!cur_dvs && new_dvs)) { pr_debug("entering dvs\n"); cur_dvs = true; gpio_set_value(OSIRIS_GPIO_DVS, 0); From a8aac659b9652430ccf898dd61bc6f996e3aef9d Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:58:39 +0000 Subject: [PATCH 2423/2608] arm64: Fix HCR.TGE status for NMI contexts commit 5870970b9a828d8693aa6d15742573289d7dbcd0 upstream. When using VHE, the host needs to clear HCR_EL2.TGE bit in order to interact with guest TLBs, switching from EL2&0 translation regime to EL1&0. However, some non-maskable asynchronous event could happen while TGE is cleared like SDEI. Because of this address translation operations relying on EL2&0 translation regime could fail (tlb invalidation, userspace access, ...). Fix this by properly setting HCR_EL2.TGE when entering NMI context and clear it if necessary when returning to the interrupted context. Signed-off-by: Julien Thierry Suggested-by: Marc Zyngier Reviewed-by: Marc Zyngier Reviewed-by: James Morse Cc: Arnd Bergmann Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/hardirq.h | 31 +++++++++++++++++++++++++++++++ arch/arm64/kernel/irq.c | 3 +++ include/linux/hardirq.h | 7 +++++++ 3 files changed, 41 insertions(+) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 1473fc2f7ab7..89691c86640a 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -17,8 +17,12 @@ #define __ASM_HARDIRQ_H #include +#include #include +#include #include +#include +#include #define NR_IPI 7 @@ -37,6 +41,33 @@ u64 smp_irq_stat_cpu(unsigned int cpu); #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 +struct nmi_ctx { + u64 hcr; +}; + +DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); + +#define arch_nmi_enter() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + nmi_ctx->hcr = read_sysreg(hcr_el2); \ + if (!(nmi_ctx->hcr & HCR_TGE)) { \ + write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ + isb(); \ + } \ + } \ + } while (0) + +#define arch_nmi_exit() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + if (!(nmi_ctx->hcr & HCR_TGE)) \ + write_sysreg(nmi_ctx->hcr, hcr_el2); \ + } \ + } while (0) + static inline void ack_bad_irq(unsigned int irq) { extern unsigned long irq_err_count; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 713561e5bcab..b91abb8f7cd4 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -32,6 +32,9 @@ unsigned long irq_err_count; +/* Only access this in an NMI enter/exit */ +DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); + DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 0fbbcdf0c178..da0af631ded5 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -60,8 +60,14 @@ extern void irq_enter(void); */ extern void irq_exit(void); +#ifndef arch_nmi_enter +#define arch_nmi_enter() do { } while (0) +#define arch_nmi_exit() do { } while (0) +#endif + #define nmi_enter() \ do { \ + arch_nmi_enter(); \ printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ @@ -80,6 +86,7 @@ extern void irq_exit(void); ftrace_nmi_exit(); \ lockdep_on(); \ printk_nmi_exit(); \ + arch_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ From d5ed64073c4bc4254c0fe32d2b3d013d45299b89 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 1 Mar 2019 13:28:01 +0000 Subject: [PATCH 2424/2608] arm64: debug: Ensure debug handlers check triggering exception level commit 6bd288569b50bc89fa5513031086746968f585cb upstream. Debug exception handlers may be called for exceptions generated both by user and kernel code. In many cases, this is checked explicitly, but in other cases things either happen to work by happy accident or they go slightly wrong. For example, executing 'brk #4' from userspace will enter the kprobes code and be ignored, but the instruction will be retried forever in userspace instead of delivering a SIGTRAP. Fix this issue in the most stable-friendly fashion by simply adding explicit checks of the triggering exception level to all of our debug exception handlers. Cc: Reviewed-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/kgdb.c | 14 ++++++++++---- arch/arm64/kernel/probes/kprobes.c | 6 ++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 2122cd187f19..470afb3a04ca 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -233,27 +233,33 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (!kgdb_single_step) + if (user_mode(regs) || !kgdb_single_step) return DBG_HOOK_ERROR; kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_step_brk_fn); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 7d8c33279e9f..6a6d661f38fb 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -458,6 +458,9 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; + if (user_mode(regs)) + return DBG_HOOK_ERROR; + /* return error if this is not our step */ retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); @@ -474,6 +477,9 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) int __kprobes kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + kprobe_handler(regs); return DBG_HOOK_HANDLED; } From 5b8749bd1154c4b3425d2aa6060ebaa21c866603 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 21 Feb 2019 11:42:32 +0000 Subject: [PATCH 2425/2608] arm64: KVM: Fix architecturally invalid reset value for FPEXC32_EL2 commit c88b093693ccbe41991ef2e9b1d251945e6e54ed upstream. Due to what looks like a typo dating back to the original addition of FPEXC32_EL2 handling, KVM currently initialises this register to an architecturally invalid value. As a result, the VECITR field (RES1) in bits [10:8] is initialised with 0, and the two reserved (RES0) bits [6:5] are initialised with 1. (In the Common VFP Subarchitecture as specified by ARMv7-A, these two bits were IMP DEF. ARMv8-A removes them.) This patch changes the reset value from 0x70 to 0x700, which reflects the architectural constraints and is presumably what was originally intended. Cc: # 4.12.x- Cc: Christoffer Dall Fixes: 62a89c44954f ("arm64: KVM: 32bit handling of coprocessor traps") Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2e070d3baf9f..cfbf7bd0dfba 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1079,7 +1079,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, - { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x70 }, + { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, }; static bool trap_dbgidr(struct kvm_vcpu *vcpu, From 8259d5615683288dd06e46ee6ece2fb225ae9ff7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 6 Jan 2019 21:06:25 +1100 Subject: [PATCH 2426/2608] dm: fix to_sector() for 32bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0bdb50c531f7377a9da80d3ce2d61f389c84cb30 upstream. A dm-raid array with devices larger than 4GB won't assemble on a 32 bit host since _check_data_dev_sectors() was added in 4.16. This is because to_sector() treats its argument as an "unsigned long" which is 32bits (4GB) on a 32bit host. Using "unsigned long long" is more correct. Kernels as early as 4.2 can have other problems due to to_sector() being used on the size of a device. Fixes: 0cf4503174c1 ("dm raid: add support for the MD RAID0 personality") cc: stable@vger.kernel.org (v4.2+) Reported-and-tested-by: Guillaume Perréal Signed-off-by: NeilBrown Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- include/linux/device-mapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a5538433c927..91a063a1f3b3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -630,7 +630,7 @@ do { \ */ #define dm_target_offset(ti, sector) ((sector) - (ti)->begin) -static inline sector_t to_sector(unsigned long n) +static inline sector_t to_sector(unsigned long long n) { return (n >> SECTOR_SHIFT); } From e71d3d35c8fa02d400d197f32cf3137aca491d79 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 6 Mar 2019 08:29:34 -0500 Subject: [PATCH 2427/2608] dm integrity: limit the rate of error messages commit 225557446856448039a9e495da37b72c20071ef2 upstream. When using dm-integrity underneath md-raid, some tests with raid auto-correction trigger large amounts of integrity failures - and all these failures print an error message. These messages can bring the system to a halt if the system is using serial console. Fix this by limiting the rate of error messages - it improves the speed of raid recovery and avoids the hang. Fixes: 7eada909bfd7a ("dm: add integrity target") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index b10e4c5641ea..da4baea9cf83 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1276,8 +1276,8 @@ again: checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - DMERR("Checksum failed at sector 0x%llx", - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); + DMERR_LIMIT("Checksum failed at sector 0x%llx", + (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); r = -EILSEQ; atomic64_inc(&ic->number_of_mismatches); } @@ -1469,8 +1469,8 @@ retry_kmap: integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { - DMERR("Checksum failed when reading from journal, at sector 0x%llx", - (unsigned long long)logical_sector); + DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", + (unsigned long long)logical_sector); } } #endif From 00648da11e418f8fa96ec213dbbe44a6dc8a0710 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 27 Dec 2018 20:52:21 +0100 Subject: [PATCH 2428/2608] cpcap-charger: generate events for userspace commit fd10606f93a149a9f3d37574e5385b083b4a7b32 upstream. The driver doesn't generate uevents on charger connect/disconnect. This leads to UPower not detecting when AC is on or off... and that is bad. Reported by Arthur D. on github ( https://github.com/maemo-leste/bugtracker/issues/206 ), thanks to Merlijn Wajer for suggesting a fix. Cc: stable@kernel.org Signed-off-by: Pavel Machek Acked-by: Tony Lindgren Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/cpcap-charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c index 11a07633de6c..aa469ccc3b14 100644 --- a/drivers/power/supply/cpcap-charger.c +++ b/drivers/power/supply/cpcap-charger.c @@ -458,6 +458,7 @@ static void cpcap_usb_detect(struct work_struct *work) goto out_err; } + power_supply_changed(ddata->usb); return; out_err: From d6bce5aed476ff66ab91d732c4c7220aa95e68c3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Feb 2019 09:21:38 -0500 Subject: [PATCH 2429/2608] NFS: Fix I/O request leakages commit f57dcf4c72113c745d83f1c65f7291299f65c14f upstream. When we fail to add the request to the I/O queue, we currently leave it to the caller to free the failed request. However since some of the requests that fail are actually created by nfs_pageio_add_request() itself, and are not passed back the caller, this leads to a leakage issue, which can again cause page locks to leak. This commit addresses the leakage by freeing the created requests on error, using desc->pg_completion_ops->error_cleanup() Signed-off-by: Trond Myklebust Fixes: a7d42ddb30997 ("nfs: add mirroring support to pgio layer") Cc: stable@vger.kernel.org # v4.0: c18b96a1b862: nfs: clean up rest of reqs Cc: stable@vger.kernel.org # v4.0: d600ad1f2bdb: NFS41: pop some layoutget Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 37f20d7a26ed..67c4eaf50311 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -988,6 +988,17 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) } } +static void +nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + LIST_HEAD(head); + + nfs_list_remove_request(req); + nfs_list_add_request(req, &head); + desc->pg_completion_ops->error_cleanup(&head); +} + /** * nfs_pageio_add_request - Attempt to coalesce a request into a page list. * @desc: destination io descriptor @@ -1025,10 +1036,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_page_group_unlock(req); desc->pg_moreio = 1; nfs_pageio_doio(desc); - if (desc->pg_error < 0) - return 0; - if (mirror->pg_recoalesce) - return 0; + if (desc->pg_error < 0 || mirror->pg_recoalesce) + goto out_cleanup_subreq; /* retry add_request for this subreq */ nfs_page_group_lock(req); continue; @@ -1061,6 +1070,10 @@ err_ptr: desc->pg_error = PTR_ERR(subreq); nfs_page_group_unlock(req); return 0; +out_cleanup_subreq: + if (req != subreq) + nfs_pageio_cleanup_request(desc, subreq); + return 0; } static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) @@ -1168,11 +1181,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (nfs_pgio_has_mirroring(desc)) desc->pg_mirror_idx = midx; if (!nfs_pageio_add_request_mirror(desc, dupreq)) - goto out_failed; + goto out_cleanup_subreq; } return 1; +out_cleanup_subreq: + if (req != dupreq) + nfs_pageio_cleanup_request(desc, dupreq); out_failed: /* remember fatal errors */ if (nfs_error_is_fatal(desc->pg_error)) From 86e9e3773445044ec227b0ace13772bc43d9fc24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Feb 2019 14:59:52 -0500 Subject: [PATCH 2430/2608] NFS: Fix an I/O request leakage in nfs_do_recoalesce commit 4d91969ed4dbcefd0e78f77494f0cb8fada9048a upstream. Whether we need to exit early, or just reprocess the list, we must not lost track of the request which failed to get recoalesced. Fixes: 03d5eb65b538 ("NFS: Fix a memory leak in nfs_do_recoalesce") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 67c4eaf50311..dfed48dc216f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1092,7 +1092,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) struct nfs_page *req; req = list_first_entry(&head, struct nfs_page, wb_list); - nfs_list_remove_request(req); if (__nfs_pageio_add_request(desc, req)) continue; if (desc->pg_error < 0) { From 9aaa2ef05a43367fdf9ec35cf7f586631c824ac8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Feb 2019 16:08:25 -0500 Subject: [PATCH 2431/2608] NFS: Don't recoalesce on error in nfs_pageio_complete_mirror() commit 8127d82705998568b52ac724e28e00941538083d upstream. If the I/O completion failed with a fatal error, then we should just exit nfs_pageio_complete_mirror() rather than try to recoalesce. Fixes: a7d42ddb3099 ("nfs: add mirroring support to pgio layer") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index dfed48dc216f..28b013d1d44a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1213,7 +1213,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, desc->pg_mirror_idx = mirror_idx; for (;;) { nfs_pageio_doio(desc); - if (!mirror->pg_recoalesce) + if (desc->pg_error < 0 || !mirror->pg_recoalesce) break; if (!nfs_do_recoalesce(desc)) break; From c0edc300600aceb76909c503898668cdac9326f4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 4 Mar 2019 14:08:22 +1100 Subject: [PATCH 2432/2608] nfsd: fix memory corruption caused by readdir commit b602345da6cbb135ba68cf042df8ec9a73da7981 upstream. If the result of an NFSv3 readdir{,plus} request results in the "offset" on one entry having to be split across 2 pages, and is sized so that the next directory entry doesn't fit in the requested size, then memory corruption can happen. When encode_entry() is called after encoding the last entry that fits, it notices that ->offset and ->offset1 are set, and so stores the offset value in the two pages as required. It clears ->offset1 but *does not* clear ->offset. Normally this omission doesn't matter as encode_entry_baggage() will be called, and will set ->offset to a suitable value (not on a page boundary). But in the case where cd->buflen < elen and nfserr_toosmall is returned, ->offset is not reset. This means that nfsd3proc_readdirplus will see ->offset with a value 4 bytes before the end of a page, and ->offset1 set to NULL. It will try to write 8bytes to ->offset. If we are lucky, the next page will be read-only, and the system will BUG: unable to handle kernel paging request at... If we are unlucky, some innocent page will have the first 4 bytes corrupted. nfsd3proc_readdir() doesn't even check for ->offset1, it just blindly writes 8 bytes to the offset wherever it is. Fix this by clearing ->offset after it is used, and copying the ->offset handling code from nfsd3_proc_readdirplus into nfsd3_proc_readdir. (Note that the commit hash in the Fixes tag is from the 'history' tree - this bug predates git). Fixes: 0b1d57cf7654 ("[PATCH] kNFSd: Fix nfs3 dentry encoding") Fixes-URL: https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/commit/?id=0b1d57cf7654 Cc: stable@vger.kernel.org (v2.6.12+) Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs3proc.c | 16 ++++++++++++++-- fs/nfsd/nfs3xdr.c | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 1d0ce3c57d93..c0de4d6cd857 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -446,8 +446,19 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) &resp->common, nfs3svc_encode_entry); memcpy(resp->verf, argp->verf, 8); resp->count = resp->buffer - argp->buffer; - if (resp->offset) - xdr_encode_hyper(resp->offset, argp->cookie); + if (resp->offset) { + loff_t offset = argp->cookie; + + if (unlikely(resp->offset1)) { + /* we ended up with offset on a page boundary */ + *resp->offset = htonl(offset >> 32); + *resp->offset1 = htonl(offset & 0xffffffff); + resp->offset1 = NULL; + } else { + xdr_encode_hyper(resp->offset, offset); + } + resp->offset = NULL; + } RETURN_STATUS(nfserr); } @@ -516,6 +527,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) } else { xdr_encode_hyper(resp->offset, offset); } + resp->offset = NULL; } RETURN_STATUS(nfserr); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f38acd905441..ef3e7878456c 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -922,6 +922,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, } else { xdr_encode_hyper(cd->offset, offset64); } + cd->offset = NULL; } /* From 2fa903f97d335362c7def3bca5781656afe32920 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Wed, 6 Mar 2019 21:03:50 +0800 Subject: [PATCH 2433/2608] nfsd: fix wrong check in write_v4_end_grace() commit dd838821f0a29781b185cd8fb8e48d5c177bd838 upstream. Commit 62a063b8e7d1 "nfsd4: fix crash on writing v4_end_grace before nfsd startup" is trying to fix a NULL dereference issue, but it mistakenly checks if the nfsd server is started. So fix it. Fixes: 62a063b8e7d1 "nfsd4: fix crash on writing v4_end_grace before nfsd startup" Cc: stable@vger.kernel.org Reviewed-by: Joseph Qi Signed-off-by: Yihao Wu Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4b8ebcc6b183..d44402241d9e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1126,7 +1126,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) case 'Y': case 'y': case '1': - if (nn->nfsd_serv) + if (!nn->nfsd_serv) return -EBUSY; nfsd4_end_grace(nn); break; From 086135b25b925a8142b9c612eddc87d2d19c2faa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 1 Mar 2019 12:13:34 -0500 Subject: [PATCH 2434/2608] NFSv4.1: Reinitialise sequence results before retransmitting a request commit c1dffe0bf7f9c3d57d9f237a7cb2a81e62babd2b upstream. If we have to retransmit a request, we should ensure that we reinitialise the sequence results structure, since in the event of a signal we need to treat the request as if it had not been sent. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a3b67d3b1dfb..9041a892701f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -914,6 +914,13 @@ nfs4_sequence_process_interrupted(struct nfs_client *client, #endif /* !CONFIG_NFS_V4_1 */ +static void nfs41_sequence_res_init(struct nfs4_sequence_res *res) +{ + res->sr_timestamp = jiffies; + res->sr_status_flags = 0; + res->sr_status = 1; +} + static void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -925,10 +932,6 @@ void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, args->sa_slot = slot; res->sr_slot = slot; - res->sr_timestamp = jiffies; - res->sr_status_flags = 0; - res->sr_status = 1; - } int nfs4_setup_sequence(struct nfs_client *client, @@ -974,6 +977,7 @@ int nfs4_setup_sequence(struct nfs_client *client, trace_nfs4_setup_sequence(session, args); out_start: + nfs41_sequence_res_init(res); rpc_call_start(task); return 0; From 4e2a61f23c6f7882fec43b9e24eccdad2bee49ec Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 8 Mar 2019 15:23:11 +0530 Subject: [PATCH 2435/2608] PM / wakeup: Rework wakeup source timer cancellation commit 1fad17fb1bbcd73159c2b992668a6957ecc5af8a upstream. If wakeup_source_add() is called right after wakeup_source_remove() for the same wakeup source, timer_setup() may be called for a potentially scheduled timer which is incorrect. To avoid that, move the wakeup source timer cancellation from wakeup_source_drop() to wakeup_source_remove(). Moreover, make wakeup_source_remove() clear the timer function after canceling the timer to let wakeup_source_not_registered() treat unregistered wakeup sources in the same way as the ones that have never been registered. Signed-off-by: Viresh Kumar Cc: 4.4+ # 4.4+ [ rjw: Subject, changelog, merged two patches together ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/wakeup.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index cdd6f256da59..df53e2b3296b 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -113,7 +113,6 @@ void wakeup_source_drop(struct wakeup_source *ws) if (!ws) return; - del_timer_sync(&ws->timer); __pm_relax(ws); } EXPORT_SYMBOL_GPL(wakeup_source_drop); @@ -201,6 +200,13 @@ void wakeup_source_remove(struct wakeup_source *ws) list_del_rcu(&ws->entry); spin_unlock_irqrestore(&events_lock, flags); synchronize_srcu(&wakeup_srcu); + + del_timer_sync(&ws->timer); + /* + * Clear timer.function to make wakeup_source_not_registered() treat + * this wakeup source as not registered. + */ + ws->timer.function = NULL; } EXPORT_SYMBOL_GPL(wakeup_source_remove); From c78123c7ff469fbd4cd5069b2a80d7bfb07047d6 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Sat, 9 Feb 2019 12:52:53 +0800 Subject: [PATCH 2436/2608] bcache: never writeback a discard operation commit 9951379b0ca88c95876ad9778b9099e19a95d566 upstream. Some users see panics like the following when performing fstrim on a bcached volume: [ 529.803060] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 530.183928] #PF error: [normal kernel read fault] [ 530.412392] PGD 8000001f42163067 P4D 8000001f42163067 PUD 1f42168067 PMD 0 [ 530.750887] Oops: 0000 [#1] SMP PTI [ 530.920869] CPU: 10 PID: 4167 Comm: fstrim Kdump: loaded Not tainted 5.0.0-rc1+ #3 [ 531.290204] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 12/27/2015 [ 531.693137] RIP: 0010:blk_queue_split+0x148/0x620 [ 531.922205] Code: 60 38 89 55 a0 45 31 db 45 31 f6 45 31 c9 31 ff 89 4d 98 85 db 0f 84 7f 04 00 00 44 8b 6d 98 4c 89 ee 48 c1 e6 04 49 03 70 78 <8b> 46 08 44 8b 56 0c 48 8b 16 44 29 e0 39 d8 48 89 55 a8 0f 47 c3 [ 532.838634] RSP: 0018:ffffb9b708df39b0 EFLAGS: 00010246 [ 533.093571] RAX: 00000000ffffffff RBX: 0000000000046000 RCX: 0000000000000000 [ 533.441865] RDX: 0000000000000200 RSI: 0000000000000000 RDI: 0000000000000000 [ 533.789922] RBP: ffffb9b708df3a48 R08: ffff940d3b3fdd20 R09: 0000000000000000 [ 534.137512] R10: ffffb9b708df3958 R11: 0000000000000000 R12: 0000000000000000 [ 534.485329] R13: 0000000000000000 R14: 0000000000000000 R15: ffff940d39212020 [ 534.833319] FS: 00007efec26e3840(0000) GS:ffff940d1f480000(0000) knlGS:0000000000000000 [ 535.224098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 535.504318] CR2: 0000000000000008 CR3: 0000001f4e256004 CR4: 00000000001606e0 [ 535.851759] Call Trace: [ 535.970308] ? mempool_alloc_slab+0x15/0x20 [ 536.174152] ? bch_data_insert+0x42/0xd0 [bcache] [ 536.403399] blk_mq_make_request+0x97/0x4f0 [ 536.607036] generic_make_request+0x1e2/0x410 [ 536.819164] submit_bio+0x73/0x150 [ 536.980168] ? submit_bio+0x73/0x150 [ 537.149731] ? bio_associate_blkg_from_css+0x3b/0x60 [ 537.391595] ? _cond_resched+0x1a/0x50 [ 537.573774] submit_bio_wait+0x59/0x90 [ 537.756105] blkdev_issue_discard+0x80/0xd0 [ 537.959590] ext4_trim_fs+0x4a9/0x9e0 [ 538.137636] ? ext4_trim_fs+0x4a9/0x9e0 [ 538.324087] ext4_ioctl+0xea4/0x1530 [ 538.497712] ? _copy_to_user+0x2a/0x40 [ 538.679632] do_vfs_ioctl+0xa6/0x600 [ 538.853127] ? __do_sys_newfstat+0x44/0x70 [ 539.051951] ksys_ioctl+0x6d/0x80 [ 539.212785] __x64_sys_ioctl+0x1a/0x20 [ 539.394918] do_syscall_64+0x5a/0x110 [ 539.568674] entry_SYSCALL_64_after_hwframe+0x44/0xa9 We have observed it where both: 1) LVM/devmapper is involved (bcache backing device is LVM volume) and 2) writeback cache is involved (bcache cache_mode is writeback) On one machine, we can reliably reproduce it with: # echo writeback > /sys/block/bcache0/bcache/cache_mode (not sure whether above line is required) # mount /dev/bcache0 /test # for i in {0..10}; do file="$(mktemp /test/zero.XXX)" dd if=/dev/zero of="$file" bs=1M count=256 sync rm $file done # fstrim -v /test Observing this with tracepoints on, we see the following writes: fstrim-18019 [022] .... 91107.302026: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 4260112 + 196352 hit 0 bypass 1 fstrim-18019 [022] .... 91107.302050: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 4456464 + 262144 hit 0 bypass 1 fstrim-18019 [022] .... 91107.302075: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 4718608 + 81920 hit 0 bypass 1 fstrim-18019 [022] .... 91107.302094: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 5324816 + 180224 hit 0 bypass 1 fstrim-18019 [022] .... 91107.302121: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 5505040 + 262144 hit 0 bypass 1 fstrim-18019 [022] .... 91107.302145: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 5767184 + 81920 hit 0 bypass 1 fstrim-18019 [022] .... 91107.308777: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 6373392 + 180224 hit 1 bypass 0 Note the final one has different hit/bypass flags. This is because in should_writeback(), we were hitting a case where the partial stripe condition was returning true and so should_writeback() was returning true early. If that hadn't been the case, it would have hit the would_skip test, and as would_skip == s->iop.bypass == true, should_writeback() would have returned false. Looking at the git history from 'commit 72c270612bd3 ("bcache: Write out full stripes")', it looks like the idea was to optimise for raid5/6: * If a stripe is already dirty, force writes to that stripe to writeback mode - to help build up full stripes of dirty data To fix this issue, make sure that should_writeback() on a discard op never returns true. More details of debugging: https://www.spinics.net/lists/linux-bcache/msg06996.html Previous reports: - https://bugzilla.kernel.org/show_bug.cgi?id=201051 - https://bugzilla.kernel.org/show_bug.cgi?id=196103 - https://www.spinics.net/lists/linux-bcache/msg06885.html (Coly Li: minor modification to follow maximum 75 chars per line rule) Cc: Kent Overstreet Cc: stable@vger.kernel.org Fixes: 72c270612bd3 ("bcache: Write out full stripes") Signed-off-by: Daniel Axtens Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 151544740148..973847e027a8 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -69,6 +69,9 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, in_use > CUTOFF_WRITEBACK_SYNC) return false; + if (bio_op(bio) == REQ_OP_DISCARD) + return false; + if (dc->partial_stripes_expensive && bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector, bio_sectors(bio))) From f51b6322ff023358d81a41a1718d7d6c95f9c87c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 6 Mar 2019 11:07:24 -0600 Subject: [PATCH 2437/2608] x86/unwind/orc: Fix ORC unwind table alignment commit f76a16adc485699f95bb71fce114f97c832fe664 upstream. The .orc_unwind section is a packed array of 6-byte structs. It's currently aligned to 6 bytes, which is causing warnings in the LLD linker. Six isn't a power of two, so it's not a valid alignment value. The actual alignment doesn't matter much because it's an array of packed structs. An alignment of two is sufficient. In reality it always gets aligned to four bytes because it comes immediately after the 4-byte-aligned .orc_unwind_ip section. Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Reported-by: Nick Desaulniers Reported-by: Dmitry Golovin Reported-by: Sedat Dilek Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Sedat Dilek Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/218 Link: https://lkml.kernel.org/r/d55027ee95fe73e952dcd8be90aebd31b0095c45.1551892041.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fcec26d60d8c..c229ffbed6d4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -696,7 +696,7 @@ KEEP(*(.orc_unwind_ip)) \ VMLINUX_SYMBOL(__stop_orc_unwind_ip) = .; \ } \ - . = ALIGN(6); \ + . = ALIGN(2); \ .orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_orc_unwind) = .; \ KEEP(*(.orc_unwind)) \ From 0bc95384bb8271b237c753c4dbef9dd32273a2db Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Feb 2019 12:39:45 +0200 Subject: [PATCH 2438/2608] perf intel-pt: Fix CYC timestamp calculation after OVF commit 03997612904866abe7cdcc992784ef65cb3a4b81 upstream. CYC packet timestamp calculation depends upon CBR which was being cleared upon overflow (OVF). That can cause errors due to failing to synchronize with sideband events. Even if a CBR change has been lost, the old CBR is still a better estimate than zero. So remove the clearing of CBR. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20190206103947.15750-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index d404bed7003a..2a385cf68b73 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1389,7 +1389,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); - decoder->cbr = 0; decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; From 23cdc3b1654cb1be40502156d7a54bce54c3f7ec Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Feb 2019 12:39:43 +0200 Subject: [PATCH 2439/2608] perf auxtrace: Define auxtrace record alignment commit c3fcadf0bb765faf45d6d562246e1d08885466df upstream. Define auxtrace record alignment so that it can be referenced elsewhere. Note this is preparation for patch "perf intel-pt: Fix overlap calculation for padding" Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20190206103947.15750-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/auxtrace.c | 4 ++-- tools/perf/util/auxtrace.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index bbb9823e93b9..44c8bcefe224 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1264,9 +1264,9 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, } /* padding must be written by fn() e.g. record__process_auxtrace() */ - padding = size & 7; + padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1); if (padding) - padding = 8 - padding; + padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding; memset(&ev, 0, sizeof(ev)); ev.auxtrace.header.type = PERF_RECORD_AUXTRACE; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 33b5e6cdf38c..d273bb47b3e3 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -38,6 +38,9 @@ struct record_opts; struct auxtrace_info_event; struct events_stats; +/* Auxtrace records must have the same alignment as perf event records */ +#define PERF_AUXTRACE_RECORD_ALIGNMENT 8 + enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, PERF_AUXTRACE_INTEL_PT, From da4f86ffdbb2571e2626e126e75f39d5d147b69b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Feb 2019 12:39:44 +0200 Subject: [PATCH 2440/2608] perf intel-pt: Fix overlap calculation for padding commit 5a99d99e3310a565b0cf63f785b347be9ee0da45 upstream. Auxtrace records might have up to 7 bytes of padding appended. Adjust the overlap accordingly. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20190206103947.15750-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- .../util/intel-pt-decoder/intel-pt-decoder.c | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 2a385cf68b73..f3db68abbd9a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -26,6 +26,7 @@ #include "../cache.h" #include "../util.h" +#include "../auxtrace.h" #include "intel-pt-insn-decoder.h" #include "intel-pt-pkt-decoder.h" @@ -2558,6 +2559,34 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) } } +#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1) + +/** + * adj_for_padding - adjust overlap to account for padding. + * @buf_b: second buffer + * @buf_a: first buffer + * @len_a: size of first buffer + * + * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap + * accordingly. + * + * Return: A pointer into @buf_b from where non-overlapped data starts + */ +static unsigned char *adj_for_padding(unsigned char *buf_b, + unsigned char *buf_a, size_t len_a) +{ + unsigned char *p = buf_b - MAX_PADDING; + unsigned char *q = buf_a + len_a - MAX_PADDING; + int i; + + for (i = MAX_PADDING; i; i--, p++, q++) { + if (*p != *q) + break; + } + + return p; +} + /** * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data * using TSC. @@ -2608,8 +2637,11 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, /* Same TSC, so buffers are consecutive */ if (!cmp && rem_b >= rem_a) { + unsigned char *start; + *consecutive = true; - return buf_b + len_b - (rem_b - rem_a); + start = buf_b + len_b - (rem_b - rem_a); + return adj_for_padding(start, buf_a, len_a); } if (cmp < 0) return buf_b; /* tsc_a < tsc_b => no overlap */ @@ -2672,7 +2704,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, found = memmem(buf_a, len_a, buf_b, len_a); if (found) { *consecutive = true; - return buf_b + len_a; + return adj_for_padding(buf_b + len_a, buf_a, len_a); } /* Try again at next PSB in buffer 'a' */ From 73e0b59ffb7b3d63d1ab7b7127684fde1f763678 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Mar 2019 12:35:36 +0200 Subject: [PATCH 2441/2608] perf intel-pt: Fix divide by zero when TSC is not available commit 076333870c2f5bdd9b6d31e7ca1909cf0c84cbfa upstream. When TSC is not available, "timeless" decoding is used but a divide by zero occurs if perf_time_to_tsc() is called. Ensure the divisor is not zero. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v4.9+ Link: https://lkml.kernel.org/n/tip-1i4j0wqoc8vlbkcizqqxpsf4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3b118fa9da89..e8e05e7838b2 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2545,6 +2545,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, } pt->timeless_decoding = intel_pt_timeless_decoding(pt); + if (pt->timeless_decoding && !pt->tc.time_mult) + pt->tc.time_mult = 1; pt->have_tsc = intel_pt_have_tsc(pt); pt->sampling_mode = false; pt->est_tsc = !pt->timeless_decoding; From d287b65e265d3879f34805e931f7a25b62dab813 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 4 Mar 2019 16:48:54 -0600 Subject: [PATCH 2442/2608] md: Fix failed allocation of md_register_thread commit e406f12dde1a8375d77ea02d91f313fb1a9c6aec upstream. mddev->sync_thread can be set to NULL on kzalloc failure downstream. The patch checks for such a scenario and frees allocated resources. Committer node: Added similar fix to raid5.c, as suggested by Guoqing. Cc: stable@vger.kernel.org # v3.16+ Acked-by: Guoqing Jiang Signed-off-by: Aditya Pakki Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 2 ++ drivers/md/raid5.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ed1b7bf1ec0e..433e78f453da 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3821,6 +3821,8 @@ static int raid10_run(struct mddev *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, "reshape"); + if (!mddev->sync_thread) + goto out_free_conf; } return 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7dbb74cd506a..77a482c6eeda 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7370,6 +7370,8 @@ static int raid5_run(struct mddev *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, "reshape"); + if (!mddev->sync_thread) + goto abort; } /* Ok, everything is just fine now */ From f19ef6579b0e4bef308e4daae955eed856985f29 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 4 Feb 2019 15:59:43 +0200 Subject: [PATCH 2443/2608] tpm/tpm_crb: Avoid unaligned reads in crb_recv() commit 3d7a850fdc1a2e4d2adbc95cc0fc962974725e88 upstream. The current approach to read first 6 bytes from the response and then tail of the response, can cause the 2nd memcpy_fromio() to do an unaligned read (e.g. read 32-bit word from address aligned to a 16-bits), depending on how memcpy_fromio() is implemented. If this happens, the read will fail and the memory controller will fill the read with 1's. This was triggered by 170d13ca3a2f, which should be probably refined to check and react to the address alignment. Before that commit, on x86 memcpy_fromio() turned out to be memcpy(). By a luck GCC has done the right thing (from tpm_crb's perspective) for us so far, but we should not rely on that. Thus, it makes sense to fix this also in tpm_crb, not least because the fix can be then backported to stable kernels and make them more robust when compiled in differing environments. Cc: stable@vger.kernel.org Cc: James Morris Cc: Tomas Winkler Cc: Jerry Snitselaar Fixes: 30fc8d138e91 ("tpm: TPM 2.0 CRB Interface") Signed-off-by: Jarkko Sakkinen Reviewed-by: Jerry Snitselaar Acked-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_crb.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index b4ad169836e9..f978738554d5 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -288,19 +288,29 @@ static int crb_recv(struct tpm_chip *chip, u8 *buf, size_t count) struct crb_priv *priv = dev_get_drvdata(&chip->dev); unsigned int expected; - /* sanity check */ - if (count < 6) + /* A sanity check that the upper layer wants to get at least the header + * as that is the minimum size for any TPM response. + */ + if (count < TPM_HEADER_SIZE) return -EIO; + /* If this bit is set, according to the spec, the TPM is in + * unrecoverable condition. + */ if (ioread32(&priv->regs_t->ctrl_sts) & CRB_CTRL_STS_ERROR) return -EIO; - memcpy_fromio(buf, priv->rsp, 6); - expected = be32_to_cpup((__be32 *) &buf[2]); - if (expected > count || expected < 6) + /* Read the first 8 bytes in order to get the length of the response. + * We read exactly a quad word in order to make sure that the remaining + * reads will be aligned. + */ + memcpy_fromio(buf, priv->rsp, 8); + + expected = be32_to_cpup((__be32 *)&buf[2]); + if (expected > count || expected < TPM_HEADER_SIZE) return -EIO; - memcpy_fromio(&buf[6], &priv->rsp[6], expected - 6); + memcpy_fromio(&buf[8], &priv->rsp[8], expected - 8); return expected; } From b61c06e3ecb6b62cd3ff70a1256a0938fa0fba5c Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 8 Feb 2019 18:30:58 +0200 Subject: [PATCH 2444/2608] tpm: Unify the send callback behaviour commit f5595f5baa30e009bf54d0d7653a9a0cc465be60 upstream. The send() callback should never return length as it does not in every driver except tpm_crb in the success case. The reason is that the main transmit functionality only cares about whether the transmit was successful or not and ignores the count completely. Suggested-by: Stefan Berger Cc: stable@vger.kernel.org Signed-off-by: Jarkko Sakkinen Reviewed-by: Stefan Berger Reviewed-by: Jerry Snitselaar Tested-by: Alexander Steffen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/st33zp24/st33zp24.c | 2 +- drivers/char/tpm/tpm-interface.c | 11 ++++++++++- drivers/char/tpm/tpm_atmel.c | 2 +- drivers/char/tpm/tpm_i2c_atmel.c | 6 +++++- drivers/char/tpm/tpm_i2c_infineon.c | 2 +- drivers/char/tpm/tpm_i2c_nuvoton.c | 2 +- drivers/char/tpm/tpm_ibmvtpm.c | 8 ++++---- drivers/char/tpm/tpm_infineon.c | 2 +- drivers/char/tpm/tpm_nsc.c | 2 +- drivers/char/tpm/tpm_tis_core.c | 2 +- drivers/char/tpm/tpm_vtpm_proxy.c | 3 +-- drivers/char/tpm/xen-tpmfront.c | 2 +- 12 files changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c index f95b9c75175b..77f3fa10db12 100644 --- a/drivers/char/tpm/st33zp24/st33zp24.c +++ b/drivers/char/tpm/st33zp24/st33zp24.c @@ -438,7 +438,7 @@ static int st33zp24_send(struct tpm_chip *chip, unsigned char *buf, goto out_err; } - return len; + return 0; out_err: st33zp24_cancel(chip); release_locality(chip); diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 038b91bcbd31..e3beeb2a93dc 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -497,10 +497,19 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip, if (rc < 0) { if (rc != -EPIPE) dev_err(&chip->dev, - "%s: tpm_send: error %d\n", __func__, rc); + "%s: send(): error %d\n", __func__, rc); goto out; } + /* A sanity check. send() should just return zero on success e.g. + * not the command length. + */ + if (rc > 0) { + dev_warn(&chip->dev, + "%s: send(): invalid value %d\n", __func__, rc); + rc = 0; + } + if (chip->flags & TPM_CHIP_FLAG_IRQ) goto out_recv; diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index 66a14526aaf4..a290b30a0c35 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -105,7 +105,7 @@ static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count) iowrite8(buf[i], priv->iobase); } - return count; + return 0; } static void tpm_atml_cancel(struct tpm_chip *chip) diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index 95ce2e9ccdc6..32a8e27c5382 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -65,7 +65,11 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) dev_dbg(&chip->dev, "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__, (int)min_t(size_t, 64, len), buf, len, status); - return status; + + if (status < 0) + return status; + + return 0; } static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index c619e76ce827..94bdb8ec372e 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -587,7 +587,7 @@ static int tpm_tis_i2c_send(struct tpm_chip *chip, u8 *buf, size_t len) /* go and do it */ iic_tpm_write(TPM_STS(tpm_dev.locality), &sts, 1); - return len; + return 0; out_err: tpm_tis_i2c_ready(chip); /* The TPM needs some time to clean up here, diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index f74f451baf6a..b8defdfdf2dc 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -469,7 +469,7 @@ static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) } dev_dbg(dev, "%s() -> %zd\n", __func__, len); - return len; + return 0; } static bool i2c_nuvoton_req_canceled(struct tpm_chip *chip, u8 status) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 25f6e2665385..77e47dc5aacc 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -141,14 +141,14 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) } /** - * tpm_ibmvtpm_send - Send tpm request - * + * tpm_ibmvtpm_send() - Send a TPM command * @chip: tpm chip struct * @buf: buffer contains data to send * @count: size of buffer * * Return: - * Number of bytes sent or < 0 on error. + * 0 on success, + * -errno on error */ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) { @@ -194,7 +194,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) rc = 0; ibmvtpm->tpm_processing_cmd = false; } else - rc = count; + rc = 0; spin_unlock(&ibmvtpm->rtce_lock); return rc; diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index d8f10047fbba..97f6d4fe0aee 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -354,7 +354,7 @@ static int tpm_inf_send(struct tpm_chip *chip, u8 * buf, size_t count) for (i = 0; i < count; i++) { wait_and_send(chip, buf[i]); } - return count; + return 0; } static void tpm_inf_cancel(struct tpm_chip *chip) diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 5d6cce74cd3f..9bee3c5eb4bf 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -226,7 +226,7 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) } outb(NSC_COMMAND_EOC, priv->base + NSC_COMMAND); - return count; + return 0; } static void tpm_nsc_cancel(struct tpm_chip *chip) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 58123df6b5f6..a7d9c0c53fcd 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -379,7 +379,7 @@ static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len) goto out_err; } } - return len; + return 0; out_err: tpm_tis_ready(chip); return rc; diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c index 1d877cc9af97..94a539384619 100644 --- a/drivers/char/tpm/tpm_vtpm_proxy.c +++ b/drivers/char/tpm/tpm_vtpm_proxy.c @@ -335,7 +335,6 @@ static int vtpm_proxy_is_driver_command(struct tpm_chip *chip, static int vtpm_proxy_tpm_op_send(struct tpm_chip *chip, u8 *buf, size_t count) { struct proxy_dev *proxy_dev = dev_get_drvdata(&chip->dev); - int rc = 0; if (count > sizeof(proxy_dev->buffer)) { dev_err(&chip->dev, @@ -366,7 +365,7 @@ static int vtpm_proxy_tpm_op_send(struct tpm_chip *chip, u8 *buf, size_t count) wake_up_interruptible(&proxy_dev->wq); - return rc; + return 0; } static void vtpm_proxy_tpm_op_cancel(struct tpm_chip *chip) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 2cffaf567d99..538c9297dee1 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -112,7 +112,7 @@ static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) return -ETIME; } - return count; + return 0; } static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) From 61cc5315ef6ef6d2fce104d9e4c8563429f698a1 Mon Sep 17 00:00:00 2001 From: "Zhang, Jun" Date: Tue, 18 Dec 2018 06:55:01 -0800 Subject: [PATCH 2445/2608] rcu: Do RCU GP kthread self-wakeup from softirq and interrupt commit 1d1f898df6586c5ea9aeaf349f13089c6fa37903 upstream. The rcu_gp_kthread_wake() function is invoked when it might be necessary to wake the RCU grace-period kthread. Because self-wakeups are normally a useless waste of CPU cycles, if rcu_gp_kthread_wake() is invoked from this kthread, it naturally refuses to do the wakeup. Unfortunately, natural though it might be, this heuristic fails when rcu_gp_kthread_wake() is invoked from an interrupt or softirq handler that interrupted the grace-period kthread just after the final check of the wait-event condition but just before the schedule() call. In this case, a wakeup is required, even though the call to rcu_gp_kthread_wake() is within the RCU grace-period kthread's context. Failing to provide this wakeup can result in grace periods failing to start, which in turn results in out-of-memory conditions. This race window is quite narrow, but it actually did happen during real testing. It would of course need to be fixed even if it was strictly theoretical in nature. This patch does not Cc stable because it does not apply cleanly to earlier kernel versions. Fixes: 48a7639ce80c ("rcu: Make callers awaken grace-period kthread") Reported-by: "He, Bo" Co-developed-by: "Zhang, Jun" Co-developed-by: "He, Bo" Co-developed-by: "xiao, jin" Co-developed-by: Bai, Jie A Signed-off: "Zhang, Jun" Signed-off: "He, Bo" Signed-off: "xiao, jin" Signed-off: Bai, Jie A Signed-off-by: "Zhang, Jun" [ paulmck: Switch from !in_softirq() to "!in_interrupt() && !in_serving_softirq() to avoid redundant wakeups and to also handle the interrupt-handler scenario as well as the softirq-handler scenario that actually occurred in testing. ] Signed-off-by: Paul E. McKenney Link: https://lkml.kernel.org/r/CD6925E8781EFD4D8E11882D20FC406D52A11F61@SHSMSX104.ccr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tree.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 710ce1d6b982..fb051fa99b67 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1789,15 +1789,23 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) } /* - * Awaken the grace-period kthread for the specified flavor of RCU. - * Don't do a self-awaken, and don't bother awakening when there is - * nothing for the grace-period kthread to do (as in several CPUs - * raced to awaken, and we lost), and finally don't try to awaken - * a kthread that has not yet been created. + * Awaken the grace-period kthread. Don't do a self-awaken (unless in + * an interrupt or softirq handler), and don't bother awakening when there + * is nothing for the grace-period kthread to do (as in several CPUs raced + * to awaken, and we lost), and finally don't try to awaken a kthread that + * has not yet been created. If all those checks are passed, track some + * debug information and awaken. + * + * So why do the self-wakeup when in an interrupt or softirq handler + * in the grace-period kthread's context? Because the kthread might have + * been interrupted just as it was going to sleep, and just after the final + * pre-sleep check of the awaken condition. In this case, a wakeup really + * is required, and is therefore supplied. */ static void rcu_gp_kthread_wake(struct rcu_state *rsp) { - if (current == rsp->gp_kthread || + if ((current == rsp->gp_kthread && + !in_interrupt() && !in_serving_softirq()) || !READ_ONCE(rsp->gp_flags) || !rsp->gp_kthread) return; From ebebfe3410444733ee6ba84bc0b7cc86bab066c4 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Mon, 21 Jan 2019 21:35:52 -0200 Subject: [PATCH 2446/2608] media: imx: prpencvf: Stop upstream before disabling IDMA channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a19c22677377b87e4354f7306f46ad99bc982a9f upstream. Upstream must be stopped immediately after receiving the last EOF and before disabling the IDMA channel. This can be accomplished by moving upstream stream off to just after receiving the last EOF completion in prp_stop(). For symmetry also move upstream stream on to end of prp_start(). This fixes a complete system hard lockup on the SabreAuto when streaming from the ADV7180, by repeatedly sending a stream off immediately followed by stream on: while true; do v4l2-ctl -d1 --stream-mmap --stream-count=3; done Eventually this either causes the system lockup or EOF timeouts at all subsequent stream on, until a system reset. The lockup occurs when disabling the IDMA channel at stream off. Stopping the video data stream entering the IDMA channel before disabling the channel itself appears to be a reliable fix for the hard lockup. Fixes: f0d9c8924e2c3 ("[media] media: imx: Add IC subdev drivers") Reported-by: Gaël PORTAY Tested-by: Gaël PORTAY Signed-off-by: Steve Longerbeam Cc: stable@vger.kernel.org # for 4.13 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/imx/imx-ic-prpencvf.c | 26 ++++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/staging/media/imx/imx-ic-prpencvf.c b/drivers/staging/media/imx/imx-ic-prpencvf.c index 111afd34aa3c..22149957afd0 100644 --- a/drivers/staging/media/imx/imx-ic-prpencvf.c +++ b/drivers/staging/media/imx/imx-ic-prpencvf.c @@ -676,12 +676,23 @@ static int prp_start(struct prp_priv *priv) goto out_free_nfb4eof_irq; } + /* start upstream */ + ret = v4l2_subdev_call(priv->src_sd, video, s_stream, 1); + ret = (ret && ret != -ENOIOCTLCMD) ? ret : 0; + if (ret) { + v4l2_err(&ic_priv->sd, + "upstream stream on failed: %d\n", ret); + goto out_free_eof_irq; + } + /* start the EOF timeout timer */ mod_timer(&priv->eof_timeout_timer, jiffies + msecs_to_jiffies(IMX_MEDIA_EOF_TIMEOUT)); return 0; +out_free_eof_irq: + devm_free_irq(ic_priv->dev, priv->eof_irq, priv); out_free_nfb4eof_irq: devm_free_irq(ic_priv->dev, priv->nfb4eof_irq, priv); out_unsetup: @@ -713,6 +724,12 @@ static void prp_stop(struct prp_priv *priv) if (ret == 0) v4l2_warn(&ic_priv->sd, "wait last EOF timeout\n"); + /* stop upstream */ + ret = v4l2_subdev_call(priv->src_sd, video, s_stream, 0); + if (ret && ret != -ENOIOCTLCMD) + v4l2_warn(&ic_priv->sd, + "upstream stream off failed: %d\n", ret); + devm_free_irq(ic_priv->dev, priv->eof_irq, priv); devm_free_irq(ic_priv->dev, priv->nfb4eof_irq, priv); @@ -1144,15 +1161,6 @@ static int prp_s_stream(struct v4l2_subdev *sd, int enable) if (ret) goto out; - /* start/stop upstream */ - ret = v4l2_subdev_call(priv->src_sd, video, s_stream, enable); - ret = (ret && ret != -ENOIOCTLCMD) ? ret : 0; - if (ret) { - if (enable) - prp_stop(priv); - goto out; - } - update_count: priv->stream_count += enable ? 1 : -1; if (priv->stream_count < 0) From c1ce0ea53e5bc2ea4b4d918d8fb0fac3e46b8c46 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 30 Jan 2019 05:09:41 -0500 Subject: [PATCH 2447/2608] media: uvcvideo: Avoid NULL pointer dereference at the end of streaming commit 9dd0627d8d62a7ddb001a75f63942d92b5336561 upstream. The UVC video driver converts the timestamp from hardware specific unit to one known by the kernel at the time when the buffer is dequeued. This is fine in general, but the streamoff operation consists of the following steps (among other things): 1. uvc_video_clock_cleanup --- the hardware clock sample array is released and the pointer to the array is set to NULL, 2. buffers in active state are returned to the user and 3. buf_finish callback is called on buffers that are prepared. buf_finish includes calling uvc_video_clock_update that accesses the hardware clock sample array. The above is serialised by a queue specific mutex. Address the problem by skipping the clock conversion if the hardware clock sample array is already released. Fixes: 9c0863b1cc48 ("[media] vb2: call buf_finish from __queue_cancel") Reported-by: Chiranjeevi Rapolu Tested-by: Chiranjeevi Rapolu Signed-off-by: Sakari Ailus Cc: stable@vger.kernel.org Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_video.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index a6d800291883..393371916381 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -638,6 +638,14 @@ void uvc_video_clock_update(struct uvc_streaming *stream, if (!uvc_hw_timestamps_param) return; + /* + * We will get called from __vb2_queue_cancel() if there are buffers + * done but not dequeued by the user, but the sample array has already + * been released at that time. Just bail out in that case. + */ + if (!clock->samples) + return; + spin_lock_irqsave(&clock->lock, flags); if (clock->count < clock->size) From 77fbb561bb09f56877dd84318212da393909975f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20A=2E=20M=2E=20Magalh=C3=A3es?= Date: Mon, 21 Jan 2019 20:05:01 -0500 Subject: [PATCH 2448/2608] media: vimc: Add vimc-streamer for stream control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit adc589d2a20808fb99d46a78175cd023f2040338 upstream. Add a linear pipeline logic for the stream control. It's created by walking backwards on the entity graph. When the stream starts it will simply loop through the pipeline calling the respective process_frame function of each entity. Fixes: f2fe89061d797 ("vimc: Virtual Media Controller core, capture and sensor") Cc: stable@vger.kernel.org # for v4.20 Signed-off-by: Lucas A. M. Magalhães Acked-by: Helen Koike Signed-off-by: Hans Verkuil [hverkuil-cisco@xs4all.nl: fixed small space-after-tab issue in the patch] Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vimc/Makefile | 3 +- drivers/media/platform/vimc/vimc-capture.c | 18 +- drivers/media/platform/vimc/vimc-common.c | 35 ---- drivers/media/platform/vimc/vimc-common.h | 15 +- drivers/media/platform/vimc/vimc-debayer.c | 26 +-- drivers/media/platform/vimc/vimc-scaler.c | 28 +-- drivers/media/platform/vimc/vimc-sensor.c | 56 ++---- drivers/media/platform/vimc/vimc-streamer.c | 188 ++++++++++++++++++++ drivers/media/platform/vimc/vimc-streamer.h | 38 ++++ 9 files changed, 260 insertions(+), 147 deletions(-) create mode 100644 drivers/media/platform/vimc/vimc-streamer.c create mode 100644 drivers/media/platform/vimc/vimc-streamer.h diff --git a/drivers/media/platform/vimc/Makefile b/drivers/media/platform/vimc/Makefile index 4b2e3de7856e..c4fc8e7d365a 100644 --- a/drivers/media/platform/vimc/Makefile +++ b/drivers/media/platform/vimc/Makefile @@ -5,6 +5,7 @@ vimc_common-objs := vimc-common.o vimc_debayer-objs := vimc-debayer.o vimc_scaler-objs := vimc-scaler.o vimc_sensor-objs := vimc-sensor.o +vimc_streamer-objs := vimc-streamer.o obj-$(CONFIG_VIDEO_VIMC) += vimc.o vimc_capture.o vimc_common.o vimc-debayer.o \ - vimc_scaler.o vimc_sensor.o + vimc_scaler.o vimc_sensor.o vimc_streamer.o diff --git a/drivers/media/platform/vimc/vimc-capture.c b/drivers/media/platform/vimc/vimc-capture.c index 88a1e5670c72..a078ad18909a 100644 --- a/drivers/media/platform/vimc/vimc-capture.c +++ b/drivers/media/platform/vimc/vimc-capture.c @@ -23,6 +23,7 @@ #include #include "vimc-common.h" +#include "vimc-streamer.h" #define VIMC_CAP_DRV_NAME "vimc-capture" @@ -43,7 +44,7 @@ struct vimc_cap_device { spinlock_t qlock; struct mutex lock; u32 sequence; - struct media_pipeline pipe; + struct vimc_stream stream; }; static const struct v4l2_pix_format fmt_default = { @@ -247,14 +248,13 @@ static int vimc_cap_start_streaming(struct vb2_queue *vq, unsigned int count) vcap->sequence = 0; /* Start the media pipeline */ - ret = media_pipeline_start(entity, &vcap->pipe); + ret = media_pipeline_start(entity, &vcap->stream.pipe); if (ret) { vimc_cap_return_all_buffers(vcap, VB2_BUF_STATE_QUEUED); return ret; } - /* Enable streaming from the pipe */ - ret = vimc_pipeline_s_stream(&vcap->vdev.entity, 1); + ret = vimc_streamer_s_stream(&vcap->stream, &vcap->ved, 1); if (ret) { media_pipeline_stop(entity); vimc_cap_return_all_buffers(vcap, VB2_BUF_STATE_QUEUED); @@ -272,8 +272,7 @@ static void vimc_cap_stop_streaming(struct vb2_queue *vq) { struct vimc_cap_device *vcap = vb2_get_drv_priv(vq); - /* Disable streaming from the pipe */ - vimc_pipeline_s_stream(&vcap->vdev.entity, 0); + vimc_streamer_s_stream(&vcap->stream, &vcap->ved, 0); /* Stop the media pipeline */ media_pipeline_stop(&vcap->vdev.entity); @@ -354,8 +353,8 @@ static void vimc_cap_comp_unbind(struct device *comp, struct device *master, kfree(vcap); } -static void vimc_cap_process_frame(struct vimc_ent_device *ved, - struct media_pad *sink, const void *frame) +static void *vimc_cap_process_frame(struct vimc_ent_device *ved, + const void *frame) { struct vimc_cap_device *vcap = container_of(ved, struct vimc_cap_device, ved); @@ -369,7 +368,7 @@ static void vimc_cap_process_frame(struct vimc_ent_device *ved, typeof(*vimc_buf), list); if (!vimc_buf) { spin_unlock(&vcap->qlock); - return; + return ERR_PTR(-EAGAIN); } /* Remove this entry from the list */ @@ -390,6 +389,7 @@ static void vimc_cap_process_frame(struct vimc_ent_device *ved, vb2_set_plane_payload(&vimc_buf->vb2.vb2_buf, 0, vcap->format.sizeimage); vb2_buffer_done(&vimc_buf->vb2.vb2_buf, VB2_BUF_STATE_DONE); + return NULL; } static int vimc_cap_comp_bind(struct device *comp, struct device *master, diff --git a/drivers/media/platform/vimc/vimc-common.c b/drivers/media/platform/vimc/vimc-common.c index 9d63c84a9876..743554de724d 100644 --- a/drivers/media/platform/vimc/vimc-common.c +++ b/drivers/media/platform/vimc/vimc-common.c @@ -207,41 +207,6 @@ const struct vimc_pix_map *vimc_pix_map_by_pixelformat(u32 pixelformat) } EXPORT_SYMBOL_GPL(vimc_pix_map_by_pixelformat); -int vimc_propagate_frame(struct media_pad *src, const void *frame) -{ - struct media_link *link; - - if (!(src->flags & MEDIA_PAD_FL_SOURCE)) - return -EINVAL; - - /* Send this frame to all sink pads that are direct linked */ - list_for_each_entry(link, &src->entity->links, list) { - if (link->source == src && - (link->flags & MEDIA_LNK_FL_ENABLED)) { - struct vimc_ent_device *ved = NULL; - struct media_entity *entity = link->sink->entity; - - if (is_media_entity_v4l2_subdev(entity)) { - struct v4l2_subdev *sd = - container_of(entity, struct v4l2_subdev, - entity); - ved = v4l2_get_subdevdata(sd); - } else if (is_media_entity_v4l2_video_device(entity)) { - struct video_device *vdev = - container_of(entity, - struct video_device, - entity); - ved = video_get_drvdata(vdev); - } - if (ved && ved->process_frame) - ved->process_frame(ved, link->sink, frame); - } - } - - return 0; -} -EXPORT_SYMBOL_GPL(vimc_propagate_frame); - /* Helper function to allocate and initialize pads */ struct media_pad *vimc_pads_init(u16 num_pads, const unsigned long *pads_flag) { diff --git a/drivers/media/platform/vimc/vimc-common.h b/drivers/media/platform/vimc/vimc-common.h index dca528a316e7..d7c5f4616abb 100644 --- a/drivers/media/platform/vimc/vimc-common.h +++ b/drivers/media/platform/vimc/vimc-common.h @@ -108,23 +108,12 @@ struct vimc_pix_map { struct vimc_ent_device { struct media_entity *ent; struct media_pad *pads; - void (*process_frame)(struct vimc_ent_device *ved, - struct media_pad *sink, const void *frame); + void * (*process_frame)(struct vimc_ent_device *ved, + const void *frame); void (*vdev_get_format)(struct vimc_ent_device *ved, struct v4l2_pix_format *fmt); }; -/** - * vimc_propagate_frame - propagate a frame through the topology - * - * @src: the source pad where the frame is being originated - * @frame: the frame to be propagated - * - * This function will call the process_frame callback from the vimc_ent_device - * struct of the nodes directly connected to the @src pad - */ -int vimc_propagate_frame(struct media_pad *src, const void *frame); - /** * vimc_pads_init - initialize pads * diff --git a/drivers/media/platform/vimc/vimc-debayer.c b/drivers/media/platform/vimc/vimc-debayer.c index 4d663e89d33f..c4e674f665b2 100644 --- a/drivers/media/platform/vimc/vimc-debayer.c +++ b/drivers/media/platform/vimc/vimc-debayer.c @@ -320,7 +320,6 @@ static void vimc_deb_set_rgb_mbus_fmt_rgb888_1x24(struct vimc_deb_device *vdeb, static int vimc_deb_s_stream(struct v4l2_subdev *sd, int enable) { struct vimc_deb_device *vdeb = v4l2_get_subdevdata(sd); - int ret; if (enable) { const struct vimc_pix_map *vpix; @@ -350,22 +349,10 @@ static int vimc_deb_s_stream(struct v4l2_subdev *sd, int enable) if (!vdeb->src_frame) return -ENOMEM; - /* Turn the stream on in the subdevices directly connected */ - ret = vimc_pipeline_s_stream(&vdeb->sd.entity, 1); - if (ret) { - vfree(vdeb->src_frame); - vdeb->src_frame = NULL; - return ret; - } } else { if (!vdeb->src_frame) return 0; - /* Disable streaming from the pipe */ - ret = vimc_pipeline_s_stream(&vdeb->sd.entity, 0); - if (ret) - return ret; - vfree(vdeb->src_frame); vdeb->src_frame = NULL; } @@ -479,9 +466,8 @@ static void vimc_deb_calc_rgb_sink(struct vimc_deb_device *vdeb, } } -static void vimc_deb_process_frame(struct vimc_ent_device *ved, - struct media_pad *sink, - const void *sink_frame) +static void *vimc_deb_process_frame(struct vimc_ent_device *ved, + const void *sink_frame) { struct vimc_deb_device *vdeb = container_of(ved, struct vimc_deb_device, ved); @@ -490,7 +476,7 @@ static void vimc_deb_process_frame(struct vimc_ent_device *ved, /* If the stream in this node is not active, just return */ if (!vdeb->src_frame) - return; + return ERR_PTR(-EINVAL); for (i = 0; i < vdeb->sink_fmt.height; i++) for (j = 0; j < vdeb->sink_fmt.width; j++) { @@ -498,12 +484,8 @@ static void vimc_deb_process_frame(struct vimc_ent_device *ved, vdeb->set_rgb_src(vdeb, i, j, rgb); } - /* Propagate the frame through all source pads */ - for (i = 1; i < vdeb->sd.entity.num_pads; i++) { - struct media_pad *pad = &vdeb->sd.entity.pads[i]; + return vdeb->src_frame; - vimc_propagate_frame(pad, vdeb->src_frame); - } } static void vimc_deb_comp_unbind(struct device *comp, struct device *master, diff --git a/drivers/media/platform/vimc/vimc-scaler.c b/drivers/media/platform/vimc/vimc-scaler.c index e1602e0bc230..b763d87f4b4b 100644 --- a/drivers/media/platform/vimc/vimc-scaler.c +++ b/drivers/media/platform/vimc/vimc-scaler.c @@ -216,7 +216,6 @@ static const struct v4l2_subdev_pad_ops vimc_sca_pad_ops = { static int vimc_sca_s_stream(struct v4l2_subdev *sd, int enable) { struct vimc_sca_device *vsca = v4l2_get_subdevdata(sd); - int ret; if (enable) { const struct vimc_pix_map *vpix; @@ -244,22 +243,10 @@ static int vimc_sca_s_stream(struct v4l2_subdev *sd, int enable) if (!vsca->src_frame) return -ENOMEM; - /* Turn the stream on in the subdevices directly connected */ - ret = vimc_pipeline_s_stream(&vsca->sd.entity, 1); - if (ret) { - vfree(vsca->src_frame); - vsca->src_frame = NULL; - return ret; - } } else { if (!vsca->src_frame) return 0; - /* Disable streaming from the pipe */ - ret = vimc_pipeline_s_stream(&vsca->sd.entity, 0); - if (ret) - return ret; - vfree(vsca->src_frame); vsca->src_frame = NULL; } @@ -345,26 +332,19 @@ static void vimc_sca_fill_src_frame(const struct vimc_sca_device *const vsca, vimc_sca_scale_pix(vsca, i, j, sink_frame); } -static void vimc_sca_process_frame(struct vimc_ent_device *ved, - struct media_pad *sink, - const void *sink_frame) +static void *vimc_sca_process_frame(struct vimc_ent_device *ved, + const void *sink_frame) { struct vimc_sca_device *vsca = container_of(ved, struct vimc_sca_device, ved); - unsigned int i; /* If the stream in this node is not active, just return */ if (!vsca->src_frame) - return; + return ERR_PTR(-EINVAL); vimc_sca_fill_src_frame(vsca, sink_frame); - /* Propagate the frame through all source pads */ - for (i = 1; i < vsca->sd.entity.num_pads; i++) { - struct media_pad *pad = &vsca->sd.entity.pads[i]; - - vimc_propagate_frame(pad, vsca->src_frame); - } + return vsca->src_frame; }; static void vimc_sca_comp_unbind(struct device *comp, struct device *master, diff --git a/drivers/media/platform/vimc/vimc-sensor.c b/drivers/media/platform/vimc/vimc-sensor.c index 02e68c8fc02b..70cee5c0c89a 100644 --- a/drivers/media/platform/vimc/vimc-sensor.c +++ b/drivers/media/platform/vimc/vimc-sensor.c @@ -16,8 +16,6 @@ */ #include -#include -#include #include #include #include @@ -197,38 +195,27 @@ static const struct v4l2_subdev_pad_ops vimc_sen_pad_ops = { .set_fmt = vimc_sen_set_fmt, }; -static int vimc_sen_tpg_thread(void *data) +static void *vimc_sen_process_frame(struct vimc_ent_device *ved, + const void *sink_frame) { - struct vimc_sen_device *vsen = data; - unsigned int i; + struct vimc_sen_device *vsen = container_of(ved, struct vimc_sen_device, + ved); + const struct vimc_pix_map *vpix; + unsigned int frame_size; - set_freezable(); - set_current_state(TASK_UNINTERRUPTIBLE); + /* Calculate the frame size */ + vpix = vimc_pix_map_by_code(vsen->mbus_format.code); + frame_size = vsen->mbus_format.width * vpix->bpp * + vsen->mbus_format.height; - for (;;) { - try_to_freeze(); - if (kthread_should_stop()) - break; - - tpg_fill_plane_buffer(&vsen->tpg, 0, 0, vsen->frame); - - /* Send the frame to all source pads */ - for (i = 0; i < vsen->sd.entity.num_pads; i++) - vimc_propagate_frame(&vsen->sd.entity.pads[i], - vsen->frame); - - /* 60 frames per second */ - schedule_timeout(HZ/60); - } - - return 0; + tpg_fill_plane_buffer(&vsen->tpg, 0, 0, vsen->frame); + return vsen->frame; } static int vimc_sen_s_stream(struct v4l2_subdev *sd, int enable) { struct vimc_sen_device *vsen = container_of(sd, struct vimc_sen_device, sd); - int ret; if (enable) { const struct vimc_pix_map *vpix; @@ -254,26 +241,8 @@ static int vimc_sen_s_stream(struct v4l2_subdev *sd, int enable) /* configure the test pattern generator */ vimc_sen_tpg_s_format(vsen); - /* Initialize the image generator thread */ - vsen->kthread_sen = kthread_run(vimc_sen_tpg_thread, vsen, - "%s-sen", vsen->sd.v4l2_dev->name); - if (IS_ERR(vsen->kthread_sen)) { - dev_err(vsen->dev, "%s: kernel_thread() failed\n", - vsen->sd.name); - vfree(vsen->frame); - vsen->frame = NULL; - return PTR_ERR(vsen->kthread_sen); - } } else { - if (!vsen->kthread_sen) - return 0; - /* Stop image generator */ - ret = kthread_stop(vsen->kthread_sen); - if (ret) - return ret; - - vsen->kthread_sen = NULL; vfree(vsen->frame); vsen->frame = NULL; return 0; @@ -325,6 +294,7 @@ static int vimc_sen_comp_bind(struct device *comp, struct device *master, if (ret) goto err_free_vsen; + vsen->ved.process_frame = vimc_sen_process_frame; dev_set_drvdata(comp, &vsen->ved); vsen->dev = comp; diff --git a/drivers/media/platform/vimc/vimc-streamer.c b/drivers/media/platform/vimc/vimc-streamer.c new file mode 100644 index 000000000000..fcc897fb247b --- /dev/null +++ b/drivers/media/platform/vimc/vimc-streamer.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * vimc-streamer.c Virtual Media Controller Driver + * + * Copyright (C) 2018 Lucas A. M. Magalhães + * + */ + +#include +#include +#include +#include + +#include "vimc-streamer.h" + +/** + * vimc_get_source_entity - get the entity connected with the first sink pad + * + * @ent: reference media_entity + * + * Helper function that returns the media entity containing the source pad + * linked with the first sink pad from the given media entity pad list. + */ +static struct media_entity *vimc_get_source_entity(struct media_entity *ent) +{ + struct media_pad *pad; + int i; + + for (i = 0; i < ent->num_pads; i++) { + if (ent->pads[i].flags & MEDIA_PAD_FL_SOURCE) + continue; + pad = media_entity_remote_pad(&ent->pads[i]); + return pad ? pad->entity : NULL; + } + return NULL; +} + +/* + * vimc_streamer_pipeline_terminate - Disable stream in all ved in stream + * + * @stream: the pointer to the stream structure with the pipeline to be + * disabled. + * + * Calls s_stream to disable the stream in each entity of the pipeline + * + */ +static void vimc_streamer_pipeline_terminate(struct vimc_stream *stream) +{ + struct media_entity *entity; + struct v4l2_subdev *sd; + + while (stream->pipe_size) { + stream->pipe_size--; + entity = stream->ved_pipeline[stream->pipe_size]->ent; + entity = vimc_get_source_entity(entity); + stream->ved_pipeline[stream->pipe_size] = NULL; + + if (!is_media_entity_v4l2_subdev(entity)) + continue; + + sd = media_entity_to_v4l2_subdev(entity); + v4l2_subdev_call(sd, video, s_stream, 0); + } +} + +/* + * vimc_streamer_pipeline_init - initializes the stream structure + * + * @stream: the pointer to the stream structure to be initialized + * @ved: the pointer to the vimc entity initializing the stream + * + * Initializes the stream structure. Walks through the entity graph to + * construct the pipeline used later on the streamer thread. + * Calls s_stream to enable stream in all entities of the pipeline. + */ +static int vimc_streamer_pipeline_init(struct vimc_stream *stream, + struct vimc_ent_device *ved) +{ + struct media_entity *entity; + struct video_device *vdev; + struct v4l2_subdev *sd; + int ret = 0; + + stream->pipe_size = 0; + while (stream->pipe_size < VIMC_STREAMER_PIPELINE_MAX_SIZE) { + if (!ved) { + vimc_streamer_pipeline_terminate(stream); + return -EINVAL; + } + stream->ved_pipeline[stream->pipe_size++] = ved; + + entity = vimc_get_source_entity(ved->ent); + /* Check if the end of the pipeline was reached*/ + if (!entity) + return 0; + + if (is_media_entity_v4l2_subdev(entity)) { + sd = media_entity_to_v4l2_subdev(entity); + ret = v4l2_subdev_call(sd, video, s_stream, 1); + if (ret && ret != -ENOIOCTLCMD) { + vimc_streamer_pipeline_terminate(stream); + return ret; + } + ved = v4l2_get_subdevdata(sd); + } else { + vdev = container_of(entity, + struct video_device, + entity); + ved = video_get_drvdata(vdev); + } + } + + vimc_streamer_pipeline_terminate(stream); + return -EINVAL; +} + +static int vimc_streamer_thread(void *data) +{ + struct vimc_stream *stream = data; + int i; + + set_freezable(); + set_current_state(TASK_UNINTERRUPTIBLE); + + for (;;) { + try_to_freeze(); + if (kthread_should_stop()) + break; + + for (i = stream->pipe_size - 1; i >= 0; i--) { + stream->frame = stream->ved_pipeline[i]->process_frame( + stream->ved_pipeline[i], + stream->frame); + if (!stream->frame) + break; + if (IS_ERR(stream->frame)) + break; + } + //wait for 60hz + schedule_timeout(HZ / 60); + } + + return 0; +} + +int vimc_streamer_s_stream(struct vimc_stream *stream, + struct vimc_ent_device *ved, + int enable) +{ + int ret; + + if (!stream || !ved) + return -EINVAL; + + if (enable) { + if (stream->kthread) + return 0; + + ret = vimc_streamer_pipeline_init(stream, ved); + if (ret) + return ret; + + stream->kthread = kthread_run(vimc_streamer_thread, stream, + "vimc-streamer thread"); + + if (IS_ERR(stream->kthread)) + return PTR_ERR(stream->kthread); + + } else { + if (!stream->kthread) + return 0; + + ret = kthread_stop(stream->kthread); + if (ret) + return ret; + + stream->kthread = NULL; + + vimc_streamer_pipeline_terminate(stream); + } + + return 0; +} +EXPORT_SYMBOL_GPL(vimc_streamer_s_stream); + +MODULE_DESCRIPTION("Virtual Media Controller Driver (VIMC) Streamer"); +MODULE_AUTHOR("Lucas A. M. Magalhães "); +MODULE_LICENSE("GPL"); diff --git a/drivers/media/platform/vimc/vimc-streamer.h b/drivers/media/platform/vimc/vimc-streamer.h new file mode 100644 index 000000000000..752af2e2d5a2 --- /dev/null +++ b/drivers/media/platform/vimc/vimc-streamer.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * vimc-streamer.h Virtual Media Controller Driver + * + * Copyright (C) 2018 Lucas A. M. Magalhães + * + */ + +#ifndef _VIMC_STREAMER_H_ +#define _VIMC_STREAMER_H_ + +#include + +#include "vimc-common.h" + +#define VIMC_STREAMER_PIPELINE_MAX_SIZE 16 + +struct vimc_stream { + struct media_pipeline pipe; + struct vimc_ent_device *ved_pipeline[VIMC_STREAMER_PIPELINE_MAX_SIZE]; + unsigned int pipe_size; + u8 *frame; + struct task_struct *kthread; +}; + +/** + * vimc_streamer_s_streamer - start/stop the stream + * + * @stream: the pointer to the stream to start or stop + * @ved: The last entity of the streamer pipeline + * @enable: any non-zero number start the stream, zero stop + * + */ +int vimc_streamer_s_stream(struct vimc_stream *stream, + struct vimc_ent_device *ved, + int enable); + +#endif //_VIMC_STREAMER_H_ From 273bef060472e2678d8c3ec50d361afa0e2c2b6c Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Mon, 21 Jan 2019 21:35:50 -0200 Subject: [PATCH 2449/2608] media: imx: csi: Disable CSI immediately after last EOF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2e0fe66e0a136252f4d89dbbccdcb26deb867eb8 upstream. Disable the CSI immediately after receiving the last EOF before stream off (and thus before disabling the IDMA channel). Do this by moving the wait for EOF completion into a new function csi_idmac_wait_last_eof(). This fixes a complete system hard lockup on the SabreAuto when streaming from the ADV7180, by repeatedly sending a stream off immediately followed by stream on: while true; do v4l2-ctl -d4 --stream-mmap --stream-count=3; done Eventually this either causes the system lockup or EOF timeouts at all subsequent stream on, until a system reset. The lockup occurs when disabling the IDMA channel at stream off. Disabling the CSI before disabling the IDMA channel appears to be a reliable fix for the hard lockup. Fixes: 4a34ec8e470cb ("[media] media: imx: Add CSI subdev driver") Reported-by: Gaël PORTAY Signed-off-by: Steve Longerbeam Cc: stable@vger.kernel.org # for 4.13 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/imx/imx-media-csi.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c index 83ecb5b2fb9e..5bc8f84d09a2 100644 --- a/drivers/staging/media/imx/imx-media-csi.c +++ b/drivers/staging/media/imx/imx-media-csi.c @@ -538,7 +538,7 @@ out_put_ipu: return ret; } -static void csi_idmac_stop(struct csi_priv *priv) +static void csi_idmac_wait_last_eof(struct csi_priv *priv) { unsigned long flags; int ret; @@ -555,7 +555,10 @@ static void csi_idmac_stop(struct csi_priv *priv) &priv->last_eof_comp, msecs_to_jiffies(IMX_MEDIA_EOF_TIMEOUT)); if (ret == 0) v4l2_warn(&priv->sd, "wait last EOF timeout\n"); +} +static void csi_idmac_stop(struct csi_priv *priv) +{ devm_free_irq(priv->dev, priv->eof_irq, priv); devm_free_irq(priv->dev, priv->nfb4eof_irq, priv); @@ -681,6 +684,16 @@ idmac_stop: static void csi_stop(struct csi_priv *priv) { + if (priv->dest == IPU_CSI_DEST_IDMAC) + csi_idmac_wait_last_eof(priv); + + /* + * Disable the CSI asap, after syncing with the last EOF. + * Doing so after the IDMA channel is disabled has shown to + * create hard system-wide hangs. + */ + ipu_csi_disable(priv->csi); + if (priv->dest == IPU_CSI_DEST_IDMAC) { csi_idmac_stop(priv); @@ -688,8 +701,6 @@ static void csi_stop(struct csi_priv *priv) if (priv->fim) imx_media_fim_set_stream(priv->fim, NULL, false); } - - ipu_csi_disable(priv->csi); } static const struct csi_skip_desc csi_skip[12] = { From 9e0bb722d4f3807a971207fe35b9a358e124a858 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Mon, 21 Jan 2019 21:35:51 -0200 Subject: [PATCH 2450/2608] media: imx: csi: Stop upstream before disabling IDMA channel commit 4bc1ab41eee9d02ad2483bf8f51a7b72e3504eba upstream. Move upstream stream off to just after receiving the last EOF completion and disabling the CSI (and thus before disabling the IDMA channel) in csi_stop(). For symmetry also move upstream stream on to beginning of csi_start(). Doing this makes csi_s_stream() more symmetric with prp_s_stream() which will require the same change to fix a hard lockup. Signed-off-by: Steve Longerbeam Cc: stable@vger.kernel.org # for 4.13 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/imx/imx-media-csi.c | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c index 5bc8f84d09a2..69df8b23227a 100644 --- a/drivers/staging/media/imx/imx-media-csi.c +++ b/drivers/staging/media/imx/imx-media-csi.c @@ -648,10 +648,16 @@ static int csi_start(struct csi_priv *priv) usleep_range(delay_usec, delay_usec + 1000); } + /* start upstream */ + ret = v4l2_subdev_call(priv->src_sd, video, s_stream, 1); + ret = (ret && ret != -ENOIOCTLCMD) ? ret : 0; + if (ret) + return ret; + if (priv->dest == IPU_CSI_DEST_IDMAC) { ret = csi_idmac_start(priv); if (ret) - return ret; + goto stop_upstream; } ret = csi_setup(priv); @@ -679,6 +685,8 @@ fim_off: idmac_stop: if (priv->dest == IPU_CSI_DEST_IDMAC) csi_idmac_stop(priv); +stop_upstream: + v4l2_subdev_call(priv->src_sd, video, s_stream, 0); return ret; } @@ -694,6 +702,9 @@ static void csi_stop(struct csi_priv *priv) */ ipu_csi_disable(priv->csi); + /* stop upstream */ + v4l2_subdev_call(priv->src_sd, video, s_stream, 0); + if (priv->dest == IPU_CSI_DEST_IDMAC) { csi_idmac_stop(priv); @@ -861,23 +872,13 @@ static int csi_s_stream(struct v4l2_subdev *sd, int enable) goto update_count; if (enable) { - /* upstream must be started first, before starting CSI */ - ret = v4l2_subdev_call(priv->src_sd, video, s_stream, 1); - ret = (ret && ret != -ENOIOCTLCMD) ? ret : 0; - if (ret) - goto out; - dev_dbg(priv->dev, "stream ON\n"); ret = csi_start(priv); - if (ret) { - v4l2_subdev_call(priv->src_sd, video, s_stream, 0); + if (ret) goto out; - } } else { dev_dbg(priv->dev, "stream OFF\n"); - /* CSI must be stopped first, then stop upstream */ csi_stop(priv); - v4l2_subdev_call(priv->src_sd, video, s_stream, 0); } update_count: From 1e6e41f5fcddac8a9334009fec068f4f7ec20723 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 14:29:26 -0600 Subject: [PATCH 2451/2608] drm/radeon/evergreen_cs: fix missing break in switch statement commit cc5034a5d293dd620484d1d836aa16c6764a1c8c upstream. Add missing break statement in order to prevent the code from falling through to case CB_TARGET_MASK. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: dd220a00e8bd ("drm/radeon/kms: add support for streamout v7") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen_cs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 54324330b91f..2f0a5bd50174 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -1299,6 +1299,7 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); + break; case CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); track->cb_dirty = true; From 89dce6e457a14aa53fc0a83ec8f4206748a5c87a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Feb 2019 12:54:17 -0800 Subject: [PATCH 2452/2608] KVM: Call kvm_arch_memslots_updated() before updating memslots commit 152482580a1b0accb60676063a1ac57b2d12daf6 upstream. kvm_arch_memslots_updated() is at this point in time an x86-specific hook for handling MMIO generation wraparound. x86 stashes 19 bits of the memslots generation number in its MMIO sptes in order to avoid full page fault walks for repeat faults on emulated MMIO addresses. Because only 19 bits are used, wrapping the MMIO generation number is possible, if unlikely. kvm_arch_memslots_updated() alerts x86 that the generation has changed so that it can invalidate all MMIO sptes in case the effective MMIO generation has wrapped so as to avoid using a stale spte, e.g. a (very) old spte that was created with generation==0. Given that the purpose of kvm_arch_memslots_updated() is to prevent consuming stale entries, it needs to be called before the new generation is propagated to memslots. Invalidating the MMIO sptes after updating memslots means that there is a window where a vCPU could dereference the new memslots generation, e.g. 0, and incorrectly reuse an old MMIO spte that was created with (pre-wrap) generation==0. Fixes: e59dbe09f8e6 ("KVM: Introduce kvm_arch_memslots_updated()") Cc: Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- virt/kvm/arm/mmu.c | 2 +- virt/kvm/kvm_main.c | 7 +++++-- 9 files changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index a9af1d2dcd69..673049bf29b6 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -1132,7 +1132,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e372ed871c51..e3ba58f64c3d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -809,7 +809,7 @@ struct kvm_vcpu_arch { static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d660e784e445..3fdc0bb974d9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -784,7 +784,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 72fac8646e9b..d2ae93faafe8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1121,7 +1121,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots); +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 364d9895dd56..516e707961d7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5418,13 +5418,13 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); } -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots) +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { /* * The very rare case: if the generation-number is round, * zap all shadow pages. */ - if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) { + if (unlikely((gen & MMIO_GEN_MASK) == 0)) { kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b0e7621ddf01..ce5b3dc348ce 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8524,13 +8524,13 @@ out_free: return -ENOMEM; } -void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { /* * memslots->generation has been incremented. * mmio generation may have reached its maximum value. */ - kvm_mmu_invalidate_mmio_sptes(kvm, slots); + kvm_mmu_invalidate_mmio_sptes(kvm, gen); } int kvm_arch_prepare_memory_region(struct kvm *kvm, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4f7f19c1dc0a..753c16633bac 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -625,7 +625,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages); -void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots); +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ec275b8472a9..225dc671ae31 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1955,7 +1955,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9b79818758dc..66cc315efa6d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -856,6 +856,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, int as_id, struct kvm_memslots *slots) { struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); + u64 gen; /* * Set the low bit in the generation, which disables SPTE caching @@ -878,9 +879,11 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, * space 0 will use generations 0, 4, 8, ... while * address space 1 will * use generations 2, 6, 10, 14, ... */ - slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; + gen = slots->generation + KVM_ADDRESS_SPACE_NUM * 2 - 1; - kvm_arch_memslots_updated(kvm, slots); + kvm_arch_memslots_updated(kvm, gen); + + slots->generation = gen; return old_memslots; } From c9ae735f52ffb2c17d884a65cd76559da1d64443 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Feb 2019 13:01:12 -0800 Subject: [PATCH 2453/2608] KVM: x86/mmu: Detect MMIO generation wrap in any address space commit e1359e2beb8b0a1188abc997273acbaedc8ee791 upstream. The check to detect a wrap of the MMIO generation explicitly looks for a generation number of zero. Now that unique memslots generation numbers are assigned to each address space, only address space 0 will get a generation number of exactly zero when wrapping. E.g. when address space 1 goes from 0x7fffe to 0x80002, the MMIO generation number will wrap to 0x2. Adjust the MMIO generation to strip the address space modifier prior to checking for a wrap. Fixes: 4bd518f1598d ("KVM: use separate generations for each address space") Cc: Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 516e707961d7..f97b533bc6e6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5420,11 +5420,28 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { + gen &= MMIO_GEN_MASK; + /* - * The very rare case: if the generation-number is round, + * Shift to eliminate the "update in-progress" flag, which isn't + * included in the spte's generation number. + */ + gen >>= 1; + + /* + * Generation numbers are incremented in multiples of the number of + * address spaces in order to provide unique generations across all + * address spaces. Strip what is effectively the address space + * modifier prior to checking for a wrap of the MMIO generation so + * that a wrap in any address space is detected. + */ + gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1); + + /* + * The very rare case: if the MMIO generation number has wrapped, * zap all shadow pages. */ - if (unlikely((gen & MMIO_GEN_MASK) == 0)) { + if (unlikely(gen == 0)) { kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } From 8dc2217d2feefe41d8a94e8d9046332b9ba882d3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Feb 2019 13:01:13 -0800 Subject: [PATCH 2454/2608] KVM: x86/mmu: Do not cache MMIO accesses while memslots are in flux commit ddfd1730fd829743e41213e32ccc8b4aa6dc8325 upstream. When installing new memslots, KVM sets bit 0 of the generation number to indicate that an update is in-progress. Until the update is complete, there are no guarantees as to whether a vCPU will see the old or the new memslots. Explicity prevent caching MMIO accesses so as to avoid using an access cached from the old memslots after the new memslots have been installed. Note that it is unclear whether or not disabling caching during the update window is strictly necessary as there is no definitive documentation as to what ordering guarantees KVM provides with respect to updating memslots. That being said, the MMIO spte code does not allow reusing sptes created while an update is in-progress, and the associated documentation explicitly states: We do not want to use an MMIO sptes created with an odd generation number, ... If KVM is unlucky and creates an MMIO spte while the low bit is 1, the next access to the spte will always be a cache miss. At the very least, disabling the per-vCPU MMIO cache during updates will make its behavior consistent with the MMIO spte behavior and documentation. Fixes: 56f17dd3fbc4 ("kvm: x86: fix stale mmio cache bug") Cc: Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index d4b59cf0dc51..c88305d997b0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -136,6 +136,11 @@ static inline bool emul_is_noncanonical_address(u64 la, static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, unsigned access) { + u64 gen = kvm_memslots(vcpu->kvm)->generation; + + if (unlikely(gen & 1)) + return; + /* * If this is a shadow nested page table, the "GVA" is * actually a nGPA. @@ -143,7 +148,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; - vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; + vcpu->arch.mmio_gen = gen; } static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) From 3700eac374135d0e0839ba2839f362a1d127dc58 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Jan 2019 14:39:23 -0800 Subject: [PATCH 2455/2608] KVM: nVMX: Sign extend displacements of VMX instr's mem operands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 946c522b603f281195af1df91837a1d4d1eb3bc9 upstream. The VMCS.EXIT_QUALIFCATION field reports the displacements of memory operands for various instructions, including VMX instructions, as a naturally sized unsigned value, but masks the value by the addr size, e.g. given a ModRM encoded as -0x28(%ebp), the -0x28 displacement is reported as 0xffffffd8 for a 32-bit address size. Despite some weird wording regarding sign extension, the SDM explicitly states that bits beyond the instructions address size are undefined: In all cases, bits of this field beyond the instruction’s address size are undefined. Failure to sign extend the displacement results in KVM incorrectly treating a negative displacement as a large positive displacement when the address size of the VMX instruction is smaller than KVM's native size, e.g. a 32-bit address size on a 64-bit KVM. The very original decoding, added by commit 064aea774768 ("KVM: nVMX: Decoding memory operands of VMX instructions"), sort of modeled sign extension by truncating the final virtual/linear address for a 32-bit address size. I.e. it messed up the effective address but made it work by adjusting the final address. When segmentation checks were added, the truncation logic was kept as-is and no sign extension logic was introduced. In other words, it kept calculating the wrong effective address while mostly generating the correct virtual/linear address. As the effective address is what's used in the segment limit checks, this results in KVM incorreclty injecting #GP/#SS faults due to non-existent segment violations when a nested VMM uses negative displacements with an address size smaller than KVM's native address size. Using the -0x28(%ebp) example, an EBP value of 0x1000 will result in KVM using 0x100000fd8 as the effective address when checking for a segment limit violation. This causes a 100% failure rate when running a 32-bit KVM build as L1 on top of a 64-bit KVM L0. Fixes: f9eb4af67c9d ("KVM: nVMX: VMX instructions: add checks for #GP/#SS exceptions") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e5a977bf50e..21145cfd0ff0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7446,6 +7446,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Addr = segment_base + offset */ /* offset = base + [index * scale] + displacement */ off = exit_qualification; /* holds the displacement */ + if (addr_size == 1) + off = (gva_t)sign_extend64(off, 31); + else if (addr_size == 0) + off = (gva_t)sign_extend64(off, 15); if (base_is_valid) off += kvm_register_read(vcpu, base_reg); if (index_is_valid) From 32d42b0fb24ad4e37eb0808ddc9d0f129ee3f477 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Jan 2019 14:39:24 -0800 Subject: [PATCH 2456/2608] KVM: nVMX: Apply addr size mask to effective address for VMX instructions commit 8570f9e881e3fde98801bb3a47eef84dd934d405 upstream. The address size of an instruction affects the effective address, not the virtual/linear address. The final address may still be truncated, e.g. to 32-bits outside of long mode, but that happens irrespective of the address size, e.g. a 32-bit address size can yield a 64-bit virtual address when using FS/GS with a non-zero base. Fixes: 064aea774768 ("KVM: nVMX: Decoding memory operands of VMX instructions") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 21145cfd0ff0..3aafed4db34c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7455,20 +7455,41 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, if (index_is_valid) off += kvm_register_read(vcpu, index_reg)< Date: Wed, 23 Jan 2019 14:39:25 -0800 Subject: [PATCH 2457/2608] KVM: nVMX: Ignore limit checks on VMX instructions using flat segments commit 34333cc6c2cb021662fd32e24e618d1b86de95bf upstream. Regarding segments with a limit==0xffffffff, the SDM officially states: When the effective limit is FFFFFFFFH (4 GBytes), these accesses may or may not cause the indicated exceptions. Behavior is implementation-specific and may vary from one execution to another. In practice, all CPUs that support VMX ignore limit checks for "flat segments", i.e. an expand-up data or code segment with base=0 and limit=0xffffffff. This is subtly different than wrapping the effective address calculation based on the address size, as the flat segment behavior also applies to accesses that would wrap the 4g boundary, e.g. a 4-byte access starting at 0xffffffff will access linear addresses 0xffffffff, 0x0, 0x1 and 0x2. Fixes: f9eb4af67c9d ("KVM: nVMX: VMX instructions: add checks for #GP/#SS exceptions") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3aafed4db34c..229d5e39f5c0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7513,10 +7513,16 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); - /* Protected mode: #GP(0)/#SS(0) if the memory - * operand is outside the segment limit. + + /* + * Protected mode: #GP(0)/#SS(0) if the memory operand is + * outside the segment limit. All CPUs that support VMX ignore + * limit checks for flat segments, i.e. segments with base==0, + * limit==0xffffffff and of type expand-up data or code. */ - exn = exn || (off + sizeof(u64) > s.limit); + if (!(s.base == 0 && s.limit == 0xffffffff && + ((s.type & 8) || !(s.type & 4)))) + exn = exn || (off + sizeof(u64) > s.limit); } if (exn) { kvm_queue_exception_e(vcpu, From b3aef39ba142ec1a1f3236770590c9f6e490841f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 18 Feb 2019 18:10:08 +0100 Subject: [PATCH 2458/2608] s390/setup: fix boot crash for machine without EDAT-1 commit 86a86804e4f18fc3880541b3d5a07f4df0fe29cb upstream. The fix to make WARN work in the early boot code created a problem on older machines without EDAT-1. The setup_lowcore_dat_on function uses the pointer from lowcore_ptr[0] to set the DAT bit in the new PSWs. That does not work if the kernel page table is set up with 4K pages as the prefix address maps to absolute zero. To make this work the PSWs need to be changed with via address 0 in form of the S390_lowcore definition. Reported-by: Guenter Roeck Tested-by: Cornelia Huck Fixes: 94f85ed3e2f8 ("s390/setup: fix early warning messages") Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/setup.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a9f5323f2f51..5c2558cc6977 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -390,13 +390,12 @@ static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_on(void) { - struct lowcore *lc; - - lc = lowcore_ptr[0]; - lc->external_new_psw.mask |= PSW_MASK_DAT; - lc->svc_new_psw.mask |= PSW_MASK_DAT; - lc->program_new_psw.mask |= PSW_MASK_DAT; - lc->io_new_psw.mask |= PSW_MASK_DAT; + __ctl_clear_bit(0, 28); + S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; + S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT; + S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT; + S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; + __ctl_set_bit(0, 28); } static struct resource code_resource = { From dee55b062e6b7d802f7549e8542ca245ea57c003 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 23 Mar 2019 14:35:32 +0100 Subject: [PATCH 2459/2608] Linux 4.14.108 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e3e2121718a8..170411b62525 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 107 +SUBLEVEL = 108 EXTRAVERSION = NAME = Petit Gorille From 3ce190bbca8cbacf3796aacad2f2f441725799d7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 11:09:19 +0100 Subject: [PATCH 2460/2608] mmc: pxamci: fix enum type confusion commit e60a582bcde01158a64ff948fb799f21f5d31a11 upstream. clang points out several instances of mismatched types in this drivers, all coming from a single declaration: drivers/mmc/host/pxamci.c:193:15: error: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Werror,-Wenum-conversion] direction = DMA_DEV_TO_MEM; ~ ^~~~~~~~~~~~~~ drivers/mmc/host/pxamci.c:212:62: error: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Werror,-Wenum-conversion] tx = dmaengine_prep_slave_sg(chan, data->sg, host->dma_len, direction, The behavior is correct, so this must be a simply typo from dma_data_direction and dma_transfer_direction being similarly named types with a similar purpose. Fixes: 6464b7140951 ("mmc: pxamci: switch over to dmaengine use") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Acked-by: Robert Jarzmik Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/pxamci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index c763b404510f..3e139692fe8f 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -181,7 +181,7 @@ static void pxamci_dma_irq(void *param); static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data) { struct dma_async_tx_descriptor *tx; - enum dma_data_direction direction; + enum dma_transfer_direction direction; struct dma_slave_config config; struct dma_chan *chan; unsigned int nob = data->blocks; From 47248fde59a60a31a55d489f4851836f0ac94295 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Mar 2019 15:47:58 +0100 Subject: [PATCH 2461/2608] drm/vmwgfx: Don't double-free the mode stored in par->set_mode commit c2d311553855395764e2e5bf401d987ba65c2056 upstream. When calling vmw_fb_set_par(), the mode stored in par->set_mode gets free'd twice. The first free is in vmw_fb_kms_detach(), the second is near the end of vmw_fb_set_par() under the name of 'old_mode'. The mode-setting code only works correctly if the mode doesn't actually change. Removing 'old_mode' in favor of using par->set_mode directly fixes the problem. Cc: Fixes: a278724aa23c ("drm/vmwgfx: Implement fbdev on kms v2") Signed-off-by: Thomas Zimmermann Reviewed-by: Deepak Rawat Signed-off-by: Thomas Hellstrom Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index d23a18aae476..3ba9b6ad0281 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -588,11 +588,9 @@ static int vmw_fb_set_par(struct fb_info *info) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }; - struct drm_display_mode *old_mode; struct drm_display_mode *mode; int ret; - old_mode = par->set_mode; mode = drm_mode_duplicate(vmw_priv->dev, &new_mode); if (!mode) { DRM_ERROR("Could not create new fb mode.\n"); @@ -603,11 +601,7 @@ static int vmw_fb_set_par(struct fb_info *info) mode->vdisplay = var->yres; vmw_guess_mode_timing(mode); - if (old_mode && drm_mode_equal(old_mode, mode)) { - drm_mode_destroy(vmw_priv->dev, mode); - mode = old_mode; - old_mode = NULL; - } else if (!vmw_kms_validate_mode_vram(vmw_priv, + if (!vmw_kms_validate_mode_vram(vmw_priv, mode->hdisplay * DIV_ROUND_UP(var->bits_per_pixel, 8), mode->vdisplay)) { @@ -677,8 +671,8 @@ static int vmw_fb_set_par(struct fb_info *info) schedule_delayed_work(&par->local_work, 0); out_unlock: - if (old_mode) - drm_mode_destroy(vmw_priv->dev, old_mode); + if (par->set_mode) + drm_mode_destroy(vmw_priv->dev, par->set_mode); par->set_mode = mode; drm_modeset_unlock_all(vmw_priv->dev); From 1e7b9a3143ad2a24c909351acecfdeafe17d0a9d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 13 Mar 2019 10:03:17 +0100 Subject: [PATCH 2462/2608] iommu/amd: fix sg->dma_address for sg->offset bigger than PAGE_SIZE commit 4e50ce03976fbc8ae995a000c4b10c737467beaa upstream. Take into account that sg->offset can be bigger than PAGE_SIZE when setting segment sg->dma_address. Otherwise sg->dma_address will point at diffrent page, what makes DMA not possible with erros like this: xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa70c0 flags=0x0020] xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7040 flags=0x0020] xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7080 flags=0x0020] xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7100 flags=0x0020] xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7000 flags=0x0020] Additinally with wrong sg->dma_address unmap_sg will free wrong pages, what what can cause crashes like this: Feb 28 19:27:45 kernel: BUG: Bad page state in process cinnamon pfn:39e8b1 Feb 28 19:27:45 kernel: Disabling lock debugging due to kernel taint Feb 28 19:27:45 kernel: flags: 0x2ffff0000000000() Feb 28 19:27:45 kernel: raw: 02ffff0000000000 0000000000000000 ffffffff00000301 0000000000000000 Feb 28 19:27:45 kernel: raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 Feb 28 19:27:45 kernel: page dumped because: nonzero _refcount Feb 28 19:27:45 kernel: Modules linked in: ccm fuse arc4 nct6775 hwmon_vid amdgpu nls_iso8859_1 nls_cp437 edac_mce_amd vfat fat kvm_amd ccp rng_core kvm mt76x0u mt76x0_common mt76x02_usb irqbypass mt76_usb mt76x02_lib mt76 crct10dif_pclmul crc32_pclmul chash mac80211 amd_iommu_v2 ghash_clmulni_intel gpu_sched i2c_algo_bit ttm wmi_bmof snd_hda_codec_realtek snd_hda_codec_generic drm_kms_helper snd_hda_codec_hdmi snd_hda_intel drm snd_hda_codec aesni_intel snd_hda_core snd_hwdep aes_x86_64 crypto_simd snd_pcm cfg80211 cryptd mousedev snd_timer glue_helper pcspkr r8169 input_leds realtek agpgart libphy rfkill snd syscopyarea sysfillrect sysimgblt fb_sys_fops soundcore sp5100_tco k10temp i2c_piix4 wmi evdev gpio_amdpt pinctrl_amd mac_hid pcc_cpufreq acpi_cpufreq sg ip_tables x_tables ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) sd_mod(E) hid_generic(E) usbhid(E) hid(E) dm_mod(E) serio_raw(E) atkbd(E) libps2(E) crc32c_intel(E) ahci(E) libahci(E) libata(E) xhci_pci(E) xhci_hcd(E) Feb 28 19:27:45 kernel: scsi_mod(E) i8042(E) serio(E) bcache(E) crc64(E) Feb 28 19:27:45 kernel: CPU: 2 PID: 896 Comm: cinnamon Tainted: G B W E 4.20.12-arch1-1-custom #1 Feb 28 19:27:45 kernel: Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./B450M Pro4, BIOS P1.20 06/26/2018 Feb 28 19:27:45 kernel: Call Trace: Feb 28 19:27:45 kernel: dump_stack+0x5c/0x80 Feb 28 19:27:45 kernel: bad_page.cold.29+0x7f/0xb2 Feb 28 19:27:45 kernel: __free_pages_ok+0x2c0/0x2d0 Feb 28 19:27:45 kernel: skb_release_data+0x96/0x180 Feb 28 19:27:45 kernel: __kfree_skb+0xe/0x20 Feb 28 19:27:45 kernel: tcp_recvmsg+0x894/0xc60 Feb 28 19:27:45 kernel: ? reuse_swap_page+0x120/0x340 Feb 28 19:27:45 kernel: ? ptep_set_access_flags+0x23/0x30 Feb 28 19:27:45 kernel: inet_recvmsg+0x5b/0x100 Feb 28 19:27:45 kernel: __sys_recvfrom+0xc3/0x180 Feb 28 19:27:45 kernel: ? handle_mm_fault+0x10a/0x250 Feb 28 19:27:45 kernel: ? syscall_trace_enter+0x1d3/0x2d0 Feb 28 19:27:45 kernel: ? __audit_syscall_exit+0x22a/0x290 Feb 28 19:27:45 kernel: __x64_sys_recvfrom+0x24/0x30 Feb 28 19:27:45 kernel: do_syscall_64+0x5b/0x170 Feb 28 19:27:45 kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9 Cc: stable@vger.kernel.org Reported-and-tested-by: Jan Viktorin Reviewed-by: Alexander Duyck Signed-off-by: Stanislaw Gruszka Fixes: 80187fd39dcb ('iommu/amd: Optimize map_sg and unmap_sg') Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 78b97f31a1f2..bd339bfe0d15 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2548,7 +2548,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, /* Everything is mapped - write the right values into s->dma_address */ for_each_sg(sglist, s, nelems, i) { - s->dma_address += address + s->offset; + /* + * Add in the remaining piece of the scatter-gather offset that + * was masked out when we were determining the physical address + * via (sg_phys(s) & PAGE_MASK) earlier. + */ + s->dma_address += address + (s->offset & ~PAGE_MASK); s->dma_length = s->length; } From 2e5522ad5c1cca8848525721643f824cc651ca16 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 20 Mar 2019 09:46:58 +0100 Subject: [PATCH 2463/2608] libceph: wait for latest osdmap in ceph_monc_blacklist_add() commit bb229bbb3bf63d23128e851a1f3b85c083178fa1 upstream. Because map updates are distributed lazily, an OSD may not know about the new blacklist for quite some time after "osd blacklist add" command is completed. This makes it possible for a blacklisted but still alive client to overwrite a post-blacklist update, resulting in data corruption. Waiting for latest osdmap in ceph_monc_blacklist_add() and thus using the post-blacklist epoch for all post-blacklist requests ensures that all such requests "wait" for the blacklist to come into force on their respective OSDs. Cc: stable@vger.kernel.org Fixes: 6305a3b41515 ("libceph: support for blacklisting clients") Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/libceph.h | 2 ++ net/ceph/ceph_common.c | 18 +++++++++++++++++- net/ceph/mon_client.c | 9 +++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index d3b04f9589a9..c311cd13ea7d 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -291,6 +291,8 @@ extern void ceph_destroy_client(struct ceph_client *client); extern int __ceph_open_session(struct ceph_client *client, unsigned long started); extern int ceph_open_session(struct ceph_client *client); +int ceph_wait_for_latest_osdmap(struct ceph_client *client, + unsigned long timeout); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index cdb5b693a135..1001377ae428 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -720,7 +720,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) } EXPORT_SYMBOL(__ceph_open_session); - int ceph_open_session(struct ceph_client *client) { int ret; @@ -736,6 +735,23 @@ int ceph_open_session(struct ceph_client *client) } EXPORT_SYMBOL(ceph_open_session); +int ceph_wait_for_latest_osdmap(struct ceph_client *client, + unsigned long timeout) +{ + u64 newest_epoch; + int ret; + + ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); + if (ret) + return ret; + + if (client->osdc.osdmap->epoch >= newest_epoch) + return 0; + + ceph_osdc_maybe_request_map(&client->osdc); + return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout); +} +EXPORT_SYMBOL(ceph_wait_for_latest_osdmap); static int __init init_ceph_lib(void) { diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index f14498a7eaec..daca0af59942 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, mutex_unlock(&monc->mutex); ret = wait_generic_request(req); + if (!ret) + /* + * Make sure we have the osdmap that includes the blacklist + * entry. This is needed to ensure that the OSDs pick up the + * new blacklist before processing any future requests from + * this client. + */ + ret = ceph_wait_for_latest_osdmap(monc->client, 0); + out: put_generic_request(req); return ret; From 6a502107f716ba502d681d23afa779a28b83ca0f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 11 Mar 2019 15:04:18 +0100 Subject: [PATCH 2464/2608] udf: Fix crash on IO error during truncate commit d3ca4651d05c0ff7259d087d8c949bcf3e14fb46 upstream. When truncate(2) hits IO error when reading indirect extent block the code just bugs with: kernel BUG at linux-4.15.0/fs/udf/truncate.c:249! ... Fix the problem by bailing out cleanly in case of IO error. CC: stable@vger.kernel.org Reported-by: jean-luc malet Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/udf/truncate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 42b8c57795cb..c6ce7503a329 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -260,6 +260,9 @@ void udf_truncate_extents(struct inode *inode) epos.block = eloc; epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, &eloc, 0)); + /* Error reading indirect block? */ + if (!epos.bh) + return; if (elen) indirect_ext_len = (elen + sb->s_blocksize - 1) >> From a3c6248c8a2ea18a151da77ea09bbbf7dc96ce59 Mon Sep 17 00:00:00 2001 From: Yifeng Li Date: Tue, 5 Mar 2019 06:00:22 +0800 Subject: [PATCH 2465/2608] mips: loongson64: lemote-2f: Add IRQF_NO_SUSPEND to "cascade" irqaction. commit 5f5f67da9781770df0403269bc57d7aae608fecd upstream. Timekeeping IRQs from CS5536 MFGPT are routed to i8259, which then triggers the "cascade" IRQ on MIPS CPU. Without IRQF_NO_SUSPEND in cascade_irqaction, MFGPT interrupts will be masked in suspend mode, and the machine would be unable to resume once suspended. Previously, MIPS IRQs were not disabled properly, so the original code appeared to work. Commit a3e6c1eff5 ("MIPS: IRQ: Fix disable_irq on CPU IRQs") uncovers the bug. To fix it, add IRQF_NO_SUSPEND to cascade_irqaction. This commit is functionally identical to 0add9c2f1cff ("MIPS: Loongson-3: Add IRQF_NO_SUSPEND to Cascade irqaction"), but it forgot to apply the same fix to Loongson2. Signed-off-by: Yifeng Li Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Jiaxun Yang Cc: Huacai Chen Cc: Ralf Baechle Cc: James Hogan Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/loongson64/lemote-2f/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/loongson64/lemote-2f/irq.c b/arch/mips/loongson64/lemote-2f/irq.c index 9e33e45aa17c..b213cecb8e3a 100644 --- a/arch/mips/loongson64/lemote-2f/irq.c +++ b/arch/mips/loongson64/lemote-2f/irq.c @@ -103,7 +103,7 @@ static struct irqaction ip6_irqaction = { static struct irqaction cascade_irqaction = { .handler = no_action, .name = "cascade", - .flags = IRQF_NO_THREAD, + .flags = IRQF_NO_THREAD | IRQF_NO_SUSPEND, }; void __init mach_init_irq(void) From 005b0a33163e4436e0d80d24d35d38f782fc80a7 Mon Sep 17 00:00:00 2001 From: Yasha Cherikovsky Date: Fri, 8 Mar 2019 14:58:51 +0200 Subject: [PATCH 2466/2608] MIPS: Ensure ELF appended dtb is relocated commit 3f0a53bc6482fb09770982a8447981260ea258dc upstream. This fixes booting with the combination of CONFIG_RELOCATABLE=y and CONFIG_MIPS_ELF_APPENDED_DTB=y. Sections that appear after the relocation table are not relocated on system boot (except .bss, which has special handling). With CONFIG_MIPS_ELF_APPENDED_DTB, the dtb is part of the vmlinux ELF, so it must be relocated together with everything else. Fixes: 069fd766271d ("MIPS: Reserve space for relocation table") Signed-off-by: Yasha Cherikovsky Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/vmlinux.lds.S | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 971a504001c2..36f2e860ba3e 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -140,6 +140,13 @@ SECTIONS PERCPU_SECTION(1 << CONFIG_MIPS_L1_CACHE_SHIFT) #endif +#ifdef CONFIG_MIPS_ELF_APPENDED_DTB + .appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) { + *(.appended_dtb) + KEEP(*(.appended_dtb)) + } +#endif + #ifdef CONFIG_RELOCATABLE . = ALIGN(4); @@ -164,11 +171,6 @@ SECTIONS __appended_dtb = .; /* leave space for appended DTB */ . += 0x100000; -#elif defined(CONFIG_MIPS_ELF_APPENDED_DTB) - .appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) { - *(.appended_dtb) - KEEP(*(.appended_dtb)) - } #endif /* * Align to 64K in attempt to eliminate holes before the From 88870813ee29e346b5e831987f677060fda384f9 Mon Sep 17 00:00:00 2001 From: Archer Yan Date: Fri, 8 Mar 2019 03:29:19 +0000 Subject: [PATCH 2467/2608] MIPS: Fix kernel crash for R6 in jump label branch function commit 47c25036b60f27b86ab44b66a8861bcf81cde39b upstream. Insert Branch instruction instead of NOP to make sure assembler don't patch code in forbidden slot. In jump label function, it might be possible to patch Control Transfer Instructions(CTIs) into forbidden slot, which will generate Reserved Instruction exception in MIPS release 6. Signed-off-by: Archer Yan Reviewed-by: Paul Burton [paul.burton@mips.com: - Add MIPS prefix to subject. - Mark for stable from v4.0, which introduced r6 support, onwards.] Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/jump_label.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index e77672539e8e..e4456e450f94 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -21,15 +21,15 @@ #endif #ifdef CONFIG_CPU_MICROMIPS -#define NOP_INSN "nop32" +#define B_INSN "b32" #else -#define NOP_INSN "nop" +#define B_INSN "b" #endif static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" NOP_INSN "\n\t" - "nop\n\t" + asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + "2:\tnop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" From 9d00ccc555ff606d6a9fc1473447946962fb54f0 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 13:41:50 -0500 Subject: [PATCH 2468/2608] scsi: ibmvscsi: Protect ibmvscsi_head from concurrent modificaiton commit 7205981e045e752ccf96cf6ddd703a98c59d4339 upstream. For each ibmvscsi host created during a probe or destroyed during a remove we either add or remove that host to/from the global ibmvscsi_head list. This runs the risk of concurrent modification. This patch adds a simple spinlock around the list modification calls to prevent concurrent updates as is done similarly in the ibmvfc driver and ipr driver. Fixes: 32d6e4b6e4ea ("scsi: ibmvscsi: add vscsi hosts to global list_head") Cc: # v4.10+ Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi/ibmvscsi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 53eb27731373..213c565f4bbf 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -96,6 +96,7 @@ static int client_reserve = 1; static char partition_name[96] = "UNKNOWN"; static unsigned int partition_number = -1; static LIST_HEAD(ibmvscsi_head); +static DEFINE_SPINLOCK(ibmvscsi_driver_lock); static struct scsi_transport_template *ibmvscsi_transport_template; @@ -2274,7 +2275,9 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) } dev_set_drvdata(&vdev->dev, hostdata); + spin_lock(&ibmvscsi_driver_lock); list_add_tail(&hostdata->host_list, &ibmvscsi_head); + spin_unlock(&ibmvscsi_driver_lock); return 0; add_srp_port_failed: @@ -2296,7 +2299,9 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) static int ibmvscsi_remove(struct vio_dev *vdev) { struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev); + spin_lock(&ibmvscsi_driver_lock); list_del(&hostdata->host_list); + spin_unlock(&ibmvscsi_driver_lock); unmap_persist_bufs(hostdata); release_event_pool(&hostdata->pool, hostdata); ibmvscsi_release_crq_queue(&hostdata->queue, hostdata, From 95712a194a80d54817a5edaf54cf761e002e6d55 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 13:41:51 -0500 Subject: [PATCH 2469/2608] scsi: ibmvscsi: Fix empty event pool access during host removal commit 7f5203c13ba8a7b7f9f6ecfe5a4d5567188d7835 upstream. The event pool used for queueing commands is destroyed fairly early in the ibmvscsi_remove() code path. Since, this happens prior to the call so scsi_remove_host() it is possible for further calls to queuecommand to be processed which manifest as a panic due to a NULL pointer dereference as seen here: PANIC: "Unable to handle kernel paging request for data at address 0x00000000" Context process backtrace: DSISR: 0000000042000000 ????Syscall Result: 0000000000000000 4 [c000000002cb3820] memcpy_power7 at c000000000064204 [Link Register] [c000000002cb3820] ibmvscsi_send_srp_event at d000000003ed14a4 5 [c000000002cb3920] ibmvscsi_send_srp_event at d000000003ed14a4 [ibmvscsi] ?(unreliable) 6 [c000000002cb39c0] ibmvscsi_queuecommand at d000000003ed2388 [ibmvscsi] 7 [c000000002cb3a70] scsi_dispatch_cmd at d00000000395c2d8 [scsi_mod] 8 [c000000002cb3af0] scsi_request_fn at d00000000395ef88 [scsi_mod] 9 [c000000002cb3be0] __blk_run_queue at c000000000429860 10 [c000000002cb3c10] blk_delay_work at c00000000042a0ec 11 [c000000002cb3c40] process_one_work at c0000000000dac30 12 [c000000002cb3cd0] worker_thread at c0000000000db110 13 [c000000002cb3d80] kthread at c0000000000e3378 14 [c000000002cb3e30] ret_from_kernel_thread at c00000000000982c The kernel buffer log is overfilled with this log: [11261.952732] ibmvscsi: found no event struct in pool! This patch reorders the operations during host teardown. Start by calling the SRP transport and Scsi_Host remove functions to flush any outstanding work and set the host offline. LLDD teardown follows including destruction of the event pool, freeing the Command Response Queue (CRQ), and unmapping any persistent buffers. The event pool destruction is protected by the scsi_host lock, and the pool is purged prior of any requests for which we never received a response. Finally, move the removal of the scsi host from our global list to the end so that the host is easily locatable for debugging purposes during teardown. Cc: # v2.6.12+ Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi/ibmvscsi.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 213c565f4bbf..07c23bbd968c 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -2299,17 +2299,27 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) static int ibmvscsi_remove(struct vio_dev *vdev) { struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev); - spin_lock(&ibmvscsi_driver_lock); - list_del(&hostdata->host_list); - spin_unlock(&ibmvscsi_driver_lock); - unmap_persist_bufs(hostdata); + unsigned long flags; + + srp_remove_host(hostdata->host); + scsi_remove_host(hostdata->host); + + purge_requests(hostdata, DID_ERROR); + + spin_lock_irqsave(hostdata->host->host_lock, flags); release_event_pool(&hostdata->pool, hostdata); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); + ibmvscsi_release_crq_queue(&hostdata->queue, hostdata, max_events); kthread_stop(hostdata->work_thread); - srp_remove_host(hostdata->host); - scsi_remove_host(hostdata->host); + unmap_persist_bufs(hostdata); + + spin_lock(&ibmvscsi_driver_lock); + list_del(&hostdata->host_list); + spin_unlock(&ibmvscsi_driver_lock); + scsi_host_put(hostdata->host); return 0; From a7e830047886221d314096183159cd52fc1d7a31 Mon Sep 17 00:00:00 2001 From: Chen Jie Date: Fri, 15 Mar 2019 03:44:38 +0000 Subject: [PATCH 2470/2608] futex: Ensure that futex address is aligned in handle_futex_death() commit 5a07168d8d89b00fe1760120714378175b3ef992 upstream. The futex code requires that the user space addresses of futexes are 32bit aligned. sys_futex() checks this in futex_get_keys() but the robust list code has no alignment check in place. As a consequence the kernel crashes on architectures with strict alignment requirements in handle_futex_death() when trying to cmpxchg() on an unaligned futex address which was retrieved from the robust list. [ tglx: Rewrote changelog, proper sizeof() based alignement check and add comment ] Fixes: 0771dfefc9e5 ("[PATCH] lightweight robust futexes: core") Signed-off-by: Chen Jie Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1552621478-119787-1-git-send-email-chenjie6@huawei.com Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 22f83064abb3..f2fa48c6c476 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3450,6 +3450,10 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; + /* Futex address must be 32bit aligned */ + if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) + return -1; + retry: if (get_user(uval, uaddr)) return -1; From 060c8899d4f992f35a5a0e9306e07da4160a270e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 4 Mar 2019 15:13:21 +0200 Subject: [PATCH 2471/2608] perf probe: Fix getting the kernel map commit eaeffeb9838a7c0dec981d258666bfcc0fa6a947 upstream. Since commit 4d99e4136580 ("perf machine: Workaround missing maps for x86 PTI entry trampolines"), perf tools has been creating more than one kernel map, however 'perf probe' assumed there could be only one. Fix by using machine__kernel_map() to get the main kernel map. Signed-off-by: Adrian Hunter Tested-by: Joseph Qi Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Greg Kroah-Hartman Cc: Jiufei Xue Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Xu Yu Fixes: 4d99e4136580 ("perf machine: Workaround missing maps for x86 PTI entry trampolines") Fixes: d83212d5dd67 ("kallsyms, x86: Export addresses of PTI entry trampolines") Link: http://lkml.kernel.org/r/2ed432de-e904-85d2-5c36-5897ddc5b23b@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/probe-event.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 68786bb7790e..6670e12a2bb3 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -169,8 +169,10 @@ static struct map *kernel_get_module_map(const char *module) if (module && strchr(module, '/')) return dso__new_map(module); - if (!module) - module = "kernel"; + if (!module) { + pos = machine__kernel_map(host_machine); + return map__get(pos); + } for (pos = maps__first(maps); pos; pos = map__next(pos)) { /* short_name is "[module]" */ From d431ba69113dd23256b5f23d98c06ceb5ef5f056 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Mar 2019 19:09:38 -0500 Subject: [PATCH 2472/2608] objtool: Move objtool_file struct off the stack commit 0c671812f152b628bd87c0af49da032cc2a2c319 upstream. Objtool uses over 512k of stack, thanks to the hash table embedded in the objtool_file struct. This causes an unnecessarily large stack allocation and breaks users with low stack limits. Move the struct off the stack. Fixes: 042ba73fe7eb ("objtool: Add several performance improvements") Reported-by: Vassili Karpov Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/df92dcbc4b84b02ffa252f46876df125fb56e2d7.1552954176.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e128d1c71c30..3ff025b64527 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2132,9 +2132,10 @@ static void cleanup(struct objtool_file *file) elf_close(file->elf); } +static struct objtool_file file; + int check(const char *_objname, bool orc) { - struct objtool_file file; int ret, warnings = 0; objname = _objname; From a37fe2be55b6c965e616f5d46e2490005355d427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 24 Oct 2018 18:48:24 +0300 Subject: [PATCH 2473/2608] ALSA: x86: Fix runtime PM for hdmi-lpe-audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8dfb839cfe737a17def8e5f88ee13c295230364a upstream. Commit 46e831abe864 ("drm/i915/lpe: Mark LPE audio runtime pm as "no callbacks"") broke runtime PM with lpe audio. We can no longer runtime suspend the GPU since the sysfs power/control for the lpe-audio device no longer exists and the device is considered always active. We can fix this by not marking the device as active. Cc: Chris Wilson Cc: Takashi Iwai Cc: Pierre-Louis Bossart Fixes: 46e831abe864 ("drm/i915/lpe: Mark LPE audio runtime pm as "no callbacks"") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181024154825.18185-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson Acked-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/x86/intel_hdmi_audio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 8b7abbd69116..88fe5eb4516f 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1887,7 +1887,6 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_mark_last_busy(&pdev->dev); - pm_runtime_set_active(&pdev->dev); dev_dbg(&pdev->dev, "%s: handle pending notification\n", __func__); for_each_port(card_ctx, port) { From 03c5cdb620217763e3ef0012e54508f4ec6f88ef Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Thu, 14 Mar 2019 23:19:22 -0400 Subject: [PATCH 2474/2608] ext4: fix NULL pointer dereference while journal is aborted commit fa30dde38aa8628c73a6dded7cb0bba38c27b576 upstream. We see the following NULL pointer dereference while running xfstests generic/475: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 PGD 8000000c84bad067 P4D 8000000c84bad067 PUD c84e62067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 7 PID: 9886 Comm: fsstress Kdump: loaded Not tainted 5.0.0-rc8 #10 RIP: 0010:ext4_do_update_inode+0x4ec/0x760 ... Call Trace: ? jbd2_journal_get_write_access+0x42/0x50 ? __ext4_journal_get_write_access+0x2c/0x70 ? ext4_truncate+0x186/0x3f0 ext4_mark_iloc_dirty+0x61/0x80 ext4_mark_inode_dirty+0x62/0x1b0 ext4_truncate+0x186/0x3f0 ? unmap_mapping_pages+0x56/0x100 ext4_setattr+0x817/0x8b0 notify_change+0x1df/0x430 do_truncate+0x5e/0x90 ? generic_permission+0x12b/0x1a0 This is triggered because the NULL pointer handle->h_transaction was dereferenced in function ext4_update_inode_fsync_trans(). I found that the h_transaction was set to NULL in jbd2__journal_restart but failed to attached to a new transaction while the journal is aborted. Fix this by checking the handle before updating the inode. Fixes: b436b9bef84d ("ext4: Wait for proper transaction commit on fsync") Signed-off-by: Jiufei Xue Signed-off-by: Theodore Ts'o Reviewed-by: Joseph Qi Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4_jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 48143e32411c..1437f62d068c 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -387,7 +387,7 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle, { struct ext4_inode_info *ei = EXT4_I(inode); - if (ext4_handle_valid(handle)) { + if (ext4_handle_valid(handle) && !is_handle_aborted(handle)) { ei->i_sync_tid = handle->h_transaction->t_tid; if (datasync) ei->i_datasync_tid = handle->h_transaction->t_tid; From 766b823eafb885aaa25ac979f5e5a10f051c9634 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Thu, 14 Mar 2019 23:20:25 -0400 Subject: [PATCH 2475/2608] ext4: fix data corruption caused by unaligned direct AIO commit 372a03e01853f860560eade508794dd274e9b390 upstream. Ext4 needs to serialize unaligned direct AIO because the zeroing of partial blocks of two competing unaligned AIOs can result in data corruption. However it decides not to serialize if the potentially unaligned aio is past i_size with the rationale that no pending writes are possible past i_size. Unfortunately if the i_size is not block aligned and the second unaligned write lands past i_size, but still into the same block, it has the potential of corrupting the previous unaligned write to the same block. This is (very simplified) reproducer from Frank // 41472 = (10 * 4096) + 512 // 37376 = 41472 - 4096 ftruncate(fd, 41472); io_prep_pwrite(iocbs[0], fd, buf[0], 4096, 37376); io_prep_pwrite(iocbs[1], fd, buf[1], 4096, 41472); io_submit(io_ctx, 1, &iocbs[1]); io_submit(io_ctx, 1, &iocbs[2]); io_getevents(io_ctx, 2, 2, events, NULL); Without this patch the 512B range from 40960 up to the start of the second unaligned write (41472) is going to be zeroed overwriting the data written by the first write. This is a data corruption. 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00009200 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 * 0000a000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0000a200 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 With this patch the data corruption is avoided because we will recognize the unaligned_aio and wait for the unwritten extent conversion. 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00009200 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 * 0000a200 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 * 0000b200 Reported-by: Frank Sorenson Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Fixes: e9e3bcecf44c ("ext4: serialize unaligned asynchronous DIO") Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 5cb9aa3ad249..1913c69498c1 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -123,7 +123,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) struct super_block *sb = inode->i_sb; int blockmask = sb->s_blocksize - 1; - if (pos >= i_size_read(inode)) + if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize)) return 0; if ((pos | iov_iter_alignment(from)) & blockmask) From 33fb49969357376c93a20ba6fd55087b8c181ae6 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 23 Mar 2019 11:43:05 -0400 Subject: [PATCH 2476/2608] ext4: brelse all indirect buffer in ext4_ind_remove_space() commit 674a2b27234d1b7afcb0a9162e81b2e53aeef217 upstream. All indirect buffers get by ext4_find_shared() should be released no mater the branch should be freed or not. But now, we forget to release the lower depth indirect buffers when removing space from the same higher depth indirect block. It will lead to buffer leak and futher more, it may lead to quota information corruption when using old quota, consider the following case. - Create and mount an empty ext4 filesystem without extent and quota features, - quotacheck and enable the user & group quota, - Create some files and write some data to them, and then punch hole to some files of them, it may trigger the buffer leak problem mentioned above. - Disable quota and run quotacheck again, it will create two new aquota files and write the checked quota information to them, which probably may reuse the freed indirect block(the buffer and page cache was not freed) as data block. - Enable quota again, it will invoke vfs_load_quota_inode()->invalidate_bdev() to try to clean unused buffers and pagecache. Unfortunately, because of the buffer of quota data block is still referenced, quota code cannot read the up to date quota info from the device and lead to quota information corruption. This problem can be reproduced by xfstests generic/231 on ext3 file system or ext4 file system without extent and quota features. This patch fix this problem by releasing the missing indirect buffers, in ext4_ind_remove_space(). Reported-by: Hulk Robot Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/indirect.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index bf7fa1507e81..9e96a0bd08d9 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1387,10 +1387,14 @@ end_range: partial->p + 1, partial2->p, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); + while (partial > chain) { + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + } + while (partial2 > chain2) { + BUFFER_TRACE(partial2->bh, "call brelse"); + brelse(partial2->bh); + } return 0; } From 3616a46e4622df40e2d9c1a22c305e769eff9775 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 18 Dec 2018 08:37:08 -0500 Subject: [PATCH 2477/2608] media: v4l2-ctrls.c/uvc: zero v4l2_event commit f45f3f753b0a3d739acda8e311b4f744d82dc52a upstream. Control events can leak kernel memory since they do not fully zero the event. The same code is present in both v4l2-ctrls.c and uvc_ctrl.c, so fix both. It appears that all other event code is properly zeroing the structure, it's these two places. Signed-off-by: Hans Verkuil Reported-by: syzbot+4f021cf3697781dbd9fb@syzkaller.appspotmail.com Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_ctrl.c | 2 +- drivers/media/v4l2-core/v4l2-ctrls.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 20397aba6849..d92967e2e385 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -1203,7 +1203,7 @@ static void uvc_ctrl_fill_event(struct uvc_video_chain *chain, __uvc_query_v4l2_ctrl(chain, ctrl, mapping, &v4l2_ctrl); - memset(ev->reserved, 0, sizeof(ev->reserved)); + memset(ev, 0, sizeof(*ev)); ev->type = V4L2_EVENT_CTRL; ev->id = v4l2_ctrl.id; ev->u.ctrl.value = value; diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 8033d6f73501..07bd2008ae4b 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1239,7 +1239,7 @@ static u32 user_flags(const struct v4l2_ctrl *ctrl) static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl, u32 changes) { - memset(ev->reserved, 0, sizeof(ev->reserved)); + memset(ev, 0, sizeof(*ev)); ev->type = V4L2_EVENT_CTRL; ev->id = ctrl->id; ev->u.ctrl.changes = changes; From 86384a1fa3e5389e58d8a6673ed09143abe1354e Mon Sep 17 00:00:00 2001 From: Myungho Jung Date: Tue, 22 Jan 2019 00:33:26 -0800 Subject: [PATCH 2478/2608] Bluetooth: hci_uart: Check if socket buffer is ERR_PTR in h4_recv_buf() commit 1dc2d785156cbdc80806c32e8d2c7c735d0b4721 upstream. h4_recv_buf() callers store the return value to socket buffer and recursively pass the buffer to h4_recv_buf() without protection. So, ERR_PTR returned from h4_recv_buf() can be dereferenced, if called again before setting the socket buffer to NULL from previous error. Check if skb is ERR_PTR in h4_recv_buf(). Reported-by: syzbot+017a32f149406df32703@syzkaller.appspotmail.com Signed-off-by: Myungho Jung Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_h4.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 3b82a87224a9..d428117c97c3 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -174,6 +174,10 @@ struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, struct hci_uart *hu = hci_get_drvdata(hdev); u8 alignment = hu->alignment ? hu->alignment : 1; + /* Check for error from previous call */ + if (IS_ERR(skb)) + skb = NULL; + while (count) { int i, len; From 3df00eb895f8ea16ccbfb6db49dc905f99ac9d17 Mon Sep 17 00:00:00 2001 From: Myungho Jung Date: Sat, 2 Feb 2019 16:56:36 -0800 Subject: [PATCH 2479/2608] Bluetooth: Fix decrementing reference count twice in releasing socket commit e20a2e9c42c9e4002d9e338d74e7819e88d77162 upstream. When releasing socket, it is possible to enter hci_sock_release() and hci_sock_dev_event(HCI_DEV_UNREG) at the same time in different thread. The reference count of hdev should be decremented only once from one of them but if storing hdev to local variable in hci_sock_release() before detached from socket and setting to NULL in hci_sock_dev_event(), hci_dev_put(hdev) is unexpectedly called twice. This is resolved by referencing hdev from socket after bt_sock_unlink() in hci_sock_release(). Reported-by: syzbot+fdc00003f4efff43bc5b@syzkaller.appspotmail.com Signed-off-by: Myungho Jung Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 65d734c165bd..4a05235929b9 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -826,8 +826,6 @@ static int hci_sock_release(struct socket *sock) if (!sk) return 0; - hdev = hci_pi(sk)->hdev; - switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); @@ -849,6 +847,7 @@ static int hci_sock_release(struct socket *sock) bt_sock_unlink(&hci_sk_list, sk); + hdev = hci_pi(sk)->hdev; if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { /* When releasing a user channel exclusive access, From 6ea83d9338c181de28ca60cbf221b0a4b94c4fd9 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Wed, 6 Feb 2019 12:54:16 -0500 Subject: [PATCH 2480/2608] Bluetooth: hci_ldisc: Initialize hci_dev before open() commit 32a7b4cbe93b0a0ef7e63d31ca69ce54736c4412 upstream. The hci_dev struct hdev is referenced in work queues and timers started by open() in some protocols. This creates a race between the initialization function and the work or timer which can result hdev being dereferenced while it is still null. The syzbot report contains a reliable reproducer which causes a null pointer dereference of hdev in hci_uart_write_work() by making the memory allocation for hdev fail. To fix this, ensure hdev is valid from before calling a protocol's open() until after calling a protocol's close(). Reported-by: syzbot+257790c15bcdef6fe00c@syzkaller.appspotmail.com Signed-off-by: Jeremy Cline Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_ldisc.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 30bbe19b4b85..6a3a3011b38e 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -207,11 +207,11 @@ static void hci_uart_init_work(struct work_struct *work) err = hci_register_dev(hu->hdev); if (err < 0) { BT_ERR("Can't register HCI device"); + clear_bit(HCI_UART_PROTO_READY, &hu->flags); + hu->proto->close(hu); hdev = hu->hdev; hu->hdev = NULL; hci_free_dev(hdev); - clear_bit(HCI_UART_PROTO_READY, &hu->flags); - hu->proto->close(hu); return; } @@ -612,6 +612,7 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, static int hci_uart_register_dev(struct hci_uart *hu) { struct hci_dev *hdev; + int err; BT_DBG(""); @@ -655,11 +656,22 @@ static int hci_uart_register_dev(struct hci_uart *hu) else hdev->dev_type = HCI_PRIMARY; + /* Only call open() for the protocol after hdev is fully initialized as + * open() (or a timer/workqueue it starts) may attempt to reference it. + */ + err = hu->proto->open(hu); + if (err) { + hu->hdev = NULL; + hci_free_dev(hdev); + return err; + } + if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return 0; if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); + hu->proto->close(hu); hu->hdev = NULL; hci_free_dev(hdev); return -ENODEV; @@ -679,17 +691,12 @@ static int hci_uart_set_proto(struct hci_uart *hu, int id) if (!p) return -EPROTONOSUPPORT; - err = p->open(hu); - if (err) - return err; - hu->proto = p; set_bit(HCI_UART_PROTO_READY, &hu->flags); err = hci_uart_register_dev(hu); if (err) { clear_bit(HCI_UART_PROTO_READY, &hu->flags); - p->close(hu); return err; } From a1dbb34da6f2edf321df5023cb2accd92579269b Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sat, 23 Feb 2019 12:33:27 +0800 Subject: [PATCH 2481/2608] Bluetooth: hci_ldisc: Postpone HCI_UART_PROTO_READY bit set in hci_uart_set_proto() commit 56897b217a1d0a91c9920cb418d6b3fe922f590a upstream. task A: task B: hci_uart_set_proto flush_to_ldisc - p->open(hu) -> h5_open //alloc h5 - receive_buf - set_bit HCI_UART_PROTO_READY - tty_port_default_receive_buf - hci_uart_register_dev - tty_ldisc_receive_buf - hci_uart_tty_receive - test_bit HCI_UART_PROTO_READY - h5_recv - clear_bit HCI_UART_PROTO_READY while() { - p->open(hu) -> h5_close //free h5 - h5_rx_3wire_hdr - h5_reset() //use-after-free } It could use ioctl to set hci uart proto, but there is a use-after-free issue when hci_uart_register_dev() fail in hci_uart_set_proto(), see stack above, fix this by setting HCI_UART_PROTO_READY bit only when hci_uart_register_dev() return success. Reported-by: syzbot+899a33dc0fa0dbaf06a6@syzkaller.appspotmail.com Signed-off-by: Kefeng Wang Reviewed-by: Jeremy Cline Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_ldisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 6a3a3011b38e..3b63a781f10f 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -692,14 +692,13 @@ static int hci_uart_set_proto(struct hci_uart *hu, int id) return -EPROTONOSUPPORT; hu->proto = p; - set_bit(HCI_UART_PROTO_READY, &hu->flags); err = hci_uart_register_dev(hu); if (err) { - clear_bit(HCI_UART_PROTO_READY, &hu->flags); return err; } + set_bit(HCI_UART_PROTO_READY, &hu->flags); return 0; } From 4f160d8dd684f8bb12f3a21bfe88ad37667246c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 30 Dec 2018 12:28:42 +0000 Subject: [PATCH 2482/2608] drm: Reorder set_property_atomic to avoid returning with an active ww_ctx commit 227ad6d957898a88b1746e30234ece64d305f066 upstream. Delay the drm_modeset_acquire_init() until after we check for an allocation failure so that we can return immediately upon error without having to unwind. WARNING: lock held when returning to user space! 4.20.0+ #174 Not tainted ------------------------------------------------ syz-executor556/8153 is leaving the kernel with locks still held! 1 lock held by syz-executor556/8153: #0: 000000005100c85c (crtc_ww_class_acquire){+.+.}, at: set_property_atomic+0xb3/0x330 drivers/gpu/drm/drm_mode_object.c:462 Reported-by: syzbot+6ea337c427f5083ebdf2@syzkaller.appspotmail.com Fixes: 144a7999d633 ("drm: Handle properties in the core for atomic drivers") Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Sean Paul Cc: David Airlie Cc: # v4.14+ Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20181230122842.21917-1-chris@chris-wilson.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_mode_object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index 1055533792f3..5b692ce6a45d 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -432,12 +432,13 @@ static int set_property_atomic(struct drm_mode_object *obj, struct drm_modeset_acquire_ctx ctx; int ret; - drm_modeset_acquire_init(&ctx, 0); - state = drm_atomic_state_alloc(dev); if (!state) return -ENOMEM; + + drm_modeset_acquire_init(&ctx, 0); state->acquire_ctx = &ctx; + retry: if (prop == state->dev->mode_config.dpms_property) { if (obj->type != DRM_MODE_OBJECT_CONNECTOR) { From b763bd262a792684cdca3040ad9f45106304df62 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 19 Feb 2019 00:37:21 +0100 Subject: [PATCH 2483/2608] netfilter: ebtables: remove BUGPRINT messages commit d824548dae220820bdf69b2d1561b7c4b072783f upstream. They are however frequently triggered by syzkaller, so remove them. ebtables userspace should never trigger any of these, so there is little value in making them pr_debug (or ratelimited). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 131 ++++++++++---------------------- 1 file changed, 39 insertions(+), 92 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 53392ac58b38..38b3309edba8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -31,10 +31,6 @@ /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" -#define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ - "report to author: "format, ## args) -/* #define BUGPRINT(format, args...) */ - /* Each cpu has its own set of counters, so there is no need for write_lock in * the softirq * For reading or updating the counters, the user context needs to @@ -453,8 +449,6 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, /* we make userspace set this right, * so there is no misunderstanding */ - BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " - "in distinguisher\n"); return -EINVAL; } if (i != NF_BR_NUMHOOKS) @@ -472,18 +466,14 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, offset += e->next_offset; } } - if (offset != limit) { - BUGPRINT("entries_size too small\n"); + if (offset != limit) return -EINVAL; - } /* check if all valid hooks have a chain */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (!newinfo->hook_entry[i] && - (valid_hooks & (1 << i))) { - BUGPRINT("Valid hook without chain\n"); + (valid_hooks & (1 << i))) return -EINVAL; - } } return 0; } @@ -510,26 +500,20 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, /* this checks if the previous chain has as many entries * as it said it has */ - if (*n != *cnt) { - BUGPRINT("nentries does not equal the nr of entries " - "in the chain\n"); + if (*n != *cnt) return -EINVAL; - } + if (((struct ebt_entries *)e)->policy != EBT_DROP && ((struct ebt_entries *)e)->policy != EBT_ACCEPT) { /* only RETURN from udc */ if (i != NF_BR_NUMHOOKS || - ((struct ebt_entries *)e)->policy != EBT_RETURN) { - BUGPRINT("bad policy\n"); + ((struct ebt_entries *)e)->policy != EBT_RETURN) return -EINVAL; - } } if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */ (*udc_cnt)++; - if (((struct ebt_entries *)e)->counter_offset != *totalcnt) { - BUGPRINT("counter_offset != totalcnt"); + if (((struct ebt_entries *)e)->counter_offset != *totalcnt) return -EINVAL; - } *n = ((struct ebt_entries *)e)->nentries; *cnt = 0; return 0; @@ -537,15 +521,13 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, /* a plain old entry, heh */ if (sizeof(struct ebt_entry) > e->watchers_offset || e->watchers_offset > e->target_offset || - e->target_offset >= e->next_offset) { - BUGPRINT("entry offsets not in right order\n"); + e->target_offset >= e->next_offset) return -EINVAL; - } + /* this is not checked anywhere else */ - if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) { - BUGPRINT("target size too small\n"); + if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) return -EINVAL; - } + (*cnt)++; (*totalcnt)++; return 0; @@ -665,18 +647,15 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, if (e->bitmask == 0) return 0; - if (e->bitmask & ~EBT_F_MASK) { - BUGPRINT("Unknown flag for bitmask\n"); + if (e->bitmask & ~EBT_F_MASK) return -EINVAL; - } - if (e->invflags & ~EBT_INV_MASK) { - BUGPRINT("Unknown flag for inv bitmask\n"); + + if (e->invflags & ~EBT_INV_MASK) return -EINVAL; - } - if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) { - BUGPRINT("NOPROTO & 802_3 not allowed\n"); + + if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) return -EINVAL; - } + /* what hook do we belong to? */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (!newinfo->hook_entry[i]) @@ -735,13 +714,11 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, t->u.target = target; if (t->u.target == &ebt_standard_target) { if (gap < sizeof(struct ebt_standard_target)) { - BUGPRINT("Standard target size too big\n"); ret = -EFAULT; goto cleanup_watchers; } if (((struct ebt_standard_target *)t)->verdict < -NUM_STANDARD_TARGETS) { - BUGPRINT("Invalid standard target\n"); ret = -EFAULT; goto cleanup_watchers; } @@ -801,10 +778,9 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack if (strcmp(t->u.name, EBT_STANDARD_TARGET)) goto letscontinue; if (e->target_offset + sizeof(struct ebt_standard_target) > - e->next_offset) { - BUGPRINT("Standard target size too big\n"); + e->next_offset) return -1; - } + verdict = ((struct ebt_standard_target *)t)->verdict; if (verdict >= 0) { /* jump to another chain */ struct ebt_entries *hlp2 = @@ -813,14 +789,12 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack if (hlp2 == cl_s[i].cs.chaininfo) break; /* bad destination or loop */ - if (i == udc_cnt) { - BUGPRINT("bad destination\n"); + if (i == udc_cnt) return -1; - } - if (cl_s[i].cs.n) { - BUGPRINT("loop\n"); + + if (cl_s[i].cs.n) return -1; - } + if (cl_s[i].hookmask & (1 << hooknr)) goto letscontinue; /* this can't be 0, so the loop test is correct */ @@ -853,24 +827,21 @@ static int translate_table(struct net *net, const char *name, i = 0; while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i]) i++; - if (i == NF_BR_NUMHOOKS) { - BUGPRINT("No valid hooks specified\n"); + if (i == NF_BR_NUMHOOKS) return -EINVAL; - } - if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) { - BUGPRINT("Chains don't start at beginning\n"); + + if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) return -EINVAL; - } + /* make sure chains are ordered after each other in same order * as their corresponding hooks */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { if (!newinfo->hook_entry[j]) continue; - if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) { - BUGPRINT("Hook order must be followed\n"); + if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) return -EINVAL; - } + i = j; } @@ -888,15 +859,11 @@ static int translate_table(struct net *net, const char *name, if (ret != 0) return ret; - if (i != j) { - BUGPRINT("nentries does not equal the nr of entries in the " - "(last) chain\n"); + if (i != j) return -EINVAL; - } - if (k != newinfo->nentries) { - BUGPRINT("Total nentries is wrong\n"); + + if (k != newinfo->nentries) return -EINVAL; - } /* get the location of the udc, put them in an array * while we're at it, allocate the chainstack @@ -929,7 +896,6 @@ static int translate_table(struct net *net, const char *name, ebt_get_udc_positions, newinfo, &i, cl_s); /* sanity check */ if (i != udc_cnt) { - BUGPRINT("i != udc_cnt\n"); vfree(cl_s); return -EFAULT; } @@ -1030,7 +996,6 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_unlock; if (repl->num_counters && repl->num_counters != t->private->nentries) { - BUGPRINT("Wrong nr. of counters requested\n"); ret = -EINVAL; goto free_unlock; } @@ -1115,15 +1080,12 @@ static int do_replace(struct net *net, const void __user *user, if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; - if (len != sizeof(tmp) + tmp.entries_size) { - BUGPRINT("Wrong len argument\n"); + if (len != sizeof(tmp) + tmp.entries_size) return -EINVAL; - } - if (tmp.entries_size == 0) { - BUGPRINT("Entries_size never zero\n"); + if (tmp.entries_size == 0) return -EINVAL; - } + /* overflow check */ if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) @@ -1150,7 +1112,6 @@ static int do_replace(struct net *net, const void __user *user, } if (copy_from_user( newinfo->entries, tmp.entries, tmp.entries_size) != 0) { - BUGPRINT("Couldn't copy entries from userspace\n"); ret = -EFAULT; goto free_entries; } @@ -1197,10 +1158,8 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, if (input_table == NULL || (repl = input_table->table) == NULL || repl->entries == NULL || repl->entries_size == 0 || - repl->counters != NULL || input_table->private != NULL) { - BUGPRINT("Bad table data for ebt_register_table!!!\n"); + repl->counters != NULL || input_table->private != NULL) return -EINVAL; - } /* Don't add one table to multiple lists. */ table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); @@ -1238,13 +1197,10 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, ((char *)repl->hook_entry[i] - repl->entries); } ret = translate_table(net, repl->name, newinfo); - if (ret != 0) { - BUGPRINT("Translate_table failed\n"); + if (ret != 0) goto free_chainstack; - } if (table->check && table->check(newinfo, table->valid_hooks)) { - BUGPRINT("The table doesn't like its own initial data, lol\n"); ret = -EINVAL; goto free_chainstack; } @@ -1255,7 +1211,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; - BUGPRINT("Table name already exists\n"); goto free_unlock; } } @@ -1327,7 +1282,6 @@ static int do_update_counters(struct net *net, const char *name, goto free_tmp; if (num_counters != t->private->nentries) { - BUGPRINT("Wrong nr of counters\n"); ret = -EINVAL; goto unlock_mutex; } @@ -1452,10 +1406,8 @@ static int copy_counters_to_user(struct ebt_table *t, if (num_counters == 0) return 0; - if (num_counters != nentries) { - BUGPRINT("Num_counters wrong\n"); + if (num_counters != nentries) return -EINVAL; - } counterstmp = vmalloc(nentries * sizeof(*counterstmp)); if (!counterstmp) @@ -1501,15 +1453,11 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0)) return -EINVAL; - if (tmp.nentries != nentries) { - BUGPRINT("Nentries wrong\n"); + if (tmp.nentries != nentries) return -EINVAL; - } - if (tmp.entries_size != entries_size) { - BUGPRINT("Wrong size\n"); + if (tmp.entries_size != entries_size) return -EINVAL; - } ret = copy_counters_to_user(t, oldcounters, tmp.counters, tmp.num_counters, nentries); @@ -1581,7 +1529,6 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } mutex_unlock(&ebt_mutex); if (copy_to_user(user, &tmp, *len) != 0) { - BUGPRINT("c2u Didn't work\n"); ret = -EFAULT; break; } From c60f18c6653fe6675bd66fb2b7008d3fc14e957a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 1 Mar 2019 04:12:00 +0100 Subject: [PATCH 2484/2608] x86/unwind: Handle NULL pointer calls better in frame unwinder commit f4f34e1b82eb4219d8eaa1c7e2e17ca219a6a2b5 upstream. When the frame unwinder is invoked for an oops caused by a call to NULL, it currently skips the parent function because BP still points to the parent's stack frame; the (nonexistent) current function only has the first half of a stack frame, and BP doesn't point to it yet. Add a special case for IP==0 that calculates a fake BP from SP, then uses the real BP for the next frame. Note that this handles first_frame specially: Return information about the parent function as long as the saved IP is >=first_frame, even if the fake BP points below it. With an artificially-added NULL call in prctl_set_seccomp(), before this patch, the trace is: Call Trace: ? prctl_set_seccomp+0x3a/0x50 __x64_sys_prctl+0x457/0x6f0 ? __ia32_sys_prctl+0x750/0x750 do_syscall_64+0x72/0x160 entry_SYSCALL_64_after_hwframe+0x44/0xa9 After this patch, the trace is: Call Trace: prctl_set_seccomp+0x3a/0x50 __x64_sys_prctl+0x457/0x6f0 ? __ia32_sys_prctl+0x750/0x750 do_syscall_64+0x72/0x160 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Andrew Morton Cc: syzbot Cc: "H. Peter Anvin" Cc: Masahiro Yamada Cc: Michal Marek Cc: linux-kbuild@vger.kernel.org Link: https://lkml.kernel.org/r/20190301031201.7416-1-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/unwind.h | 6 ++++++ arch/x86/kernel/unwind_frame.c | 25 ++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 1f86e1b0a5cd..499578f7e6d7 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -23,6 +23,12 @@ struct unwind_state { #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; + /* + * If non-NULL: The current frame is incomplete and doesn't contain a + * valid BP. When looking for the next frame, use this instead of the + * non-existent saved BP. + */ + unsigned long *next_bp; struct pt_regs *regs; #else unsigned long *sp; diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 3dc26f95d46e..9b9fd4826e7a 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -320,10 +320,14 @@ bool unwind_next_frame(struct unwind_state *state) } /* Get the next frame pointer: */ - if (state->regs) + if (state->next_bp) { + next_bp = state->next_bp; + state->next_bp = NULL; + } else if (state->regs) { next_bp = (unsigned long *)state->regs->bp; - else + } else { next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp); + } /* Move to the next frame if it's safe: */ if (!update_stack_state(state, next_bp)) @@ -398,6 +402,21 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, bp = get_frame_pointer(task, regs); + /* + * If we crash with IP==0, the last successfully executed instruction + * was probably an indirect function call with a NULL function pointer. + * That means that SP points into the middle of an incomplete frame: + * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we + * would have written a frame pointer if we hadn't crashed. + * Pretend that the frame is complete and that BP points to it, but save + * the real BP so that we can use it when looking for the next frame. + */ + if (regs && regs->ip == 0 && + (unsigned long *)kernel_stack_pointer(regs) >= first_frame) { + state->next_bp = bp; + bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1; + } + /* Initialize stack info and make sure the frame data is accessible: */ get_stack_info(bp, state->task, &state->stack_info, &state->stack_mask); @@ -410,7 +429,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || - state->bp < first_frame)) + (state->next_bp == NULL && state->bp < first_frame))) unwind_next_frame(state); } EXPORT_SYMBOL_GPL(__unwind_start); From 5befc25f5cd2a1ec90bca48d4e03270b1005c70b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 1 Mar 2019 04:12:01 +0100 Subject: [PATCH 2485/2608] x86/unwind: Add hardcoded ORC entry for NULL commit ac5ceccce5501e43d217c596e4ee859f2a3fef79 upstream. When the ORC unwinder is invoked for an oops caused by IP==0, it currently has no idea what to do because there is no debug information for the stack frame of NULL. But if RIP is NULL, it is very likely that the last successfully executed instruction was an indirect CALL/JMP, and it is possible to unwind out in the same way as for the first instruction of a normal function. Hardcode a corresponding ORC entry. With an artificially-added NULL call in prctl_set_seccomp(), before this patch, the trace is: Call Trace: ? __x64_sys_prctl+0x402/0x680 ? __ia32_sys_prctl+0x6e0/0x6e0 ? __do_page_fault+0x457/0x620 ? do_syscall_64+0x6d/0x160 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 After this patch, the trace looks like this: Call Trace: __x64_sys_prctl+0x402/0x680 ? __ia32_sys_prctl+0x6e0/0x6e0 ? __do_page_fault+0x457/0x620 do_syscall_64+0x6d/0x160 entry_SYSCALL_64_after_hwframe+0x44/0xa9 prctl_set_seccomp() still doesn't show up in the trace because for some reason, tail call optimization is only disabled in builds that use the frame pointer unwinder. Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Andrew Morton Cc: syzbot Cc: "H. Peter Anvin" Cc: Masahiro Yamada Cc: Michal Marek Cc: linux-kbuild@vger.kernel.org Link: https://lkml.kernel.org/r/20190301031201.7416-2-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/unwind_orc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index be86a865087a..3bbb399f7ead 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -74,11 +74,28 @@ static struct orc_entry *orc_module_find(unsigned long ip) } #endif +/* + * If we crash with IP==0, the last successfully executed instruction + * was probably an indirect function call with a NULL function pointer, + * and we don't have unwind information for NULL. + * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function + * pointer into its parent and then continue normally from there. + */ +static struct orc_entry null_orc_entry = { + .sp_offset = sizeof(long), + .sp_reg = ORC_REG_SP, + .bp_reg = ORC_REG_UNDEFINED, + .type = ORC_TYPE_CALL +}; + static struct orc_entry *orc_find(unsigned long ip) { if (!orc_init) return NULL; + if (ip == 0) + return &null_orc_entry; + /* For non-init vmlinux addresses, use the fast lookup table: */ if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { unsigned int idx, start, stop; From 4a195a0bc2e954b91085d5c82eb20c51835ee7b0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 9 Jan 2019 23:03:25 -0500 Subject: [PATCH 2486/2608] locking/lockdep: Add debug_locks check in __lock_downgrade() commit 71492580571467fb7177aade19c18ce7486267f5 upstream. Tetsuo Handa had reported he saw an incorrect "downgrading a read lock" warning right after a previous lockdep warning. It is likely that the previous warning turned off lock debugging causing the lockdep to have inconsistency states leading to the lock downgrade warning. Fix that by add a check for debug_locks at the beginning of __lock_downgrade(). Debugged-by: Tetsuo Handa Reported-by: Tetsuo Handa Reported-by: syzbot+53383ae265fb161ef488@syzkaller.appspotmail.com Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/1547093005-26085-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/locking/lockdep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index bf694c709b96..e57be7031cb3 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3650,6 +3650,9 @@ __lock_set_class(struct lockdep_map *lock, const char *name, unsigned int depth; int i; + if (unlikely(!debug_locks)) + return 0; + depth = curr->lockdep_depth; /* * This function is about (re)setting the class of a held lock, From 886e8316b599a1f704f90e768547cc63725a8474 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Jan 2019 14:03:33 +0100 Subject: [PATCH 2487/2608] ALSA: hda - Record the current power state before suspend/resume calls commit 98081ca62cbac31fb0f7efaf90b2e7384ce22257 upstream. Currently we deal with single codec and suspend codec callbacks for all S3, S4 and runtime PM handling. But it turned out that we want distinguish the call patterns sometimes, e.g. for applying some init sequence only at probing and restoring from hibernate. This patch slightly modifies the common PM callbacks for HD-audio codec and stores the currently processed PM event in power_state of the codec's device.power field, which is currently unused. The codec callback can take a look at this event value and judges which purpose it's being called. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 43 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 8a027973f2ad..cdb43de7d659 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2900,6 +2900,7 @@ static void hda_call_codec_resume(struct hda_codec *codec) hda_jackpoll_work(&codec->jackpoll_work.work); else snd_hda_jack_report_sync(codec); + codec->core.dev.power.power_state = PMSG_ON; atomic_dec(&codec->core.in_pm); } @@ -2932,10 +2933,48 @@ static int hda_codec_runtime_resume(struct device *dev) } #endif /* CONFIG_PM */ +#ifdef CONFIG_PM_SLEEP +static int hda_codec_pm_suspend(struct device *dev) +{ + dev->power.power_state = PMSG_SUSPEND; + return pm_runtime_force_suspend(dev); +} + +static int hda_codec_pm_resume(struct device *dev) +{ + dev->power.power_state = PMSG_RESUME; + return pm_runtime_force_resume(dev); +} + +static int hda_codec_pm_freeze(struct device *dev) +{ + dev->power.power_state = PMSG_FREEZE; + return pm_runtime_force_suspend(dev); +} + +static int hda_codec_pm_thaw(struct device *dev) +{ + dev->power.power_state = PMSG_THAW; + return pm_runtime_force_resume(dev); +} + +static int hda_codec_pm_restore(struct device *dev) +{ + dev->power.power_state = PMSG_RESTORE; + return pm_runtime_force_resume(dev); +} +#endif /* CONFIG_PM_SLEEP */ + /* referred in hda_bind.c */ const struct dev_pm_ops hda_codec_driver_pm = { - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) +#ifdef CONFIG_PM_SLEEP + .suspend = hda_codec_pm_suspend, + .resume = hda_codec_pm_resume, + .freeze = hda_codec_pm_freeze, + .thaw = hda_codec_pm_thaw, + .poweroff = hda_codec_pm_suspend, + .restore = hda_codec_pm_restore, +#endif /* CONFIG_PM_SLEEP */ SET_RUNTIME_PM_OPS(hda_codec_runtime_suspend, hda_codec_runtime_resume, NULL) }; From 5c622e33da8febc0de0512344cd5a35c17a7676f Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 19 Mar 2019 09:28:44 +0800 Subject: [PATCH 2488/2608] ALSA: hda - Enforces runtime_resume after S3 and S4 for each codec commit b5a236c175b0d984552a5f7c9d35141024c2b261 upstream. Recently we found the audio jack detection stop working after suspend on many machines with Realtek codec. Sometimes the audio selection dialogue didn't show up after users plugged headhphone/headset into the headset jack, sometimes after uses plugged headphone/headset, then click the sound icon on the upper-right corner of gnome-desktop, it also showed the speaker rather than the headphone. The root cause is that before suspend, the codec already call the runtime_suspend since this codec is not used by any apps, then in resume, it will not call runtime_resume for this codec. But for some realtek codec (so far, alc236, alc255 and alc891) with the specific BIOS, if it doesn't run runtime_resume after suspend, all codec functions including jack detection stop working anymore. This problem existed for a long time, but it was not exposed, that is because when problem happens, if users play sound or open sound-setting to check audio device, this will trigger calling to runtime_resume (via snd_hda_power_up), then the codec starts working again before users notice this problem. Since we don't know how many codec and BIOS combinations have this problem, to fix it, let the driver call runtime_resume for all codecs in pm_resume, maybe for some codecs, this is not needed, but it is harmless. After a codec is runtime resumed, if it is not used by any apps, it will be runtime suspended soon and furthermore we don't run suspend frequently, this change will not add much power consumption. Fixes: cc72da7d4d06 ("ALSA: hda - Use standard runtime PM for codec power-save control") Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index cdb43de7d659..e3f3351da480 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2934,6 +2934,20 @@ static int hda_codec_runtime_resume(struct device *dev) #endif /* CONFIG_PM */ #ifdef CONFIG_PM_SLEEP +static int hda_codec_force_resume(struct device *dev) +{ + int ret; + + /* The get/put pair below enforces the runtime resume even if the + * device hasn't been used at suspend time. This trick is needed to + * update the jack state change during the sleep. + */ + pm_runtime_get_noresume(dev); + ret = pm_runtime_force_resume(dev); + pm_runtime_put(dev); + return ret; +} + static int hda_codec_pm_suspend(struct device *dev) { dev->power.power_state = PMSG_SUSPEND; @@ -2943,7 +2957,7 @@ static int hda_codec_pm_suspend(struct device *dev) static int hda_codec_pm_resume(struct device *dev) { dev->power.power_state = PMSG_RESUME; - return pm_runtime_force_resume(dev); + return hda_codec_force_resume(dev); } static int hda_codec_pm_freeze(struct device *dev) @@ -2955,13 +2969,13 @@ static int hda_codec_pm_freeze(struct device *dev) static int hda_codec_pm_thaw(struct device *dev) { dev->power.power_state = PMSG_THAW; - return pm_runtime_force_resume(dev); + return hda_codec_force_resume(dev); } static int hda_codec_pm_restore(struct device *dev) { dev->power.power_state = PMSG_RESTORE; - return pm_runtime_force_resume(dev); + return hda_codec_force_resume(dev); } #endif /* CONFIG_PM_SLEEP */ From aab86217763b06bcb563cb69dbbff8d598b52a39 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Nov 2017 15:28:04 -0800 Subject: [PATCH 2489/2608] lib/int_sqrt: optimize small argument commit 3f3295709edea6268ff1609855f498035286af73 upstream. The current int_sqrt() computation is sub-optimal for the case of small @x. Which is the interesting case when we're going to do cumulative distribution functions on idle times, which we assume to be a random variable, where the target residency of the deepest idle state gives an upper bound on the variable (5e6ns on recent Intel chips). In the case of small @x, the compute loop: while (m != 0) { b = y + m; y >>= 1; if (x >= b) { x -= b; y += m; } m >>= 2; } can be reduced to: while (m > x) m >>= 2; Because y==0, b==m and until x>=m y will remain 0. And while this is computationally equivalent, it runs much faster because there's less code, in particular less branches. cycles: branches: branch-misses: OLD: hot: 45.109444 +- 0.044117 44.333392 +- 0.002254 0.018723 +- 0.000593 cold: 187.737379 +- 0.156678 44.333407 +- 0.002254 6.272844 +- 0.004305 PRE: hot: 67.937492 +- 0.064124 66.999535 +- 0.000488 0.066720 +- 0.001113 cold: 232.004379 +- 0.332811 66.999527 +- 0.000488 6.914634 +- 0.006568 POST: hot: 43.633557 +- 0.034373 45.333132 +- 0.002277 0.023529 +- 0.000681 cold: 207.438411 +- 0.125840 45.333132 +- 0.002277 6.976486 +- 0.004219 Averages computed over all values <128k using a LFSR to generate order. Cold numbers have a LFSR based branch trace buffer 'confuser' ran between each int_sqrt() invocation. Link: http://lkml.kernel.org/r/20171020164644.876503355@infradead.org Fixes: 30493cc9dddb ("lib/int_sqrt.c: optimize square root algorithm") Signed-off-by: Peter Zijlstra (Intel) Suggested-by: Anshul Garg Acked-by: Linus Torvalds Cc: Davidlohr Bueso Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Will Deacon Cc: Joe Perches Cc: David Miller Cc: Matthew Wilcox Cc: Kees Cook Cc: Michael Davidson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- lib/int_sqrt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index db0b5aa071fc..036c96781ea8 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -23,6 +23,9 @@ unsigned long int_sqrt(unsigned long x) return x; m = 1UL << (BITS_PER_LONG - 2); + while (m > x) + m >>= 2; + while (m != 0) { b = y + m; y >>= 1; From 4a4aed9055e8e8e6e7becf10821d5f2e1c90c3a8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 11 Dec 2017 22:48:41 +0100 Subject: [PATCH 2490/2608] USB: core: only clean up what we allocated commit 32fd87b3bbf5f7a045546401dfe2894dbbf4d8c3 upstream. When cleaning up the configurations, make sure we only free the number of configurations and interfaces that we could have allocated. Reported-by: Andrey Konovalov Cc: stable Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index bd749e78df59..1a6ccdd5a5fc 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -768,18 +768,21 @@ void usb_destroy_configuration(struct usb_device *dev) return; if (dev->rawdescriptors) { - for (i = 0; i < dev->descriptor.bNumConfigurations; i++) + for (i = 0; i < dev->descriptor.bNumConfigurations && + i < USB_MAXCONFIG; i++) kfree(dev->rawdescriptors[i]); kfree(dev->rawdescriptors); dev->rawdescriptors = NULL; } - for (c = 0; c < dev->descriptor.bNumConfigurations; c++) { + for (c = 0; c < dev->descriptor.bNumConfigurations && + c < USB_MAXCONFIG; c++) { struct usb_host_config *cf = &dev->config[c]; kfree(cf->string); - for (i = 0; i < cf->desc.bNumInterfaces; i++) { + for (i = 0; i < cf->desc.bNumInterfaces && + i < USB_MAXINTERFACES; i++) { if (cf->intf_cache[i]) kref_put(&cf->intf_cache[i]->ref, usb_release_interface_cache); From 4aac26ecb4ace3df840af95408f2279c85684f62 Mon Sep 17 00:00:00 2001 From: kehuanlin Date: Wed, 6 Sep 2017 17:58:39 +0800 Subject: [PATCH 2491/2608] scsi: ufs: fix wrong command type of UTRD for UFSHCI v2.1 commit 83dc7e3dea76b77b6bcc289eb86c5b5c145e8dff upstream. Since the command type of UTRD in UFS 2.1 specification is the same with UFS 2.0. And it assumes the future UFS specification will follow the same definition. Signed-off-by: kehuanlin Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 66540491839e..581571de2461 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2195,10 +2195,11 @@ static int ufshcd_comp_devman_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) u32 upiu_flags; int ret = 0; - if (hba->ufs_version == UFSHCI_VERSION_20) - lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE; - else + if ((hba->ufs_version == UFSHCI_VERSION_10) || + (hba->ufs_version == UFSHCI_VERSION_11)) lrbp->command_type = UTP_CMD_TYPE_DEV_MANAGE; + else + lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE; ufshcd_prepare_req_desc_hdr(lrbp, &upiu_flags, DMA_NONE); if (hba->dev_cmd.type == DEV_CMD_TYPE_QUERY) @@ -2222,10 +2223,11 @@ static int ufshcd_comp_scsi_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) u32 upiu_flags; int ret = 0; - if (hba->ufs_version == UFSHCI_VERSION_20) - lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE; - else + if ((hba->ufs_version == UFSHCI_VERSION_10) || + (hba->ufs_version == UFSHCI_VERSION_11)) lrbp->command_type = UTP_CMD_TYPE_SCSI; + else + lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE; if (likely(lrbp->cmd)) { ufshcd_prepare_req_desc_hdr(lrbp, &upiu_flags, From a9d76f59faffc0447701b902ed8322d35c45a983 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 20 Dec 2017 00:29:23 +0100 Subject: [PATCH 2492/2608] PCI: designware-ep: dw_pcie_ep_set_msi() should only set MMC bits commit 099a95f3591ade29da52131895a3ba9f92a0e82c upstream. Previously, dw_pcie_ep_set_msi() wrote all bits in the Message Control register, thus overwriting the PCI_MSI_FLAGS_64BIT bit. By clearing the PCI_MSI_FLAGS_64BIT bit, we break MSI on systems where the RC has set a 64 bit MSI address. Fix dw_pcie_ep_set_msi() so that it only sets MMC bits. Tested-by: Gustavo Pimentel Signed-off-by: Niklas Cassel Signed-off-by: Lorenzo Pieralisi Acked-by: Joao Pinto Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pcie-designware-ep.c | 4 +++- drivers/pci/dwc/pcie-designware.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/dwc/pcie-designware-ep.c b/drivers/pci/dwc/pcie-designware-ep.c index 7c621877a939..df317d390317 100644 --- a/drivers/pci/dwc/pcie-designware-ep.c +++ b/drivers/pci/dwc/pcie-designware-ep.c @@ -214,7 +214,9 @@ static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 encode_int) struct dw_pcie_ep *ep = epc_get_drvdata(epc); struct dw_pcie *pci = to_dw_pcie_from_ep(ep); - val = (encode_int << MSI_CAP_MMC_SHIFT); + val = dw_pcie_readw_dbi(pci, MSI_MESSAGE_CONTROL); + val &= ~MSI_CAP_MMC_MASK; + val |= (encode_int << MSI_CAP_MMC_SHIFT) & MSI_CAP_MMC_MASK; dw_pcie_writew_dbi(pci, MSI_MESSAGE_CONTROL, val); return 0; diff --git a/drivers/pci/dwc/pcie-designware.h b/drivers/pci/dwc/pcie-designware.h index 3551dd607b90..5af29d125c7e 100644 --- a/drivers/pci/dwc/pcie-designware.h +++ b/drivers/pci/dwc/pcie-designware.h @@ -99,6 +99,7 @@ #define MSI_MESSAGE_CONTROL 0x52 #define MSI_CAP_MMC_SHIFT 1 +#define MSI_CAP_MMC_MASK (7 << MSI_CAP_MMC_SHIFT) #define MSI_CAP_MME_SHIFT 4 #define MSI_CAP_MSI_EN_MASK 0x1 #define MSI_CAP_MME_MASK (7 << MSI_CAP_MME_SHIFT) From dd913d610e214b97bba81b1b76883d0f29d63a37 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 20 Dec 2017 00:29:24 +0100 Subject: [PATCH 2493/2608] PCI: designware-ep: Read-only registers need DBI_RO_WR_EN to be writable commit 1cab826b30c6275d479a6ab1dea1067e15dbec62 upstream. Certain registers that pcie-designware-ep tries to write to are read-only registers. However, these registers can become read/write if we first enable the DBI_RO_WR_EN bit. Set/unset the DBI_RO_WR_EN bit before/after writing these registers. Tested-by: Gustavo Pimentel Signed-off-by: Niklas Cassel Signed-off-by: Lorenzo Pieralisi Acked-by: Joao Pinto Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pcie-designware-ep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/dwc/pcie-designware-ep.c b/drivers/pci/dwc/pcie-designware-ep.c index df317d390317..abcbf0770358 100644 --- a/drivers/pci/dwc/pcie-designware-ep.c +++ b/drivers/pci/dwc/pcie-designware-ep.c @@ -35,8 +35,10 @@ static void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar) u32 reg; reg = PCI_BASE_ADDRESS_0 + (4 * bar); + dw_pcie_dbi_ro_wr_en(pci); dw_pcie_writel_dbi2(pci, reg, 0x0); dw_pcie_writel_dbi(pci, reg, 0x0); + dw_pcie_dbi_ro_wr_dis(pci); } static int dw_pcie_ep_write_header(struct pci_epc *epc, @@ -45,6 +47,7 @@ static int dw_pcie_ep_write_header(struct pci_epc *epc, struct dw_pcie_ep *ep = epc_get_drvdata(epc); struct dw_pcie *pci = to_dw_pcie_from_ep(ep); + dw_pcie_dbi_ro_wr_en(pci); dw_pcie_writew_dbi(pci, PCI_VENDOR_ID, hdr->vendorid); dw_pcie_writew_dbi(pci, PCI_DEVICE_ID, hdr->deviceid); dw_pcie_writeb_dbi(pci, PCI_REVISION_ID, hdr->revid); @@ -58,6 +61,7 @@ static int dw_pcie_ep_write_header(struct pci_epc *epc, dw_pcie_writew_dbi(pci, PCI_SUBSYSTEM_ID, hdr->subsys_id); dw_pcie_writeb_dbi(pci, PCI_INTERRUPT_PIN, hdr->interrupt_pin); + dw_pcie_dbi_ro_wr_dis(pci); return 0; } @@ -142,8 +146,10 @@ static int dw_pcie_ep_set_bar(struct pci_epc *epc, enum pci_barno bar, if (ret) return ret; + dw_pcie_dbi_ro_wr_en(pci); dw_pcie_writel_dbi2(pci, reg, size - 1); dw_pcie_writel_dbi(pci, reg, flags); + dw_pcie_dbi_ro_wr_dis(pci); return 0; } @@ -217,7 +223,9 @@ static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 encode_int) val = dw_pcie_readw_dbi(pci, MSI_MESSAGE_CONTROL); val &= ~MSI_CAP_MMC_MASK; val |= (encode_int << MSI_CAP_MMC_SHIFT) & MSI_CAP_MMC_MASK; + dw_pcie_dbi_ro_wr_en(pci); dw_pcie_writew_dbi(pci, MSI_MESSAGE_CONTROL, val); + dw_pcie_dbi_ro_wr_dis(pci); return 0; } From 421abdcc909322d6595504cf8d20c1adf74bc1cd Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 11 Jan 2018 14:00:57 +0530 Subject: [PATCH 2494/2608] PCI: endpoint: Use EPC's device in dma_alloc_coherent()/dma_free_coherent() commit b330104fa76df3eae6e199a23791fed5d35f06b4 upstream. After commit 723288836628 ("of: restrict DMA configuration"), of_dma_configure() doesn't configure the coherent_dma_mask/dma_mask of endpoint function device (since it doesn't have a DT node associated with and hence no dma-ranges property), resulting in dma_alloc_coherent() (used in pci_epf_alloc_space()) to fail. Fix it by making dma_alloc_coherent() use EPC's device for allocating memory address. Link: http://lkml.kernel.org/r/64d63468-d28f-8fcd-a6f3-cf2a6401c8cb@ti.com Signed-off-by: Kishon Vijay Abraham I [lorenzo.pieralisi@arm.com: tweaked commit log] Signed-off-by: Lorenzo Pieralisi Cc: Robin Murphy Cc: Rob Herring Cc: Christoph Hellwig Tested-by: Cyrille Pitchen Tested-by: Niklas Cassel Reviewed-by: Robin Murphy Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/pci/endpoint/pci-epc-core.c | 10 ---------- drivers/pci/endpoint/pci-epf-core.c | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index 42c2a1156325..cd7d4788b94d 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -18,7 +18,6 @@ */ #include -#include #include #include #include @@ -371,7 +370,6 @@ EXPORT_SYMBOL_GPL(pci_epc_write_header); int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf) { unsigned long flags; - struct device *dev = epc->dev.parent; if (epf->epc) return -EBUSY; @@ -383,12 +381,6 @@ int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf) return -EINVAL; epf->epc = epc; - if (dev->of_node) { - of_dma_configure(&epf->dev, dev->of_node); - } else { - dma_set_coherent_mask(&epf->dev, epc->dev.coherent_dma_mask); - epf->dev.dma_mask = epc->dev.dma_mask; - } spin_lock_irqsave(&epc->lock, flags); list_add_tail(&epf->list, &epc->pci_epf); @@ -503,9 +495,7 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, INIT_LIST_HEAD(&epc->pci_epf); device_initialize(&epc->dev); - dma_set_coherent_mask(&epc->dev, dev->coherent_dma_mask); epc->dev.class = pci_epc_class; - epc->dev.dma_mask = dev->dma_mask; epc->dev.parent = dev; epc->ops = ops; diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index ae1611a62808..95ccc4b8a0a2 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(pci_epf_bind); */ void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar) { - struct device *dev = &epf->dev; + struct device *dev = epf->epc->dev.parent; if (!addr) return; @@ -122,7 +122,7 @@ EXPORT_SYMBOL_GPL(pci_epf_free_space); void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar) { void *space; - struct device *dev = &epf->dev; + struct device *dev = epf->epc->dev.parent; dma_addr_t phys_addr; if (size < 128) From c492471db905b6c7c673ae5630dd0a27c8cd00b0 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 25 Dec 2017 19:10:37 +0800 Subject: [PATCH 2495/2608] rtc: Fix overflow when converting time64_t to rtc_time commit 36d46cdb43efea74043e29e2a62b13e9aca31452 upstream. If we convert one large time values to rtc_time, in the original formula 'days * 86400' can be overflowed in 'unsigned int' type to make the formula get one incorrect remain seconds value. Thus we can use div_s64_rem() function to avoid this situation. Signed-off-by: Baolin Wang Acked-by: Arnd Bergmann Signed-off-by: Alexandre Belloni Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-lib.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c index 1ae7da5cfc60..ad5bb21908e5 100644 --- a/drivers/rtc/rtc-lib.c +++ b/drivers/rtc/rtc-lib.c @@ -52,13 +52,11 @@ EXPORT_SYMBOL(rtc_year_days); */ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) { - unsigned int month, year; - unsigned long secs; + unsigned int month, year, secs; int days; /* time must be positive */ - days = div_s64(time, 86400); - secs = time - (unsigned int) days * 86400; + days = div_s64_rem(time, 86400, &secs); /* day of the week, 1970-01-01 was a Thursday */ tm->tm_wday = (days + 4) % 7; From 423f2c97a25392635216d349b85a4c2440de3a45 Mon Sep 17 00:00:00 2001 From: Jules Maselbas Date: Thu, 29 Mar 2018 15:43:01 +0100 Subject: [PATCH 2496/2608] sched/cpufreq/schedutil: Fix error path mutex unlock commit 1b5d43cfb69759d8ef8d30469cea31d0c037aed5 upstream. This patch prevents the 'global_tunables_lock' mutex from being unlocked before being locked. This mutex is not locked if the sugov_kthread_create() function fails. Signed-off-by: Jules Maselbas Acked-by: Peter Zijlstra Cc: Chris Redpath Cc: Dietmar Eggermann Cc: Linus Torvalds Cc: Mike Galbraith Cc: Patrick Bellasi Cc: Stephen Kyle Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: nd@arm.com Link: http://lkml.kernel.org/r/20180329144301.38419-1-jules.maselbas@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- kernel/sched/cpufreq_schedutil.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 81eb7899c7c8..b314c9eaa71d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -605,10 +605,9 @@ fail: stop_kthread: sugov_kthread_stop(sg_policy); - -free_sg_policy: mutex_unlock(&global_tunables_lock); +free_sg_policy: sugov_policy_free(sg_policy); disable_fast_switch: From 9c5f91cc9b2e8d4637182e9b43124af31803abbe Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 28 Mar 2018 19:03:23 +0200 Subject: [PATCH 2497/2608] pwm-backlight: Enable/disable the PWM before/after LCD enable toggle. commit 5fb5caee92ba35a4a3baa61d45a78eb057e2c031 upstream. Before this patch the enable signal was set before the PWM signal and vice-versa on power off. This sequence is wrong, at least, it is on the different panels datasheets that I checked, so I inverted the sequence to follow the specs. For reference the following panels have the mentioned sequence: - N133HSE-EA1 (Innolux) - N116BGE (Innolux) - N156BGE-L21 (Innolux) - B101EAN0 (Auo) - B101AW03 (Auo) - LTN101NT05 (Samsung) - CLAA101WA01A (Chunghwa) Signed-off-by: Enric Balletbo i Serra Acked-by: Daniel Thompson Acked-by: Jingoo Han Acked-by: Thierry Reding Signed-off-by: Lee Jones Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/video/backlight/pwm_bl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 0fa7d2bd0e48..155153ecb894 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -54,10 +54,11 @@ static void pwm_backlight_power_on(struct pwm_bl_data *pb, int brightness) if (err < 0) dev_err(pb->dev, "failed to enable power supply\n"); + pwm_enable(pb->pwm); + if (pb->enable_gpio) gpiod_set_value_cansleep(pb->enable_gpio, 1); - pwm_enable(pb->pwm); pb->enabled = true; } @@ -66,12 +67,12 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb) if (!pb->enabled) return; - pwm_config(pb->pwm, 0, pb->period); - pwm_disable(pb->pwm); - if (pb->enable_gpio) gpiod_set_value_cansleep(pb->enable_gpio, 0); + pwm_config(pb->pwm, 0, pb->period); + pwm_disable(pb->pwm); + regulator_disable(pb->power_supply); pb->enabled = false; } From 69ef9ca4677e0e83ef8dc08f4eec8c466d218ba9 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 16 Nov 2018 19:01:10 +0800 Subject: [PATCH 2498/2608] power: supply: charger-manager: Fix incorrect return value commit f25a646fbe2051527ad9721853e892d13a99199e upstream. Fix incorrect return value. Signed-off-by: Baolin Wang Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/charger-manager.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/power/supply/charger-manager.c b/drivers/power/supply/charger-manager.c index 6502fa7c2106..f60dfc213257 100644 --- a/drivers/power/supply/charger-manager.c +++ b/drivers/power/supply/charger-manager.c @@ -1212,7 +1212,6 @@ static int charger_extcon_init(struct charger_manager *cm, if (ret < 0) { pr_info("Cannot register extcon_dev for %s(cable: %s)\n", cable->extcon_name, cable->name); - ret = -EINVAL; } return ret; @@ -1629,7 +1628,7 @@ static int charger_manager_probe(struct platform_device *pdev) if (IS_ERR(desc)) { dev_err(&pdev->dev, "No platform data (desc) found\n"); - return -ENODEV; + return PTR_ERR(desc); } cm = devm_kzalloc(&pdev->dev, sizeof(*cm), GFP_KERNEL); From 0cc17a7a320324a84f7b4731841b0ec10e65214e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Mar 2018 00:06:10 +0200 Subject: [PATCH 2499/2608] ath10k: avoid possible string overflow commit 6707ba0105a2d350710bc0a537a98f49eb4b895d upstream. The way that 'strncat' is used here raised a warning in gcc-8: drivers/net/wireless/ath/ath10k/wmi.c: In function 'ath10k_wmi_tpc_stats_final_disp_tables': drivers/net/wireless/ath/ath10k/wmi.c:4649:4: error: 'strncat' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] Effectively, this is simply a strcat() but the use of strncat() suggests some form of overflow check. Regardless of whether this might actually overflow, using strlcat() instead of strncat() avoids the warning and makes the code more robust. Fixes: bc64d05220f3 ("ath10k: debugfs support to get final TPC stats for 10.4 variants") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 8cb47858eb00..ab8eb9cdfda0 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -4309,7 +4309,7 @@ static void ath10k_tpc_config_disp_tables(struct ath10k *ar, rate_code[i], type); snprintf(buff, sizeof(buff), "%8d ", tpc[j]); - strncat(tpc_value, buff, strlen(buff)); + strlcat(tpc_value, buff, sizeof(tpc_value)); } tpc_stats->tpc_table[type].pream_idx[i] = pream_idx; tpc_stats->tpc_table[type].rate_code[i] = rate_code[i]; From 1848c32fad1666bdc04d40f857284ffcb55f694a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Mar 2019 14:13:56 +0900 Subject: [PATCH 2500/2608] Linux 4.14.109 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 170411b62525..e02bced59a57 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 108 +SUBLEVEL = 109 EXTRAVERSION = NAME = Petit Gorille From 82868814a87236b17ac588fbb779c87616e99345 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 18 Jan 2019 12:56:20 +0100 Subject: [PATCH 2501/2608] Bluetooth: Check L2CAP option sizes returned from l2cap_get_conf_opt commit af3d5d1c87664a4f150fcf3534c6567cb19909b0 upstream. When doing option parsing for standard type values of 1, 2 or 4 octets, the value is converted directly into a variable instead of a pointer. To avoid being tricked into being a pointer, check that for these option types that sizes actually match. In L2CAP every option is fixed size and thus it is prudent anyway to ensure that the remote side sends us the right option size along with option paramters. If the option size is not matching the option type, then that option is silently ignored. It is a protocol violation and instead of trying to give the remote attacker any further hints just pretend that option is not present and proceed with the default values. Implementation following the specification and its qualification procedures will always use the correct size and thus not being impacted here. To keep the code readable and consistent accross all options, a few cosmetic changes were also required. Signed-off-by: Marcel Holtmann Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 77 +++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9b7907ebfa01..8fee6b85be9b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3342,10 +3342,14 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data switch (type) { case L2CAP_CONF_MTU: + if (olen != 2) + break; mtu = val; break; case L2CAP_CONF_FLUSH_TO: + if (olen != 2) + break; chan->flush_to = val; break; @@ -3353,26 +3357,30 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data break; case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *) val, olen); + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *) val, olen); break; case L2CAP_CONF_FCS: + if (olen != 1) + break; if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, &chan->conf_state); break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) { - remote_efs = 1; - memcpy(&efs, (void *) val, olen); - } + if (olen != sizeof(efs)) + break; + remote_efs = 1; + memcpy(&efs, (void *) val, olen); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; if (!(chan->conn->local_fixed_chan & L2CAP_FC_A2MP)) return -ECONNREFUSED; - set_bit(FLAG_EXT_CTRL, &chan->flags); set_bit(CONF_EWS_RECV, &chan->conf_state); chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; @@ -3382,7 +3390,6 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data default: if (hint) break; - result = L2CAP_CONF_UNKNOWN; *((u8 *) ptr++) = type; break; @@ -3550,55 +3557,60 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, switch (type) { case L2CAP_CONF_MTU: + if (olen != 2) + break; if (val < L2CAP_DEFAULT_MIN_MTU) { *result = L2CAP_CONF_UNACCEPT; chan->imtu = L2CAP_DEFAULT_MIN_MTU; } else chan->imtu = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, + endptr - ptr); break; case L2CAP_CONF_FLUSH_TO: + if (olen != 2) + break; chan->flush_to = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, - 2, chan->flush_to, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, + chan->flush_to, endptr - ptr); break; case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); - + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *)val, olen); if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state) && rfc.mode != chan->mode) return -ECONNREFUSED; - chan->fcs = 0; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc, endptr - ptr); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, chan->tx_win, endptr - ptr); break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) { - memcpy(&efs, (void *)val, olen); - - if (chan->local_stype != L2CAP_SERV_NOTRAFIC && - efs.stype != L2CAP_SERV_NOTRAFIC && - efs.stype != chan->local_stype) - return -ECONNREFUSED; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs, endptr - ptr); - } + if (olen != sizeof(efs)) + break; + memcpy(&efs, (void *)val, olen); + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs, endptr - ptr); break; case L2CAP_CONF_FCS: + if (olen != 1) + break; if (*result == L2CAP_CONF_PENDING) if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, @@ -3730,10 +3742,13 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) switch (type) { case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *)val, olen); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; txwin_ext = val; break; } From 2b59d36f22622c92c0b06aee7571f0a86a217188 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 18 Jan 2019 13:43:19 +0100 Subject: [PATCH 2502/2608] Bluetooth: Verify that l2cap_get_conf_opt provides large enough buffer commit 7c9cbd0b5e38a1672fcd137894ace3b042dfbf69 upstream. The function l2cap_get_conf_opt will return L2CAP_CONF_OPT_SIZE + opt->len as length value. The opt->len however is in control over the remote user and can be used by an attacker to gain access beyond the bounds of the actual packet. To prevent any potential leak of heap memory, it is enough to check that the resulting len calculation after calling l2cap_get_conf_opt is not below zero. A well formed packet will always return >= 0 here and will end with the length value being zero after the last option has been parsed. In case of malformed packets messing with the opt->len field the length value will become negative. If that is the case, then just abort and ignore the option. In case an attacker uses a too short opt->len value, then garbage will be parsed, but that is protected by the unknown option handling and also the option parameter size checks. Signed-off-by: Marcel Holtmann Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8fee6b85be9b..b510da76170e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3336,6 +3336,8 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&req, &type, &olen, &val); + if (len < 0) + break; hint = type & L2CAP_CONF_HINT; type &= L2CAP_CONF_MASK; @@ -3554,6 +3556,8 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + if (len < 0) + break; switch (type) { case L2CAP_CONF_MTU: @@ -3739,6 +3743,8 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + if (len < 0) + break; switch (type) { case L2CAP_CONF_RFC: From 6f90688633d5d3f992cae1c00bcaac272350f1dc Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 3 Jul 2018 17:43:09 +0200 Subject: [PATCH 2503/2608] video: fbdev: Set pixclock = 0 in goldfishfb [ Upstream commit ace6033ec5c356615eaa3582fb1946e9eaff6662 ] User space Android code identifies pixclock == 0 as a sign for emulation and will set the frame rate to 60 fps when reading this value, which is the desired outcome. Signed-off-by: Christoffer Dall Signed-off-by: Peter Maydell Signed-off-by: Roman Kiryanov Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/goldfishfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 14a93cb21310..66d58e93bc32 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -234,7 +234,7 @@ static int goldfish_fb_probe(struct platform_device *pdev) fb->fb.var.activate = FB_ACTIVATE_NOW; fb->fb.var.height = readl(fb->reg_base + FB_GET_PHYS_HEIGHT); fb->fb.var.width = readl(fb->reg_base + FB_GET_PHYS_WIDTH); - fb->fb.var.pixclock = 10000; + fb->fb.var.pixclock = 0; fb->fb.var.red.offset = 11; fb->fb.var.red.length = 5; From 1e594a4f60abb4e8d10d8a27286d2c4cb9366f97 Mon Sep 17 00:00:00 2001 From: Bhadram Varka Date: Fri, 27 Oct 2017 08:22:02 +0530 Subject: [PATCH 2504/2608] stmmac: copy unicast mac address to MAC registers [ Upstream commit a830405ee452ddc4101c3c9334e6fedd42c6b357 ] Currently stmmac driver not copying the valid ethernet MAC address to MAC registers. This patch takes care of updating the MAC register with MAC address. Signed-off-by: Bhadram Varka Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0cc83e8417ef..4a9dbee6f054 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3787,6 +3787,20 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return ret; } +static int stmmac_set_mac_address(struct net_device *ndev, void *addr) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + int ret = 0; + + ret = eth_mac_addr(ndev, addr); + if (ret) + return ret; + + priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0); + + return ret; +} + #ifdef CONFIG_DEBUG_FS static struct dentry *stmmac_fs_dir; @@ -4014,7 +4028,7 @@ static const struct net_device_ops stmmac_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = stmmac_set_mac_address, }; /** From 27eb4125f2a50a0bffe029c5dfcb853666ba92ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Mar 2019 05:46:18 -0700 Subject: [PATCH 2505/2608] dccp: do not use ipv6 header for ipv4 flow [ Upstream commit e0aa67709f89d08c8d8e5bdd9e0b649df61d0090 ] When a dual stack dccp listener accepts an ipv4 flow, it should not attempt to use an ipv6 header or inet6_iif() helper. Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6344f1b18a6a..58a401e9cf09 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -433,8 +433,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count From 111fdc6e948b36115e29cf3b1981058d033ffd54 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 21 Mar 2019 15:02:50 +0800 Subject: [PATCH 2506/2608] genetlink: Fix a memory leak on error path [ Upstream commit ceabee6c59943bdd5e1da1a6a20dc7ee5f8113a2 ] In genl_register_family(), when idr_alloc() fails, we forget to free the memory we possibly allocate for family->attrbuf. Reported-by: Hulk Robot Fixes: 2ae0f17df1cd ("genetlink: use idr to track families") Signed-off-by: YueHaibing Reviewed-by: Kirill Tkhai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/genetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b9ce82c9440f..e9b8b0b0ac43 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -365,7 +365,7 @@ int genl_register_family(struct genl_family *family) start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_locked; + goto errout_free; } err = genl_validate_assign_mc_groups(family); @@ -384,6 +384,7 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); +errout_free: kfree(family->attrbuf); errout_locked: genl_unlock_all(); From 7ca0727ee469f2954b689d5bdbe25fea029401bc Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 18 Mar 2019 08:51:06 -0500 Subject: [PATCH 2507/2608] mISDN: hfcpci: Test both vendor & device ID for Digium HFC4S [ Upstream commit fae846e2b7124d4b076ef17791c73addf3b26350 ] The device ID alone does not uniquely identify a device. Test both the vendor and device ID to make sure we don't mistakenly think some other vendor's 0xB410 device is a Digium HFC4S. Also, instead of the bare hex ID, use the same constant (PCI_DEVICE_ID_DIGIUM_HFC4S) used in the device ID table. No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/hardware/mISDN/hfcmulti.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 3cf07b8ced1c..df01018acff1 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -4367,7 +4367,8 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, if (m->clock2) test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip); - if (ent->device == 0xB410) { + if (ent->vendor == PCI_VENDOR_ID_DIGIUM && + ent->device == PCI_DEVICE_ID_DIGIUM_HFC4S) { test_and_set_bit(HFC_CHIP_B410P, &hc->chip); test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip); test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); From 701052e17bb73979a6321edc6c324622fc7cc56d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 25 Mar 2019 14:18:06 +0100 Subject: [PATCH 2508/2608] net: datagram: fix unbounded loop in __skb_try_recv_datagram() [ Upstream commit 0b91bce1ebfc797ff3de60c8f4a1e6219a8a3187 ] Christoph reported a stall while peeking datagram with an offset when busy polling is enabled. __skb_try_recv_datagram() uses as the loop termination condition 'queue empty'. When peeking, the socket queue can be not empty, even when no additional packets are received. Address the issue explicitly checking for receive queue changes, as currently done by __skb_wait_for_more_packets(). Fixes: 2b5cd0dfa384 ("net: Change return type of sk_busy_loop from bool to void") Reported-and-tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index d8a0774f7608..dcb333e95702 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -281,7 +281,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(&sk->sk_receive_queue)); + } while (sk->sk_receive_queue.prev != *last); error = -EAGAIN; From 1a3acbd6adb551a5f807148d6512d8aeba8cb8bb Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 18 Mar 2019 23:14:52 -0700 Subject: [PATCH 2509/2608] net/packet: Set __GFP_NOWARN upon allocation in alloc_pg_vec [ Upstream commit 398f0132c14754fcd03c1c4f8e7176d001ce8ea1 ] Since commit fc62814d690c ("net/packet: fix 4gb buffer limit due to overflow check") one can now allocate packet ring buffers >= UINT_MAX. However, syzkaller found that that triggers a warning: [ 21.100000] WARNING: CPU: 2 PID: 2075 at mm/page_alloc.c:4584 __alloc_pages_nod0 [ 21.101490] Modules linked in: [ 21.101921] CPU: 2 PID: 2075 Comm: syz-executor.0 Not tainted 5.0.0 #146 [ 21.102784] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 [ 21.103887] RIP: 0010:__alloc_pages_nodemask+0x2a0/0x630 [ 21.104640] Code: fe ff ff 65 48 8b 04 25 c0 de 01 00 48 05 90 0f 00 00 41 bd 01 00 00 00 48 89 44 24 48 e9 9c fe 3 [ 21.107121] RSP: 0018:ffff88805e1cf920 EFLAGS: 00010246 [ 21.107819] RAX: 0000000000000000 RBX: ffffffff85a488a0 RCX: 0000000000000000 [ 21.108753] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: 0000000000000000 [ 21.109699] RBP: 1ffff1100bc39f28 R08: ffffed100bcefb67 R09: ffffed100bcefb67 [ 21.110646] R10: 0000000000000001 R11: ffffed100bcefb66 R12: 000000000000000d [ 21.111623] R13: 0000000000000000 R14: ffff88805e77d888 R15: 000000000000000d [ 21.112552] FS: 00007f7c7de05700(0000) GS:ffff88806d100000(0000) knlGS:0000000000000000 [ 21.113612] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 21.114405] CR2: 000000000065c000 CR3: 000000005e58e006 CR4: 00000000001606e0 [ 21.115367] Call Trace: [ 21.115705] ? __alloc_pages_slowpath+0x21c0/0x21c0 [ 21.116362] alloc_pages_current+0xac/0x1e0 [ 21.116923] kmalloc_order+0x18/0x70 [ 21.117393] kmalloc_order_trace+0x18/0x110 [ 21.117949] packet_set_ring+0x9d5/0x1770 [ 21.118524] ? packet_rcv_spkt+0x440/0x440 [ 21.119094] ? lock_downgrade+0x620/0x620 [ 21.119646] ? __might_fault+0x177/0x1b0 [ 21.120177] packet_setsockopt+0x981/0x2940 [ 21.120753] ? __fget+0x2fb/0x4b0 [ 21.121209] ? packet_release+0xab0/0xab0 [ 21.121740] ? sock_has_perm+0x1cd/0x260 [ 21.122297] ? selinux_secmark_relabel_packet+0xd0/0xd0 [ 21.123013] ? __fget+0x324/0x4b0 [ 21.123451] ? selinux_netlbl_socket_setsockopt+0x101/0x320 [ 21.124186] ? selinux_netlbl_sock_rcv_skb+0x3a0/0x3a0 [ 21.124908] ? __lock_acquire+0x529/0x3200 [ 21.125453] ? selinux_socket_setsockopt+0x5d/0x70 [ 21.126075] ? __sys_setsockopt+0x131/0x210 [ 21.126533] ? packet_release+0xab0/0xab0 [ 21.127004] __sys_setsockopt+0x131/0x210 [ 21.127449] ? kernel_accept+0x2f0/0x2f0 [ 21.127911] ? ret_from_fork+0x8/0x50 [ 21.128313] ? do_raw_spin_lock+0x11b/0x280 [ 21.128800] __x64_sys_setsockopt+0xba/0x150 [ 21.129271] ? lockdep_hardirqs_on+0x37f/0x560 [ 21.129769] do_syscall_64+0x9f/0x450 [ 21.130182] entry_SYSCALL_64_after_hwframe+0x49/0xbe We should allocate with __GFP_NOWARN to handle this. Cc: Kal Conley Cc: Andrey Konovalov Fixes: fc62814d690c ("net/packet: fix 4gb buffer limit due to overflow check") Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a2bd5917a2a9..1708566516d9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4232,7 +4232,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) struct pgv *pg_vec; int i; - pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); + pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; From 7ce391ed729b5bf5c4aa909dd5b9e7ffb63879f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Mar 2019 10:41:14 -0700 Subject: [PATCH 2510/2608] net: rose: fix a possible stack overflow [ Upstream commit e5dcc0c3223c45c94100f05f28d8ef814db3d82c ] rose_write_internal() uses a temp buffer of 100 bytes, but a manual inspection showed that given arbitrary input, rose_create_facilities() can fill up to 110 bytes. Lets use a tailroom of 256 bytes for peace of mind, and remove the bounce buffer : we can simply allocate a big enough skb and adjust its length as needed. syzbot report : BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:352 [inline] BUG: KASAN: stack-out-of-bounds in rose_create_facilities net/rose/rose_subr.c:521 [inline] BUG: KASAN: stack-out-of-bounds in rose_write_internal+0x597/0x15d0 net/rose/rose_subr.c:116 Write of size 7 at addr ffff88808b1ffbef by task syz-executor.0/24854 CPU: 0 PID: 24854 Comm: syz-executor.0 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 memcpy+0x38/0x50 mm/kasan/common.c:131 memcpy include/linux/string.h:352 [inline] rose_create_facilities net/rose/rose_subr.c:521 [inline] rose_write_internal+0x597/0x15d0 net/rose/rose_subr.c:116 rose_connect+0x7cb/0x1510 net/rose/af_rose.c:826 __sys_connect+0x266/0x330 net/socket.c:1685 __do_sys_connect net/socket.c:1696 [inline] __se_sys_connect net/socket.c:1693 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1693 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458079 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f47b8d9dc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000458079 RDX: 000000000000001c RSI: 0000000020000040 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f47b8d9e6d4 R13: 00000000004be4a4 R14: 00000000004ceca8 R15: 00000000ffffffff The buggy address belongs to the page: page:ffffea00022c7fc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x1fffc0000000000() raw: 01fffc0000000000 0000000000000000 ffffffff022c0101 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88808b1ffa80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88808b1ffb00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 03 >ffff88808b1ffb80: f2 f2 00 00 00 00 00 00 00 00 00 00 00 00 04 f3 ^ ffff88808b1ffc00: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 ffff88808b1ffc80: 00 00 00 00 00 00 00 f1 f1 f1 f1 f1 f1 01 f2 01 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rose/rose_subr.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7ca57741b2fb..7849f286bb93 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype) struct sk_buff *skb; unsigned char *dptr; unsigned char lci1, lci2; - char buffer[100]; - int len, faclen = 0; + int maxfaclen = 0; + int len, faclen; + int reserve; - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1; + reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1; + len = ROSE_MIN_LEN; switch (frametype) { case ROSE_CALL_REQUEST: len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN; - faclen = rose_create_facilities(buffer, rose); - len += faclen; + maxfaclen = 256; break; case ROSE_CALL_ACCEPTED: case ROSE_CLEAR_REQUEST: @@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype) break; } - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC); + if (!skb) return; /* * Space for AX.25 header and PID. */ - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1); + skb_reserve(skb, reserve); - dptr = skb_put(skb, skb_tailroom(skb)); + dptr = skb_put(skb, len); lci1 = (rose->lci >> 8) & 0x0F; lci2 = (rose->lci >> 0) & 0xFF; @@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype) dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; - memcpy(dptr, buffer, faclen); + faclen = rose_create_facilities(dptr, rose); + skb_put(skb, faclen); dptr += faclen; break; From a12d2154828c8453e0c16a268815c985846ef157 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 18 Mar 2019 23:36:08 +0200 Subject: [PATCH 2511/2608] net: stmmac: fix memory corruption with large MTUs [ Upstream commit 223a960c01227e4dbcb6f9fa06b47d73bda21274 ] When using 16K DMA buffers and ring mode, the DES3 refill is not working correctly as the function is using a bogus pointer for checking the private data. As a result stale pointers will remain in the RX descriptor ring, so DMA will now likely overwrite/corrupt some already freed memory. As simple reproducer, just receive some UDP traffic: # ifconfig eth0 down; ifconfig eth0 mtu 9000; ifconfig eth0 up # iperf3 -c 192.168.253.40 -u -b 0 -R If you didn't crash by now check the RX descriptors to find non-contiguous RX buffers: cat /sys/kernel/debug/stmmaceth/eth0/descriptors_status [...] 1 [0x2be5020]: 0xa3220321 0x9ffc1ffc 0x72d70082 0x130e207e ^^^^^^^^^^^^^^^^^^^^^ 2 [0x2be5040]: 0xa3220321 0x9ffc1ffc 0x72998082 0x1311a07e ^^^^^^^^^^^^^^^^^^^^^ A simple ping test will now report bad data: # ping -s 8200 192.168.253.40 PING 192.168.253.40 (192.168.253.40) 8200(8228) bytes of data. 8208 bytes from 192.168.253.40: icmp_seq=1 ttl=64 time=1.00 ms wrong data byte #8144 should be 0xd0 but was 0x88 Fix the wrong pointer. Also we must refill DES3 only if the DMA buffer size is 16K. Fixes: 54139cf3bb33 ("net: stmmac: adding multiple buffers for rx") Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index 1af7b078b94d..d4c3bf78d928 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -114,10 +114,11 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc) static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) { - struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; + struct stmmac_rx_queue *rx_q = priv_ptr; + struct stmmac_priv *priv = rx_q->priv_data; /* Fill DES3 in case of RING mode */ - if (priv->dma_buf_sz >= BUF_SIZE_8KiB) + if (priv->dma_buf_sz == BUF_SIZE_16KiB) p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB); } From e331c9066901dfe40bea4647521b86e9fb9901bb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 19 Mar 2019 10:16:53 +0800 Subject: [PATCH 2512/2608] net-sysfs: call dev_hold if kobject_init_and_add success [ Upstream commit a3e23f719f5c4a38ffb3d30c8d7632a4ed8ccd9e ] In netdev_queue_add_kobject and rx_queue_add_kobject, if sysfs_create_group failed, kobject_put will call netdev_queue_release to decrease dev refcont, however dev_hold has not be called. So we will see this while unregistering dev: unregister_netdevice: waiting for bcsh0 to become free. Usage count = -1 Reported-by: Hulk Robot Fixes: d0d668371679 ("net: don't decrement kobj reference count on init failure") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net-sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c6a2655cc28a..dee57c5ff738 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -917,6 +917,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) { @@ -926,7 +928,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) } kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return error; } @@ -1327,6 +1328,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); if (error) { @@ -1336,7 +1339,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) #endif kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return 0; } From 68979f5eccc6ecc2630e6edc7c0d0a915d3d630b Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Sat, 16 Mar 2019 14:41:30 +0100 Subject: [PATCH 2513/2608] packets: Always register packet sk in the same order [ Upstream commit a4dc6a49156b1f8d6e17251ffda17c9e6a5db78a ] When using fanouts with AF_PACKET, the demux functions such as fanout_demux_cpu will return an index in the fanout socket array, which corresponds to the selected socket. The ordering of this array depends on the order the sockets were added to a given fanout group, so for FANOUT_CPU this means sockets are bound to cpus in the order they are configured, which is OK. However, when stopping then restarting the interface these sockets are bound to, the sockets are reassigned to the fanout group in the reverse order, due to the fact that they were inserted at the head of the interface's AF_PACKET socket list. This means that traffic that was directed to the first socket in the fanout group is now directed to the last one after an interface restart. In the case of FANOUT_CPU, traffic from CPU0 will be directed to the socket that used to receive traffic from the last CPU after an interface restart. This commit introduces a helper to add a socket at the tail of a list, then uses it to register AF_PACKET sockets. Note that this changes the order in which sockets are listed in /proc and with sock_diag. Fixes: dc99f600698d ("packet: Add fanout support") Signed-off-by: Maxime Chevallier Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 6 ++++++ net/packet/af_packet.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 4280e96d4b46..60eef7f1ac05 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -682,6 +682,12 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) hlist_add_head_rcu(&sk->sk_node, list); } +static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + hlist_add_tail_rcu(&sk->sk_node, list); +} + static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1708566516d9..e8ca6aa3a32f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3281,7 +3281,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, } mutex_lock(&net->packet.sklist_lock); - sk_add_node_rcu(sk, &net->packet.sklist); + sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); preempt_disable(); From 9f6e90284fcf0e710630899d94e80982f58c8441 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Mar 2019 09:39:52 +0800 Subject: [PATCH 2514/2608] rhashtable: Still do rehash when we get EEXIST [ Upstream commit 408f13ef358aa5ad56dc6230c2c7deb92cf462b1 ] As it stands if a shrink is delayed because of an outstanding rehash, we will go into a rescheduling loop without ever doing the rehash. This patch fixes this by still carrying out the rehash and then rescheduling so that we can shrink after the completion of the rehash should it still be necessary. The return value of EEXIST captures this case and other cases (e.g., another thread expanded/rehashed the table at the same time) where we should still proceed with the rehash. Fixes: da20420f83ea ("rhashtable: Add nested tables") Reported-by: Josh Elsasser Signed-off-by: Herbert Xu Tested-by: Josh Elsasser Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/rhashtable.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index cebbcec877d7..cb577ca65fa9 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -459,8 +459,12 @@ static void rht_deferred_worker(struct work_struct *work) else if (tbl->nest) err = rhashtable_rehash_alloc(ht, tbl, tbl->size); - if (!err) - err = rhashtable_rehash_table(ht); + if (!err || err == -EEXIST) { + int nerr; + + nerr = rhashtable_rehash_table(ht); + err = err ?: nerr; + } mutex_unlock(&ht->mutex); From 94e82e56fdd379d26a8fe4d2a605c976662d8373 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Mar 2019 05:45:35 -0700 Subject: [PATCH 2515/2608] tcp: do not use ipv6 header for ipv4 flow [ Upstream commit 89e4130939a20304f4059ab72179da81f5347528 ] When a dual stack tcp listener accepts an ipv4 flow, it should not attempt to use an ipv6 header or tcp_v6_iif() helper. Fixes: 1397ed35f22d ("ipv6: add flowinfo for tcp6 pkt_options for all cases") Fixes: df3687ffc665 ("ipv6: add the IPV6_FL_F_REFLECT flag to IPV6_FL_A_GET") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/tcp_ipv6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ba8586aadffa..7b4ce3f9e2f4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1083,11 +1083,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; + newnp->rcv_flowinfo = 0; if (np->repflow) - newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); + newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count From d6a21619421a81669ace172fc6e48c16d33ce2f3 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 26 Mar 2019 11:53:19 -0400 Subject: [PATCH 2516/2608] thunderx: enable page recycling for non-XDP case [ Upstream commit b3e208069477588c06f4d5d986164b435bb06e6d ] Commit 773225388dae15e72790 ("net: thunderx: Optimize page recycling for XDP") added code to nicvf_alloc_page() that inadvertently disables receive buffer page recycling for the non-XDP case by always NULL'ng the page pointer. This patch corrects two if-conditionals to allow for the recycling of non-XDP mode pages by only setting the page pointer to NULL when the page is not ready for recycling. Fixes: 773225388dae ("net: thunderx: Optimize page recycling for XDP") Signed-off-by: Dean Nelson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/cavium/thunder/nicvf_queues.c | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 09494e1c77c5..5f779198223c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -105,20 +105,19 @@ static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic, /* Check if page can be recycled */ if (page) { ref_count = page_ref_count(page); - /* Check if this page has been used once i.e 'put_page' - * called after packet transmission i.e internal ref_count - * and page's ref_count are equal i.e page can be recycled. + /* This page can be recycled if internal ref_count and page's + * ref_count are equal, indicating that the page has been used + * once for packet transmission. For non-XDP mode, internal + * ref_count is always '1'. */ - if (rbdr->is_xdp && (ref_count == pgcache->ref_count)) - pgcache->ref_count--; - else - page = NULL; - - /* In non-XDP mode, page's ref_count needs to be '1' for it - * to be recycled. - */ - if (!rbdr->is_xdp && (ref_count != 1)) + if (rbdr->is_xdp) { + if (ref_count == pgcache->ref_count) + pgcache->ref_count--; + else + page = NULL; + } else if (ref_count != 1) { page = NULL; + } } if (!page) { From e9df36340030db6efc895a0cbf75f5e77cb8984c Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 26 Mar 2019 11:53:26 -0400 Subject: [PATCH 2517/2608] thunderx: eliminate extra calls to put_page() for pages held for recycling [ Upstream commit cd35ef91490ad8049dd180bb060aff7ee192eda9 ] For the non-XDP case, commit 773225388dae15e72790 ("net: thunderx: Optimize page recycling for XDP") added code to nicvf_free_rbdr() that, when releasing the additional receive buffer page reference held for recycling, repeatedly calls put_page() until the page's _refcount goes to zero. Which results in the page being freed. This is not okay if the page's _refcount was greater than 1 (in the non-XDP case), because nicvf_free_rbdr() should not be subtracting more than what nicvf_alloc_page() had previously added to the page's _refcount, which was only 1 (in the non-XDP case). This can arise if a received packet is still being processed and the receive buffer (i.e., skb->head) has not yet been freed via skb_free_head() when nicvf_free_rbdr() is spinning through the aforementioned put_page() loop. If this should occur, when the received packet finishes processing and skb_free_head() is called, various problems can ensue. Exactly what, depends on whether the page has already been reallocated or not, anything from "BUG: Bad page state ... ", to "Unable to handle kernel NULL pointer dereference ..." or "Unable to handle kernel paging request...". So this patch changes nicvf_free_rbdr() to only call put_page() once for pages held for recycling (in the non-XDP case). Fixes: 773225388dae ("net: thunderx: Optimize page recycling for XDP") Signed-off-by: Dean Nelson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 5f779198223c..7ad1d56d8389 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -364,11 +364,10 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) while (head < rbdr->pgcnt) { pgcache = &rbdr->pgcache[head]; if (pgcache->page && page_ref_count(pgcache->page) != 0) { - if (!rbdr->is_xdp) { - put_page(pgcache->page); - continue; + if (rbdr->is_xdp) { + page_ref_sub(pgcache->page, + pgcache->ref_count - 1); } - page_ref_sub(pgcache->page, pgcache->ref_count - 1); put_page(pgcache->page); } head++; From bf583844131acbaea3e0436e47bca1214c384029 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Sat, 16 Mar 2019 17:02:54 +0800 Subject: [PATCH 2518/2608] vxlan: Don't call gro_cells_destroy() before device is unregistered [ Upstream commit cc4807bb609230d8959fd732b0bf3bd4c2de8eac ] Commit ad6c9986bcb62 ("vxlan: Fix GRO cells race condition between receive and link delete") fixed a race condition for the typical case a vxlan device is dismantled from the current netns. But if a netns is dismantled, vxlan_destroy_tunnels() is called to schedule a unregister_netdevice_queue() of all the vxlan tunnels that are related to this netns. In vxlan_destroy_tunnels(), gro_cells_destroy() is called and finished before unregister_netdevice_queue(). This means that the gro_cells_destroy() call is done too soon, for the same reasons explained in above commit. So we need to fully respect the RCU rules, and thus must remove the gro_cells_destroy() call or risk use after-free. Fixes: 58ce31cca1ff ("vxlan: GRO support at tunnel layer") Signed-off-by: Suanming.Mou Suggested-by: Eric Dumazet Reviewed-by: Stefano Brivio Reviewed-by: Zhiqiang Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index df48f65c4f90..2fbaa279988e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3793,10 +3793,8 @@ static void __net_exit vxlan_exit_net(struct net *net) /* If vxlan->dev is in the same netns, it has already been added * to the list by the previous loop. */ - if (!net_eq(dev_net(vxlan->dev), net)) { - gro_cells_destroy(&vxlan->gro_cells); + if (!net_eq(dev_net(vxlan->dev), net)) unregister_netdevice_queue(vxlan->dev, &list); - } } unregister_netdevice_many(&list); From d75259910db799b3db39d870c00eda72b74535f7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 19:47:00 +0800 Subject: [PATCH 2519/2608] sctp: get sctphdr by offset in sctp_compute_cksum [ Upstream commit 273160ffc6b993c7c91627f5a84799c66dfe4dee ] sctp_hdr(skb) only works when skb->transport_header is set properly. But in Netfilter, skb->transport_header for ipv6 is not guaranteed to be right value for sctphdr. It would cause to fail to check the checksum for sctp packets. So fix it by using offset, which is always right in all places. v1->v2: - Fix the changelog. Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code") Reported-by: Li Shuang Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 4a5b9a306c69..803fc26ef0ba 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -60,7 +60,7 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { - struct sctphdr *sh = sctp_hdr(skb); + struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); __le32 ret, old = sh->checksum; const struct skb_checksum_ops ops = { .update = sctp_csum_update, From 50f8cd4d319fc3a8692ce3408e1167ded778b90b Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Sat, 16 Mar 2019 08:28:18 +0000 Subject: [PATCH 2520/2608] net: aquantia: fix rx checksum offload for UDP/TCP over IPv6 [ Upstream commit a7faaa0c5dc7d091cc9f72b870d7edcdd6f43f12 ] TCP/UDP checksum validity was propagated to skb only if IP checksum is valid. But for IPv6 there is no validity as there is no checksum in IPv6. This patch propagates TCP/UDP checksum validity regardless of IP checksum. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Igor Russkikh Signed-off-by: Nikita Danilov Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 640babf752ea..784c3522aaa3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -270,11 +270,12 @@ int aq_ring_rx_clean(struct aq_ring_s *self, } else { if (buff->is_ip_cso) { __skb_incr_checksum_unnecessary(skb); - if (buff->is_udp_cso || buff->is_tcp_cso) - __skb_incr_checksum_unnecessary(skb); } else { skb->ip_summed = CHECKSUM_NONE; } + + if (buff->is_udp_cso || buff->is_tcp_cso) + __skb_incr_checksum_unnecessary(skb); } skb_set_hash(skb, buff->rss_hash, From f8a85be02fb76fcce4ebf623d42cce4ffbefddb9 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 16 Mar 2019 14:21:19 +1100 Subject: [PATCH 2521/2608] mac8390: Fix mmio access size probe [ Upstream commit bb9e5c5bcd76f4474eac3baf643d7a39f7bac7bb ] The bug that Stan reported is as follows. After a restart, a 16-bit NIC may be incorrectly identified as a 32-bit NIC and stop working. mac8390 slot.E: Memory length resource not found, probing mac8390 slot.E: Farallon EtherMac II-C (type farallon) mac8390 slot.E: MAC 00:00:c5:30:c2:99, IRQ 61, 32 KB shared memory at 0xfeed0000, 32-bit access. The bug never arises after a cold start and only intermittently after a warm start. (I didn't investigate why the bug is intermittent.) It turns out that memcpy_toio() is deprecated and memcmp_withio() also has issues. Replacing these calls with mmio accessors fixes the problem. Reported-and-tested-by: Stan Johnson Fixes: 2964db0f5904 ("m68k: Mac DP8390 update") Signed-off-by: Finn Thain Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/8390/mac8390.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index 9497f18eaba0..e95a7567bb23 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -156,8 +156,6 @@ static void dayna_block_output(struct net_device *dev, int count, #define memcpy_fromio(a, b, c) memcpy((a), (void *)(b), (c)) #define memcpy_toio(a, b, c) memcpy((void *)(a), (b), (c)) -#define memcmp_withio(a, b, c) memcmp((a), (void *)(b), (c)) - /* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */ static void slow_sane_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page); @@ -237,19 +235,26 @@ static enum mac8390_type __init mac8390_ident(struct nubus_dev *dev) static enum mac8390_access __init mac8390_testio(volatile unsigned long membase) { - unsigned long outdata = 0xA5A0B5B0; - unsigned long indata = 0x00000000; + u32 outdata = 0xA5A0B5B0; + u32 indata = 0; + /* Try writing 32 bits */ - memcpy_toio(membase, &outdata, 4); - /* Now compare them */ - if (memcmp_withio(&outdata, membase, 4) == 0) + nubus_writel(outdata, membase); + /* Now read it back */ + indata = nubus_readl(membase); + if (outdata == indata) return ACCESS_32; + + outdata = 0xC5C0D5D0; + indata = 0; + /* Write 16 bit output */ word_memcpy_tocard(membase, &outdata, 4); /* Now read it back */ word_memcpy_fromcard(&indata, membase, 4); if (outdata == indata) return ACCESS_16; + return ACCESS_UNKNOWN; } From 63a85fb4871a096c3bd5777b9f870b2386f45160 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2019 20:19:47 -0700 Subject: [PATCH 2522/2608] tun: properly test for IFF_UP [ Upstream commit 4477138fa0ae4e1b699786ef0600863ea6e6c61c ] Same reasons than the ones explained in commit 4179cb5a4c92 ("vxlan: test dev->flags & IFF_UP before calling netif_rx()") netif_rx_ni() or napi_gro_frags() must be called under a strict contract. At device dismantle phase, core networking clears IFF_UP and flush_all_backlogs() is called after rcu grace period to make sure no incoming packet might be in a cpu backlog and still referencing the device. A similar protocol is used for gro layer. Most drivers call netif_rx() from their interrupt handler, and since the interrupts are disabled at device dismantle, netif_rx() does not have to check dev->flags & IFF_UP Virtual drivers do not have this guarantee, and must therefore make the check themselves. Fixes: 1bd4978a88ac ("tun: honor IFF_UP in tun_get_user()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4227ee33ef19..9d6431d13c98 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1403,9 +1403,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, u32 rxhash; int skb_xdp = 1; - if (!(tun->dev->flags & IFF_UP)) - return -EIO; - if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) return -EINVAL; @@ -1493,9 +1490,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, err = skb_copy_datagram_from_iter(skb, 0, from, len); if (err) { + err = -EFAULT; +drop: this_cpu_inc(tun->pcpu_stats->rx_dropped); kfree_skb(skb); - return -EFAULT; + return err; } } @@ -1566,11 +1565,19 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } rxhash = __skb_get_hash_symmetric(skb); + + rcu_read_lock(); + if (unlikely(!(tun->dev->flags & IFF_UP))) { + err = -EIO; + goto drop; + } + #ifndef CONFIG_4KSTACKS tun_rx_batched(tun, tfile, skb, more); #else netif_rx_ni(skb); #endif + rcu_read_unlock(); stats = get_cpu_ptr(tun->pcpu_stats); u64_stats_update_begin(&stats->syncp); From 6caa2c1036293ef8571f5263b0fb247d81fb19f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 16 Mar 2019 13:09:53 -0700 Subject: [PATCH 2523/2608] tun: add a missing rcu_read_unlock() in error path commit 9180bb4f046064dfa4541488102703b402bb04e1 upstream. In my latest patch I missed one rcu_read_unlock(), in case device is down. Fixes: 4477138fa0ae ("tun: properly test for IFF_UP") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9d6431d13c98..3b13d9e4030a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1569,6 +1569,7 @@ drop: rcu_read_lock(); if (unlikely(!(tun->dev->flags & IFF_UP))) { err = -EIO; + rcu_read_unlock(); goto drop; } From f1a6390af33c9bff231e31d514f295e25c271a8c Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 29 Mar 2019 22:25:49 +1100 Subject: [PATCH 2524/2608] powerpc/64s: Add support for ori barrier_nospec patching commit 2eea7f067f495e33b8b116b35b5988ab2b8aec55 upstream. Based on the RFI patching. This is required to be able to disable the speculation barrier. Only one barrier type is supported and it does nothing when the firmware does not enable it. Also re-patching modules is not supported So the only meaningful thing that can be done is patching out the speculation barrier at boot when the user says it is not wanted. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/feature-fixups.h | 9 ++++++++ arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/security.c | 9 ++++++++ arch/powerpc/kernel/vmlinux.lds.S | 7 ++++++ arch/powerpc/lib/feature-fixups.c | 27 +++++++++++++++++++++++ 6 files changed, 54 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index e582d2c88092..f67b3f6e36be 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ * Prevent execution of subsequent instructions until preceding branches have * been fully resolved and are no longer executing speculatively. */ -#define barrier_nospec_asm ori 31,31,0 +#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; nop // This also acts as a compiler barrier due to the memory clobber. #define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory") diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index a9b64df34e2a..fcfd05672b1b 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -211,6 +211,14 @@ label##3: \ FTR_ENTRY_OFFSET 951b-952b; \ .popsection; +#define NOSPEC_BARRIER_FIXUP_SECTION \ +953: \ + .pushsection __barrier_nospec_fixup,"a"; \ + .align 2; \ +954: \ + FTR_ENTRY_OFFSET 953b-954b; \ + .popsection; + #ifndef __ASSEMBLY__ #include @@ -219,6 +227,7 @@ extern long stf_barrier_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; +extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; void apply_feature_fixups(void); void setup_feature_keys(void); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index a5e919e34c42..88018e442386 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,6 +52,7 @@ enum l1d_flush_type { void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); +void do_barrier_nospec_fixups(bool enable); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index b98a722da915..4f18a447ab70 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -10,10 +10,19 @@ #include #include +#include unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +static bool barrier_nospec_enabled; + +static void enable_barrier_nospec(bool enable) +{ + barrier_nospec_enabled = enable; + do_barrier_nospec_fixups(enable); +} + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { bool thread_priv; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index c89ffb88fa3b..43960d69bec9 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -153,6 +153,13 @@ SECTIONS *(__rfi_flush_fixup) __stop___rfi_flush_fixup = .; } + + . = ALIGN(8); + __spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) { + __start___barrier_nospec_fixup = .; + *(__barrier_nospec_fixup) + __stop___barrier_nospec_fixup = .; + } #endif EXCEPTION_TABLE(0) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index e1bcdc32a851..65b4e8276bdd 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -277,6 +277,33 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) (types & L1D_FLUSH_MTTRIG) ? "mttrig type" : "unknown"); } + +void do_barrier_nospec_fixups(bool enable) +{ + unsigned int instr, *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___barrier_nospec_fixup), + end = PTRRELOC(&__stop___barrier_nospec_fixup); + + instr = 0x60000000; /* nop */ + + if (enable) { + pr_info("barrier-nospec: using ORI speculation barrier\n"); + instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + patch_instruction(dest, instr); + } + + printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); +} + #endif /* CONFIG_PPC_BOOK3S_64 */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) From 31c3a1e81171f03c7df48aac3efc1d417a18c708 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 29 Mar 2019 22:25:50 +1100 Subject: [PATCH 2525/2608] powerpc/64s: Patch barrier_nospec in modules commit 815069ca57c142eb71d27439bc27f41a433a67b3 upstream. Note that unlike RFI which is patched only in kernel the nospec state reflects settings at the time the module was loaded. Iterating all modules and re-patching every time the settings change is not implemented. Based on lwsync patching. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/setup.h | 7 +++++++ arch/powerpc/kernel/module.c | 6 ++++++ arch/powerpc/kernel/security.c | 2 +- arch/powerpc/lib/feature-fixups.c | 16 +++++++++++++--- 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 88018e442386..8f3e5f6de0dd 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -53,6 +53,13 @@ enum l1d_flush_type { void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); +extern bool barrier_nospec_enabled; + +#ifdef CONFIG_PPC_BOOK3S_64 +void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); +#else +static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; +#endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 3f7ba0f5bf29..1b3c6835e730 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -72,6 +72,12 @@ int module_finalize(const Elf_Ehdr *hdr, do_feature_fixups(powerpc_firmware_features, (void *)sect->sh_addr, (void *)sect->sh_addr + sect->sh_size); + + sect = find_section(hdr, sechdrs, "__spec_barrier_fixup"); + if (sect != NULL) + do_barrier_nospec_fixups_range(barrier_nospec_enabled, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); #endif sect = find_section(hdr, sechdrs, "__lwsync_fixup"); diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 4f18a447ab70..4eb9d2b252e3 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -15,7 +15,7 @@ unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; -static bool barrier_nospec_enabled; +bool barrier_nospec_enabled; static void enable_barrier_nospec(bool enable) { diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 65b4e8276bdd..d78421174ab6 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -278,14 +278,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) : "unknown"); } -void do_barrier_nospec_fixups(bool enable) +void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { unsigned int instr, *dest; long *start, *end; int i; - start = PTRRELOC(&__start___barrier_nospec_fixup), - end = PTRRELOC(&__stop___barrier_nospec_fixup); + start = fixup_start; + end = fixup_end; instr = 0x60000000; /* nop */ @@ -304,6 +304,16 @@ void do_barrier_nospec_fixups(bool enable) printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } +void do_barrier_nospec_fixups(bool enable) +{ + void *start, *end; + + start = PTRRELOC(&__start___barrier_nospec_fixup), + end = PTRRELOC(&__stop___barrier_nospec_fixup); + + do_barrier_nospec_fixups_range(enable, start, end); +} + #endif /* CONFIG_PPC_BOOK3S_64 */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) From ae082ed4702e31ed833b7b40b1afe4ce11adb6b4 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 29 Mar 2019 22:25:51 +1100 Subject: [PATCH 2526/2608] powerpc/64s: Enable barrier_nospec based on firmware settings commit cb3d6759a93c6d0aea1c10deb6d00e111c29c19c upstream. Check what firmware told us and enable/disable the barrier_nospec as appropriate. We err on the side of enabling the barrier, as it's no-op on older systems, see the comment for more detail. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/security.c | 59 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 1 + arch/powerpc/platforms/pseries/setup.c | 1 + 4 files changed, 62 insertions(+) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 8f3e5f6de0dd..cd436d208b40 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,6 +52,7 @@ enum l1d_flush_type { void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); +void setup_barrier_nospec(void); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 4eb9d2b252e3..7553951b500a 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -23,6 +23,65 @@ static void enable_barrier_nospec(bool enable) do_barrier_nospec_fixups(enable); } +void setup_barrier_nospec(void) +{ + bool enable; + + /* + * It would make sense to check SEC_FTR_SPEC_BAR_ORI31 below as well. + * But there's a good reason not to. The two flags we check below are + * both are enabled by default in the kernel, so if the hcall is not + * functional they will be enabled. + * On a system where the host firmware has been updated (so the ori + * functions as a barrier), but on which the hypervisor (KVM/Qemu) has + * not been updated, we would like to enable the barrier. Dropping the + * check for SEC_FTR_SPEC_BAR_ORI31 achieves that. The only downside is + * we potentially enable the barrier on systems where the host firmware + * is not updated, but that's harmless as it's a no-op. + */ + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); + + enable_barrier_nospec(enable); +} + +#ifdef CONFIG_DEBUG_FS +static int barrier_nospec_set(void *data, u64 val) +{ + switch (val) { + case 0: + case 1: + break; + default: + return -EINVAL; + } + + if (!!val == !!barrier_nospec_enabled) + return 0; + + enable_barrier_nospec(!!val); + + return 0; +} + +static int barrier_nospec_get(void *data, u64 *val) +{ + *val = barrier_nospec_enabled ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec, + barrier_nospec_get, barrier_nospec_set, "%llu\n"); + +static __init int barrier_nospec_debugfs_init(void) +{ + debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL, + &fops_barrier_nospec); + return 0; +} +device_initcall(barrier_nospec_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { bool thread_priv; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index fd143c934768..e6f8505a3818 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -123,6 +123,7 @@ static void pnv_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); + setup_barrier_nospec(); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 45f814041448..ac12ec4d839d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -534,6 +534,7 @@ void pseries_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); + setup_barrier_nospec(); } static void __init pSeries_setup_arch(void) From 3692ca6ff3ee470951c91c788447ed38e6f8dd7c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:25:52 +1100 Subject: [PATCH 2527/2608] powerpc: Use barrier_nospec in copy_from_user() commit ddf35cf3764b5a182b178105f57515b42e2634f8 upstream. Based on the x86 commit doing the same. See commit 304ec1b05031 ("x86/uaccess: Use __uaccess_begin_nospec() and uaccess_try_nospec") and b3bbfb3fb5d2 ("x86: Introduce __uaccess_begin_nospec() and uaccess_try_nospec") for more detail. In all cases we are ordering the load from the potentially user-controlled pointer vs a previous branch based on an access_ok() check or similar. Base on a patch from Michal Suchanek. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/uaccess.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index cf26e62b268d..bd6d0fb5be9f 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -238,6 +238,7 @@ do { \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ + barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ @@ -249,8 +250,10 @@ do { \ __long_type(*(ptr)) __gu_val = 0; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ - if (access_ok(VERIFY_READ, __gu_addr, (size))) \ + if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ + barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) @@ -261,6 +264,7 @@ do { \ __long_type(*(ptr)) __gu_val; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ + barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ @@ -288,15 +292,19 @@ static inline unsigned long raw_copy_from_user(void *to, switch (n) { case 1: + barrier_nospec(); __get_user_size(*(u8 *)to, from, 1, ret); break; case 2: + barrier_nospec(); __get_user_size(*(u16 *)to, from, 2, ret); break; case 4: + barrier_nospec(); __get_user_size(*(u32 *)to, from, 4, ret); break; case 8: + barrier_nospec(); __get_user_size(*(u64 *)to, from, 8, ret); break; } @@ -304,6 +312,7 @@ static inline unsigned long raw_copy_from_user(void *to, return 0; } + barrier_nospec(); return __copy_tofrom_user((__force void __user *)to, from, n); } From d4d3a4b43809a1a8bf6031dd22b7eb9b05e09c09 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:25:53 +1100 Subject: [PATCH 2528/2608] powerpc/64: Use barrier_nospec in syscall entry commit 51973a815c6b46d7b23b68d6af371ad1c9d503ca upstream. Our syscall entry is done in assembly so patch in an explicit barrier_nospec. Based on a patch by Michal Suchanek. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c194f4c8e66b..7a43b27dc6e0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -36,6 +36,7 @@ #include #include #include +#include #include #ifdef CONFIG_PPC_BOOK3S #include @@ -179,6 +180,15 @@ system_call: /* label this so stack traces look sane */ clrldi r8,r8,32 15: slwi r0,r0,4 + + barrier_nospec_asm + /* + * Prevent the load of the handler below (based on the user-passed + * system call number) being speculatively executed until the test + * against NR_syscalls and branch to .Lsyscall_enosys above has + * committed. + */ + ldx r12,r11,r0 /* Fetch system call handler [ptr] */ mtctr r12 bctrl /* Call handler */ From 3fe64040fbb266aa998ff69ba8fb47f69f88f853 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 29 Mar 2019 22:25:54 +1100 Subject: [PATCH 2529/2608] powerpc/64s: Enhance the information in cpu_show_spectre_v1() commit a377514519b9a20fa1ea9adddbb4129573129cef upstream. We now have barrier_nospec as mitigation so print it in cpu_show_spectre_v1() when enabled. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 7553951b500a..a8b277362931 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -120,6 +120,9 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, c if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) return sprintf(buf, "Not affected\n"); + if (barrier_nospec_enabled) + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return sprintf(buf, "Vulnerable\n"); } From 5159e24439a1863b2f237bca8880ead7da608400 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:25:55 +1100 Subject: [PATCH 2530/2608] powerpc64s: Show ori31 availability in spectre_v1 sysfs file not v2 commit 6d44acae1937b81cf8115ada8958e04f601f3f2e upstream. When I added the spectre_v2 information in sysfs, I included the availability of the ori31 speculation barrier. Although the ori31 barrier can be used to mitigate v2, it's primarily intended as a spectre v1 mitigation. Spectre v2 is mitigated by hardware changes. So rework the sysfs files to show the ori31 information in the spectre_v1 file, rather than v2. Currently we display eg: $ grep . spectre_v* spectre_v1:Mitigation: __user pointer sanitization spectre_v2:Mitigation: Indirect branch cache disabled, ori31 speculation barrier enabled After: $ grep . spectre_v* spectre_v1:Mitigation: __user pointer sanitization, ori31 speculation barrier enabled spectre_v2:Mitigation: Indirect branch cache disabled Fixes: d6fbe1c55c55 ("powerpc/64s: Wire up cpu_show_spectre_v2()") Cc: stable@vger.kernel.org # v4.17+ Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index a8b277362931..4cb8f1f7b593 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -117,25 +117,35 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) - return sprintf(buf, "Not affected\n"); + struct seq_buf s; - if (barrier_nospec_enabled) - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + seq_buf_init(&s, buf, PAGE_SIZE - 1); - return sprintf(buf, "Vulnerable\n"); + if (security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) { + if (barrier_nospec_enabled) + seq_buf_printf(&s, "Mitigation: __user pointer sanitization"); + else + seq_buf_printf(&s, "Vulnerable"); + + if (security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31)) + seq_buf_printf(&s, ", ori31 speculation barrier enabled"); + + seq_buf_printf(&s, "\n"); + } else + seq_buf_printf(&s, "Not affected\n"); + + return s.len; } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - bool bcs, ccd, ori; struct seq_buf s; + bool bcs, ccd; seq_buf_init(&s, buf, PAGE_SIZE - 1); bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); - ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31); if (bcs || ccd) { seq_buf_printf(&s, "Mitigation: "); @@ -151,9 +161,6 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c } else seq_buf_printf(&s, "Vulnerable"); - if (ori) - seq_buf_printf(&s, ", ori31 speculation barrier enabled"); - seq_buf_printf(&s, "\n"); return s.len; From 64ca6cd4ae6dad71db5b3df06237a3bd44b747b9 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:25:56 +1100 Subject: [PATCH 2531/2608] powerpc/64: Disable the speculation barrier from the command line commit cf175dc315f90185128fb061dc05b6fbb211aa2f upstream. The speculation barrier can be disabled from the command line with the parameter: "nospectre_v1". Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 4cb8f1f7b593..79f9397998ed 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -16,6 +16,7 @@ unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; bool barrier_nospec_enabled; +static bool no_nospec; static void enable_barrier_nospec(bool enable) { @@ -42,9 +43,18 @@ void setup_barrier_nospec(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); - enable_barrier_nospec(enable); + if (!no_nospec) + enable_barrier_nospec(enable); } +static int __init handle_nospectre_v1(char *p) +{ + no_nospec = true; + + return 0; +} +early_param("nospectre_v1", handle_nospectre_v1); + #ifdef CONFIG_DEBUG_FS static int barrier_nospec_set(void *data, u64 val) { From 5a56eba71a139f1ded1707cd102017c5f2ad83b1 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:25:57 +1100 Subject: [PATCH 2532/2608] powerpc/64: Make stf barrier PPC_BOOK3S_64 specific. commit 6453b532f2c8856a80381e6b9a1f5ea2f12294df upstream. NXP Book3E platforms are not vulnerable to speculative store bypass, so make the mitigations PPC_BOOK3S_64 specific. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 79f9397998ed..8ee1ade845c6 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -176,6 +176,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c return s.len; } +#ifdef CONFIG_PPC_BOOK3S_64 /* * Store-forwarding barrier support. */ @@ -323,3 +324,4 @@ static __init int stf_barrier_debugfs_init(void) } device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_PPC_BOOK3S_64 */ From 56fe5a966a902b72e36a3b9991f07ed9c04f67fe Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:25:58 +1100 Subject: [PATCH 2533/2608] powerpc/64: Add CONFIG_PPC_BARRIER_NOSPEC commit 179ab1cbf883575c3a585bcfc0f2160f1d22a149 upstream. Add a config symbol to encode which platforms support the barrier_nospec speculation barrier. Currently this is just Book3S 64 but we will add Book3E in a future patch. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 7 ++++++- arch/powerpc/include/asm/barrier.h | 6 +++--- arch/powerpc/include/asm/setup.h | 2 +- arch/powerpc/kernel/Makefile | 3 ++- arch/powerpc/kernel/module.c | 4 +++- arch/powerpc/kernel/vmlinux.lds.S | 4 +++- arch/powerpc/lib/feature-fixups.c | 6 ++++-- 7 files changed, 22 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fe418226df7f..d4eb36bb1f7c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -164,7 +164,7 @@ config PPC select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE - select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 + select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_SMP_IDLE_THREAD @@ -236,6 +236,11 @@ config PPC # Please keep this list sorted alphabetically. # +config PPC_BARRIER_NOSPEC + bool + default y + depends on PPC_BOOK3S_64 + config GENERIC_CSUM def_bool n diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index f67b3f6e36be..ec43375463ba 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -76,7 +76,7 @@ do { \ ___p1; \ }) -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_BARRIER_NOSPEC /* * Prevent execution of subsequent instructions until preceding branches have * been fully resolved and are no longer executing speculatively. @@ -86,10 +86,10 @@ do { \ // This also acts as a compiler barrier due to the memory clobber. #define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory") -#else /* !CONFIG_PPC_BOOK3S_64 */ +#else /* !CONFIG_PPC_BARRIER_NOSPEC */ #define barrier_nospec_asm #define barrier_nospec() -#endif +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ #include diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index cd436d208b40..1f06bfaac7cc 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -56,7 +56,7 @@ void setup_barrier_nospec(void); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_BARRIER_NOSPEC void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); #else static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a1089c9a9aa5..142b08d40642 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -45,9 +45,10 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o -obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 1b3c6835e730..77371c9ef3d8 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -72,13 +72,15 @@ int module_finalize(const Elf_Ehdr *hdr, do_feature_fixups(powerpc_firmware_features, (void *)sect->sh_addr, (void *)sect->sh_addr + sect->sh_size); +#endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_BARRIER_NOSPEC sect = find_section(hdr, sechdrs, "__spec_barrier_fixup"); if (sect != NULL) do_barrier_nospec_fixups_range(barrier_nospec_enabled, (void *)sect->sh_addr, (void *)sect->sh_addr + sect->sh_size); -#endif +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ sect = find_section(hdr, sechdrs, "__lwsync_fixup"); if (sect != NULL) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 43960d69bec9..7a178dc3f19c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -153,14 +153,16 @@ SECTIONS *(__rfi_flush_fixup) __stop___rfi_flush_fixup = .; } +#endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_BARRIER_NOSPEC . = ALIGN(8); __spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) { __start___barrier_nospec_fixup = .; *(__barrier_nospec_fixup) __stop___barrier_nospec_fixup = .; } -#endif +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ EXCEPTION_TABLE(0) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index d78421174ab6..4f6acfc87010 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -304,6 +304,9 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BARRIER_NOSPEC void do_barrier_nospec_fixups(bool enable) { void *start, *end; @@ -313,8 +316,7 @@ void do_barrier_nospec_fixups(bool enable) do_barrier_nospec_fixups_range(enable, start, end); } - -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { From ef245f3d4494224da209adc91a9077d0467b85cd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:25:59 +1100 Subject: [PATCH 2534/2608] powerpc/64: Call setup_barrier_nospec() from setup_arch() commit af375eefbfb27cbb5b831984e66d724a40d26b5c upstream. Currently we require platform code to call setup_barrier_nospec(). But if we add an empty definition for the !CONFIG_PPC_BARRIER_NOSPEC case then we can call it in setup_arch(). Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/setup.h | 4 ++++ arch/powerpc/kernel/setup-common.c | 2 ++ arch/powerpc/platforms/powernv/setup.c | 1 - arch/powerpc/platforms/pseries/setup.c | 1 - 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 1f06bfaac7cc..102b778c8496 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,7 +52,11 @@ enum l1d_flush_type { void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); +#ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); +#else +static inline void setup_barrier_nospec(void) { }; +#endif void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 008447664643..ab7a75b731da 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -937,6 +937,8 @@ void __init setup_arch(char **cmdline_p) if (ppc_md.setup_arch) ppc_md.setup_arch(); + setup_barrier_nospec(); + paging_init(); /* Initialize the MMU context management stuff. */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index e6f8505a3818..fd143c934768 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -123,7 +123,6 @@ static void pnv_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); - setup_barrier_nospec(); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ac12ec4d839d..45f814041448 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -534,7 +534,6 @@ void pseries_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); - setup_barrier_nospec(); } static void __init pSeries_setup_arch(void) From 0c60d583305d36c2d9a67152a8fb43095486d1ab Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:00 +1100 Subject: [PATCH 2535/2608] powerpc/64: Make meltdown reporting Book3S 64 specific commit 406d2b6ae3420f5bb2b3db6986dc6f0b6dbb637b upstream. In a subsequent patch we will enable building security.c for Book3E. However the NXP platforms are not vulnerable to Meltdown, so make the Meltdown vulnerability reporting PPC_BOOK3S_64 specific. Signed-off-by: Diana Craciun [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 8ee1ade845c6..206488603b66 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -92,6 +92,7 @@ static __init int barrier_nospec_debugfs_init(void) device_initcall(barrier_nospec_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +#ifdef CONFIG_PPC_BOOK3S_64 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { bool thread_priv; @@ -124,6 +125,7 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha return sprintf(buf, "Vulnerable\n"); } +#endif ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { From fd4856a96b87da46a862d6205ddad6d4c89e0fdd Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:01 +1100 Subject: [PATCH 2536/2608] powerpc/fsl: Add barrier_nospec implementation for NXP PowerPC Book3E commit ebcd1bfc33c7a90df941df68a6e5d4018c022fba upstream. Implement the barrier_nospec as a isync;sync instruction sequence. The implementation uses the infrastructure built for BOOK3S 64. Signed-off-by: Diana Craciun [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/barrier.h | 8 +++++++- arch/powerpc/lib/feature-fixups.c | 31 ++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d4eb36bb1f7c..de3b07c7be30 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -239,7 +239,7 @@ config PPC config PPC_BARRIER_NOSPEC bool default y - depends on PPC_BOOK3S_64 + depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E config GENERIC_CSUM def_bool n diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index ec43375463ba..449474f667c4 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -76,12 +76,18 @@ do { \ ___p1; \ }) +#ifdef CONFIG_PPC_BOOK3S_64 +#define NOSPEC_BARRIER_SLOT nop +#elif defined(CONFIG_PPC_FSL_BOOK3E) +#define NOSPEC_BARRIER_SLOT nop; nop +#endif + #ifdef CONFIG_PPC_BARRIER_NOSPEC /* * Prevent execution of subsequent instructions until preceding branches have * been fully resolved and are no longer executing speculatively. */ -#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; nop +#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; NOSPEC_BARRIER_SLOT // This also acts as a compiler barrier due to the memory clobber. #define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory") diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4f6acfc87010..cac17882ac61 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -318,6 +318,37 @@ void do_barrier_nospec_fixups(bool enable) } #endif /* CONFIG_PPC_BARRIER_NOSPEC */ +#ifdef CONFIG_PPC_FSL_BOOK3E +void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) +{ + unsigned int instr[2], *dest; + long *start, *end; + int i; + + start = fixup_start; + end = fixup_end; + + instr[0] = PPC_INST_NOP; + instr[1] = PPC_INST_NOP; + + if (enable) { + pr_info("barrier-nospec: using isync; sync as speculation barrier\n"); + instr[0] = PPC_INST_ISYNC; + instr[1] = PPC_INST_SYNC; + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + patch_instruction(dest, instr[0]); + patch_instruction(dest + 1, instr[1]); + } + + printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_FSL_BOOK3E */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; From 0890a57258e06446e04662529e2a7ad3f65d9816 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:02 +1100 Subject: [PATCH 2537/2608] powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms commit c28218d4abbf4f2035495334d8bfcba64bda4787 upstream. Used barrier_nospec to sanitize the syscall table. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_32.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 4ae464b9d490..a2999cd73a82 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -33,6 +33,7 @@ #include #include #include +#include /* * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. @@ -358,6 +359,15 @@ syscall_dotrace_cont: ori r10,r10,sys_call_table@l slwi r0,r0,2 bge- 66f + + barrier_nospec_asm + /* + * Prevent the load of the handler below (based on the user-passed + * system call number) being speculatively executed until the test + * against NR_syscalls and branch to .66f above has + * committed. + */ + lwzx r10,r10,r0 /* Fetch system call handler [ptr] */ mtlr r10 addi r9,r1,STACK_FRAME_OVERHEAD From ef7c2ea84e7f3e2fa368d63208684d5b6eb2c919 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:26:03 +1100 Subject: [PATCH 2538/2608] powerpc/asm: Add a patch_site macro & helpers for patching instructions commit 06d0bbc6d0f56dacac3a79900e9a9a0d5972d818 upstream. Add a macro and some helper C functions for patching single asm instructions. The gas macro means we can do something like: 1: nop patch_site 1b, patch__foo Which is less visually distracting than defining a GLOBAL symbol at 1, and also doesn't pollute the symbol table which can confuse eg. perf. These are obviously similar to our existing feature sections, but are not automatically patched based on CPU/MMU features, rather they are designed to be manually patched by C code at some arbitrary point. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/code-patching-asm.h | 18 ++++++++++++++++++ arch/powerpc/include/asm/code-patching.h | 2 ++ arch/powerpc/lib/code-patching.c | 16 ++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 arch/powerpc/include/asm/code-patching-asm.h diff --git a/arch/powerpc/include/asm/code-patching-asm.h b/arch/powerpc/include/asm/code-patching-asm.h new file mode 100644 index 000000000000..ed7b1448493a --- /dev/null +++ b/arch/powerpc/include/asm/code-patching-asm.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2018, Michael Ellerman, IBM Corporation. + */ +#ifndef _ASM_POWERPC_CODE_PATCHING_ASM_H +#define _ASM_POWERPC_CODE_PATCHING_ASM_H + +/* Define a "site" that can be patched */ +.macro patch_site label name + .pushsection ".rodata" + .balign 4 + .global \name +\name: + .4byte \label - . + .popsection +.endm + +#endif /* _ASM_POWERPC_CODE_PATCHING_ASM_H */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 812535f40124..b2051234ada8 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -32,6 +32,8 @@ unsigned int create_cond_branch(const unsigned int *addr, int patch_branch(unsigned int *addr, unsigned long target, int flags); int patch_instruction(unsigned int *addr, unsigned int instr); int raw_patch_instruction(unsigned int *addr, unsigned int instr); +int patch_instruction_site(s32 *addr, unsigned int instr); +int patch_branch_site(s32 *site, unsigned long target, int flags); int instr_is_relative_branch(unsigned int instr); int instr_is_relative_link_branch(unsigned int instr); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 130405158afa..c5154817178b 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -206,6 +206,22 @@ int patch_branch(unsigned int *addr, unsigned long target, int flags) return patch_instruction(addr, create_branch(addr, target, flags)); } +int patch_branch_site(s32 *site, unsigned long target, int flags) +{ + unsigned int *addr; + + addr = (unsigned int *)((unsigned long)site + *site); + return patch_instruction(addr, create_branch(addr, target, flags)); +} + +int patch_instruction_site(s32 *site, unsigned int instr) +{ + unsigned int *addr; + + addr = (unsigned int *)((unsigned long)site + *site); + return patch_instruction(addr, instr); +} + bool is_offset_in_branch_range(long offset) { /* From 3bedc7080f100fd041c639dddc0fd45c1e72b211 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:26:04 +1100 Subject: [PATCH 2539/2608] powerpc/64s: Add new security feature flags for count cache flush commit dc8c6cce9a26a51fc19961accb978217a3ba8c75 upstream. Add security feature flags to indicate the need for software to flush the count cache on context switch, and for the presence of a hardware assisted count cache flush. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/security_features.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 44989b22383c..a0d47bc18a5c 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -59,6 +59,9 @@ static inline bool security_ftr_enabled(unsigned long feature) // Indirect branch prediction cache disabled #define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull +// bcctr 2,0,0 triggers a hardware assisted count cache flush +#define SEC_FTR_BCCTR_FLUSH_ASSIST 0x0000000000000800ull + // Features indicating need for Spectre/Meltdown mitigations @@ -74,6 +77,9 @@ static inline bool security_ftr_enabled(unsigned long feature) // Firmware configuration indicates user favours security over performance #define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull +// Software required to flush count cache on context switch +#define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull + // Features enabled by default #define SEC_FTR_DEFAULT \ From 9346855c66dc54a15febd728c2a78d72bc3454c8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:26:05 +1100 Subject: [PATCH 2540/2608] powerpc/64s: Add support for software count cache flush commit ee13cb249fabdff8b90aaff61add347749280087 upstream. Some CPU revisions support a mode where the count cache needs to be flushed by software on context switch. Additionally some revisions may have a hardware accelerated flush, in which case the software flush sequence can be shortened. If we detect the appropriate flag from firmware we patch a branch into _switch() which takes us to a count cache flush sequence. That sequence in turn may be patched to return early if we detect that the CPU supports accelerating the flush sequence in hardware. Add debugfs support for reporting the state of the flush, as well as runtime disabling it. And modify the spectre_v2 sysfs file to report the state of the software flush. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/asm-prototypes.h | 6 ++ arch/powerpc/include/asm/security_features.h | 1 + arch/powerpc/kernel/entry_64.S | 54 +++++++++++ arch/powerpc/kernel/security.c | 98 +++++++++++++++++++- 4 files changed, 154 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 7330150bfe34..ba4c75062d49 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -126,4 +126,10 @@ extern int __ucmpdi2(u64, u64); void _mcount(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); +/* Patch sites */ +extern s32 patch__call_flush_count_cache; +extern s32 patch__flush_count_cache_return; + +extern long flush_count_cache; + #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index a0d47bc18a5c..759597bf0fd8 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -22,6 +22,7 @@ enum stf_barrier_type { void setup_stf_barrier(void); void do_stf_barrier_fixups(enum stf_barrier_type types); +void setup_count_cache_flush(void); static inline void security_ftr_set(unsigned long feature) { diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 7a43b27dc6e0..e40e74e8c635 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -497,6 +498,57 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b .Lsyscall_exit +#ifdef CONFIG_PPC_BOOK3S_64 + +#define FLUSH_COUNT_CACHE \ +1: nop; \ + patch_site 1b, patch__call_flush_count_cache + + +#define BCCTR_FLUSH .long 0x4c400420 + +.macro nops number + .rept \number + nop + .endr +.endm + +.balign 32 +.global flush_count_cache +flush_count_cache: + /* Save LR into r9 */ + mflr r9 + + .rept 64 + bl .+4 + .endr + b 1f + nops 6 + + .balign 32 + /* Restore LR */ +1: mtlr r9 + li r9,0x7fff + mtctr r9 + + BCCTR_FLUSH + +2: nop + patch_site 2b patch__flush_count_cache_return + + nops 3 + + .rept 278 + .balign 32 + BCCTR_FLUSH + nops 7 + .endr + + blr +#else +#define FLUSH_COUNT_CACHE +#endif /* CONFIG_PPC_BOOK3S_64 */ + /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -528,6 +580,8 @@ _GLOBAL(_switch) std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ + FLUSH_COUNT_CACHE + /* * On SMP kernels, care must be taken because a task may be * scheduled off CPUx and on to CPUy. Memory ordering must be diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 206488603b66..554d33c7b758 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -9,12 +9,21 @@ #include #include +#include +#include #include #include unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +enum count_cache_flush_type { + COUNT_CACHE_FLUSH_NONE = 0x1, + COUNT_CACHE_FLUSH_SW = 0x2, + COUNT_CACHE_FLUSH_HW = 0x4, +}; +static enum count_cache_flush_type count_cache_flush_type; + bool barrier_nospec_enabled; static bool no_nospec; @@ -159,17 +168,29 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); - if (bcs || ccd) { + if (bcs || ccd || count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { + bool comma = false; seq_buf_printf(&s, "Mitigation: "); - if (bcs) + if (bcs) { seq_buf_printf(&s, "Indirect branch serialisation (kernel only)"); + comma = true; + } - if (bcs && ccd) + if (ccd) { + if (comma) + seq_buf_printf(&s, ", "); + seq_buf_printf(&s, "Indirect branch cache disabled"); + comma = true; + } + + if (comma) seq_buf_printf(&s, ", "); - if (ccd) - seq_buf_printf(&s, "Indirect branch cache disabled"); + seq_buf_printf(&s, "Software count cache flush"); + + if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) + seq_buf_printf(&s, "(hardware accelerated)"); } else seq_buf_printf(&s, "Vulnerable"); @@ -326,4 +347,71 @@ static __init int stf_barrier_debugfs_init(void) } device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ + +static void toggle_count_cache_flush(bool enable) +{ + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); + count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; + pr_info("count-cache-flush: software flush disabled.\n"); + return; + } + + patch_branch_site(&patch__call_flush_count_cache, + (u64)&flush_count_cache, BRANCH_SET_LINK); + + if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { + count_cache_flush_type = COUNT_CACHE_FLUSH_SW; + pr_info("count-cache-flush: full software flush sequence enabled.\n"); + return; + } + + patch_instruction_site(&patch__flush_count_cache_return, PPC_INST_BLR); + count_cache_flush_type = COUNT_CACHE_FLUSH_HW; + pr_info("count-cache-flush: hardware assisted flush sequence enabled\n"); +} + +void setup_count_cache_flush(void) +{ + toggle_count_cache_flush(true); +} + +#ifdef CONFIG_DEBUG_FS +static int count_cache_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + toggle_count_cache_flush(enable); + + return 0; +} + +static int count_cache_flush_get(void *data, u64 *val) +{ + if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE) + *val = 0; + else + *val = 1; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, + count_cache_flush_set, "%llu\n"); + +static __init int count_cache_flush_debugfs_init(void) +{ + debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root, + NULL, &fops_count_cache_flush); + return 0; +} +device_initcall(count_cache_flush_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_PPC_BOOK3S_64 */ From 05cc3eb64e1532606ae632f527b1e5821f1b9305 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:26:06 +1100 Subject: [PATCH 2541/2608] powerpc/pseries: Query hypervisor for count cache flush settings commit ba72dc171954b782a79d25e0f4b3ed91090c3b1e upstream. Use the existing hypercall to determine the appropriate settings for the count cache flush, and then call the generic powerpc code to set it up based on the security feature flags. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/hvcall.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 5a740feb7bd7..15cef59092c7 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -340,10 +340,12 @@ #define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5 #define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6 #define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7 +#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) // IBM bit 9 #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 +#define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 45f814041448..6a0ad56e89b9 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -484,6 +484,12 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED) security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) + security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + + if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE) + security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. @@ -534,6 +540,7 @@ void pseries_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); + setup_count_cache_flush(); } static void __init pSeries_setup_arch(void) From 5d758cf42903f218205902993bde630fb9dc8103 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:26:07 +1100 Subject: [PATCH 2542/2608] powerpc/powernv: Query firmware for count cache flush settings commit 99d54754d3d5f896a8f616b0b6520662bc99d66b upstream. Look for fw-features properties to determine the appropriate settings for the count cache flush, and then call the generic powerpc code to set it up based on the security feature flags. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index fd143c934768..888aa9584e94 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -77,6 +77,12 @@ static void init_fw_feat_flags(struct device_node *np) if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np)) + security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + + if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np)) + security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. @@ -123,6 +129,7 @@ static void pnv_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); + setup_count_cache_flush(); } static void __init pnv_setup_arch(void) From 555891364134518e8992ef36de733c71dcace363 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:08 +1100 Subject: [PATCH 2543/2608] powerpc/fsl: Add infrastructure to fixup branch predictor flush commit 76a5eaa38b15dda92cd6964248c39b5a6f3a4e9d upstream. In order to protect against speculation attacks (Spectre variant 2) on NXP PowerPC platforms, the branch predictor should be flushed when the privillege level is changed. This patch is adding the infrastructure to fixup at runtime the code sections that are performing the branch predictor flush depending on a boot arg parameter which is added later in a separate patch. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/feature-fixups.h | 12 ++++++++++++ arch/powerpc/include/asm/setup.h | 2 ++ arch/powerpc/kernel/vmlinux.lds.S | 8 ++++++++ arch/powerpc/lib/feature-fixups.c | 23 +++++++++++++++++++++++ 4 files changed, 45 insertions(+) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index fcfd05672b1b..b1d478acbaec 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -219,6 +219,17 @@ label##3: \ FTR_ENTRY_OFFSET 953b-954b; \ .popsection; +#define START_BTB_FLUSH_SECTION \ +955: \ + +#define END_BTB_FLUSH_SECTION \ +956: \ + .pushsection __btb_flush_fixup,"a"; \ + .align 2; \ +957: \ + FTR_ENTRY_OFFSET 955b-957b; \ + FTR_ENTRY_OFFSET 956b-957b; \ + .popsection; #ifndef __ASSEMBLY__ #include @@ -228,6 +239,7 @@ extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; +extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; void apply_feature_fixups(void); void setup_feature_keys(void); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 102b778c8496..59072bc50fbc 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -66,6 +66,8 @@ void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; #endif +void do_btb_flush_fixups(void); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7a178dc3f19c..b0cf4af7ba84 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -164,6 +164,14 @@ SECTIONS } #endif /* CONFIG_PPC_BARRIER_NOSPEC */ +#ifdef CONFIG_PPC_FSL_BOOK3E + . = ALIGN(8); + __spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) { + __start__btb_flush_fixup = .; + *(__btb_flush_fixup) + __stop__btb_flush_fixup = .; + } +#endif EXCEPTION_TABLE(0) NOTES :kernel :notes diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index cac17882ac61..de7861e09b41 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -347,6 +347,29 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } + +static void patch_btb_flush_section(long *curr) +{ + unsigned int *start, *end; + + start = (void *)curr + *curr; + end = (void *)curr + *(curr + 1); + for (; start < end; start++) { + pr_devel("patching dest %lx\n", (unsigned long)start); + patch_instruction(start, PPC_INST_NOP); + } +} + +void do_btb_flush_fixups(void) +{ + long *start, *end; + + start = PTRRELOC(&__start__btb_flush_fixup); + end = PTRRELOC(&__stop__btb_flush_fixup); + + for (; start < end; start += 2) + patch_btb_flush_section(start); +} #endif /* CONFIG_PPC_FSL_BOOK3E */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) From f7a1d9fabc254ec0c8313b925b4735deccaeeb35 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:09 +1100 Subject: [PATCH 2544/2608] powerpc/fsl: Add macro to flush the branch predictor commit 1cbf8990d79ff69da8ad09e8a3df014e1494462b upstream. The BUCSR register can be used to invalidate the entries in the branch prediction mechanisms. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/ppc_asm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 36f3e41c9fbe..3e1b8de72776 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -802,4 +802,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) stringify_in_c(.long (_target) - . ;) \ stringify_in_c(.previous) +#ifdef CONFIG_PPC_FSL_BOOK3E +#define BTB_FLUSH(reg) \ + lis reg,BUCSR_INIT@h; \ + ori reg,reg,BUCSR_INIT@l; \ + mtspr SPRN_BUCSR,reg; \ + isync; +#else +#define BTB_FLUSH(reg) +#endif /* CONFIG_PPC_FSL_BOOK3E */ + #endif /* _ASM_POWERPC_PPC_ASM_H */ From 191adcc50e1155331b230d0120100b211ea2c032 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:10 +1100 Subject: [PATCH 2545/2608] powerpc/fsl: Fix spectre_v2 mitigations reporting commit 7d8bad99ba5a22892f0cad6881289fdc3875a930 upstream. Currently for CONFIG_PPC_FSL_BOOK3E the spectre_v2 file is incorrect: $ cat /sys/devices/system/cpu/vulnerabilities/spectre_v2 "Mitigation: Software count cache flush" Which is wrong. Fix it to report vulnerable for now. Fixes: ee13cb249fab ("powerpc/64s: Add support for software count cache flush") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 554d33c7b758..f9bdd37ddc8c 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -22,7 +22,7 @@ enum count_cache_flush_type { COUNT_CACHE_FLUSH_SW = 0x2, COUNT_CACHE_FLUSH_HW = 0x4, }; -static enum count_cache_flush_type count_cache_flush_type; +static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; bool barrier_nospec_enabled; static bool no_nospec; From d09c53d3bacbcd79f3276285bde70854799d1f2f Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:11 +1100 Subject: [PATCH 2546/2608] powerpc/fsl: Emulate SPRN_BUCSR register commit 98518c4d8728656db349f875fcbbc7c126d4c973 upstream. In order to flush the branch predictor the guest kernel performs writes to the BUCSR register which is hypervisor privilleged. However, the branch predictor is flushed at each KVM entry, so the branch predictor has been already flushed, so just return as soon as possible to guest. Signed-off-by: Diana Craciun [mpe: Tweak comment formatting] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/e500_emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 990db69a1d0b..fa88f641ac03 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -277,6 +277,13 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va vcpu->arch.pwrmgtcr0 = spr_val; break; + case SPRN_BUCSR: + /* + * If we are here, it means that we have already flushed the + * branch predictor, so just return to guest. + */ + break; + /* extra exceptions */ #ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: From 36f75b15da9da072688711be8296e57e4523150c Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:12 +1100 Subject: [PATCH 2547/2608] powerpc/fsl: Add nospectre_v2 command line argument commit f633a8ad636efb5d4bba1a047d4a0f1ef719aa06 upstream. When the command line argument is present, the Spectre variant 2 mitigations are disabled. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/setup.h | 5 +++++ arch/powerpc/kernel/security.c | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 59072bc50fbc..5ceab440ecb9 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -66,6 +66,11 @@ void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; #endif +#ifdef CONFIG_PPC_FSL_BOOK3E +void setup_spectre_v2(void); +#else +static inline void setup_spectre_v2(void) {}; +#endif void do_btb_flush_fixups(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index f9bdd37ddc8c..7a611a187b53 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -26,6 +26,10 @@ static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NO bool barrier_nospec_enabled; static bool no_nospec; +static bool btb_flush_enabled; +#ifdef CONFIG_PPC_FSL_BOOK3E +static bool no_spectrev2; +#endif static void enable_barrier_nospec(bool enable) { @@ -101,6 +105,23 @@ static __init int barrier_nospec_debugfs_init(void) device_initcall(barrier_nospec_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +#ifdef CONFIG_PPC_FSL_BOOK3E +static int __init handle_nospectre_v2(char *p) +{ + no_spectrev2 = true; + + return 0; +} +early_param("nospectre_v2", handle_nospectre_v2); +void setup_spectre_v2(void) +{ + if (no_spectrev2) + do_btb_flush_fixups(); + else + btb_flush_enabled = true; +} +#endif /* CONFIG_PPC_FSL_BOOK3E */ + #ifdef CONFIG_PPC_BOOK3S_64 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { From a6942e23076de4d5f81e9c396de841d494c83399 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:13 +1100 Subject: [PATCH 2548/2608] powerpc/fsl: Flush the branch predictor at each kernel entry (64bit) commit 10c5e83afd4a3f01712d97d3bb1ae34d5b74a185 upstream. In order to protect against speculation attacks on indirect branches, the branch predictor is flushed at kernel entry to protect for the following situations: - userspace process attacking another userspace process - userspace process attacking the kernel Basically when the privillege level change (i.e. the kernel is entered), the branch predictor state is flushed. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 5 +++++ arch/powerpc/kernel/exceptions-64e.S | 26 +++++++++++++++++++++++++- arch/powerpc/mm/tlb_low_64e.S | 7 +++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index e40e74e8c635..12395895b9aa 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -78,6 +78,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) std r0,GPR0(r1) std r10,GPR1(r1) beq 2f /* if from kernel mode */ +#ifdef CONFIG_PPC_FSL_BOOK3E +START_BTB_FLUSH_SECTION + BTB_FLUSH(r10) +END_BTB_FLUSH_SECTION +#endif ACCOUNT_CPU_USER_ENTRY(r13, r10, r11) 2: std r2,GPR2(r1) std r3,GPR3(r1) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index acd8ca76233e..2acd18a903e9 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -295,7 +295,8 @@ ret_from_mc_except: andi. r10,r11,MSR_PR; /* save stack pointer */ \ beq 1f; /* branch around if supervisor */ \ ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\ -1: cmpdi cr1,r1,0; /* check if SP makes sense */ \ +1: type##_BTB_FLUSH \ + cmpdi cr1,r1,0; /* check if SP makes sense */ \ bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \ mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */ @@ -327,6 +328,29 @@ ret_from_mc_except: #define SPRN_MC_SRR0 SPRN_MCSRR0 #define SPRN_MC_SRR1 SPRN_MCSRR1 +#ifdef CONFIG_PPC_FSL_BOOK3E +#define GEN_BTB_FLUSH \ + START_BTB_FLUSH_SECTION \ + beq 1f; \ + BTB_FLUSH(r10) \ + 1: \ + END_BTB_FLUSH_SECTION + +#define CRIT_BTB_FLUSH \ + START_BTB_FLUSH_SECTION \ + BTB_FLUSH(r10) \ + END_BTB_FLUSH_SECTION + +#define DBG_BTB_FLUSH CRIT_BTB_FLUSH +#define MC_BTB_FLUSH CRIT_BTB_FLUSH +#define GDBELL_BTB_FLUSH GEN_BTB_FLUSH +#else +#define GEN_BTB_FLUSH +#define CRIT_BTB_FLUSH +#define DBG_BTB_FLUSH +#define GDBELL_BTB_FLUSH +#endif + #define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \ EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n)) diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index eb82d787d99a..b7e9c09dfe19 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -69,6 +69,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) std r15,EX_TLB_R15(r12) std r10,EX_TLB_CR(r12) #ifdef CONFIG_PPC_FSL_BOOK3E +START_BTB_FLUSH_SECTION + mfspr r11, SPRN_SRR1 + andi. r10,r11,MSR_PR + beq 1f + BTB_FLUSH(r10) +1: +END_BTB_FLUSH_SECTION std r7,EX_TLB_R7(r12) #endif TLB_MISS_PROLOG_STATS From d5b60fb984dd9e9974f9adb949cbc2ac2d4ac6e4 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:14 +1100 Subject: [PATCH 2549/2608] powerpc/fsl: Flush the branch predictor at each kernel entry (32 bit) commit 7fef436295bf6c05effe682c8797dfcb0deb112a upstream. In order to protect against speculation attacks on indirect branches, the branch predictor is flushed at kernel entry to protect for the following situations: - userspace process attacking another userspace process - userspace process attacking the kernel Basically when the privillege level change (i.e.the kernel is entered), the branch predictor state is flushed. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_booke.h | 6 ++++++ arch/powerpc/kernel/head_fsl_booke.S | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index d0862a100d29..15ac51072eb3 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -43,6 +43,9 @@ andi. r11, r11, MSR_PR; /* check whether user or kernel */\ mr r11, r1; \ beq 1f; \ +START_BTB_FLUSH_SECTION \ + BTB_FLUSH(r11) \ +END_BTB_FLUSH_SECTION \ /* if from user, start at top of this thread's kernel stack */ \ lwz r11, THREAD_INFO-THREAD(r10); \ ALLOC_STACK_FRAME(r11, THREAD_SIZE); \ @@ -128,6 +131,9 @@ stw r9,_CCR(r8); /* save CR on stack */\ mfspr r11,exc_level_srr1; /* check whether user or kernel */\ DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ +START_BTB_FLUSH_SECTION \ + BTB_FLUSH(r10) \ +END_BTB_FLUSH_SECTION \ andi. r11,r11,MSR_PR; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index bf4c6021515f..60a0aeefc4a7 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -452,6 +452,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfcr r13 stw r13, THREAD_NORMSAVE(3)(r10) DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1 +START_BTB_FLUSH_SECTION + mfspr r11, SPRN_SRR1 + andi. r10,r11,MSR_PR + beq 1f + BTB_FLUSH(r10) +1: +END_BTB_FLUSH_SECTION mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -546,6 +553,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfcr r13 stw r13, THREAD_NORMSAVE(3)(r10) DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1 +START_BTB_FLUSH_SECTION + mfspr r11, SPRN_SRR1 + andi. r10,r11,MSR_PR + beq 1f + BTB_FLUSH(r10) +1: +END_BTB_FLUSH_SECTION + mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the From 51a498016367f57118a905f209410f3f13f59862 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:15 +1100 Subject: [PATCH 2550/2608] powerpc/fsl: Flush branch predictor when entering KVM commit e7aa61f47b23afbec41031bc47ca8d6cb6516abc upstream. Switching from the guest to host is another place where the speculative accesses can be exploited. Flush the branch predictor when entering KVM. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/bookehv_interrupts.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 81bd8a07aa51..612b7f6a887f 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -75,6 +75,10 @@ PPC_LL r1, VCPU_HOST_STACK(r4) PPC_LL r2, HOST_R2(r1) +START_BTB_FLUSH_SECTION + BTB_FLUSH(r10) +END_BTB_FLUSH_SECTION + mfspr r10, SPRN_PID lwz r8, VCPU_HOST_PID(r4) PPC_LL r11, VCPU_SHARED(r4) From feafb29fe0ce9f090a3f7a202d7fb09a15308059 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:16 +1100 Subject: [PATCH 2551/2608] powerpc/fsl: Enable runtime patching if nospectre_v2 boot arg is used commit 3bc8ea8603ae4c1e09aca8de229ad38b8091fcb3 upstream. If the user choses not to use the mitigations, replace the code sequence with nops. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup-common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ab7a75b731da..c58364c74dad 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -938,6 +938,7 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); setup_barrier_nospec(); + setup_spectre_v2(); paging_init(); From 9c8a8cef5b369c0323f282c832a8c688288b7eec Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:17 +1100 Subject: [PATCH 2552/2608] powerpc/fsl: Update Spectre v2 reporting commit dfa88658fb0583abb92e062c7a9cd5a5b94f2a46 upstream. Report branch predictor state flush as a mitigation for Spectre variant 2. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 7a611a187b53..720a7a912d0d 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -212,8 +212,11 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) seq_buf_printf(&s, "(hardware accelerated)"); - } else + } else if (btb_flush_enabled) { + seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); + } else { seq_buf_printf(&s, "Vulnerable"); + } seq_buf_printf(&s, "\n"); From 64a0f413dbb58b75a91a873209dbdff0047bc1ce Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 29 Mar 2019 22:26:18 +1100 Subject: [PATCH 2553/2608] powerpc/fsl: Fixed warning: orphan section `__btb_flush_fixup' commit 039daac5526932ec731e4499613018d263af8b3e upstream. Fixed the following build warning: powerpc-linux-gnu-ld: warning: orphan section `__btb_flush_fixup' from `arch/powerpc/kernel/head_44x.o' being placed in section `__btb_flush_fixup'. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_booke.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 15ac51072eb3..306e26c073a0 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -32,6 +32,16 @@ */ #define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4)) +#ifdef CONFIG_PPC_FSL_BOOK3E +#define BOOKE_CLEAR_BTB(reg) \ +START_BTB_FLUSH_SECTION \ + BTB_FLUSH(reg) \ +END_BTB_FLUSH_SECTION +#else +#define BOOKE_CLEAR_BTB(reg) +#endif + + #define NORMAL_EXCEPTION_PROLOG(intno) \ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ mfspr r10, SPRN_SPRG_THREAD; \ @@ -43,9 +53,7 @@ andi. r11, r11, MSR_PR; /* check whether user or kernel */\ mr r11, r1; \ beq 1f; \ -START_BTB_FLUSH_SECTION \ - BTB_FLUSH(r11) \ -END_BTB_FLUSH_SECTION \ + BOOKE_CLEAR_BTB(r11) \ /* if from user, start at top of this thread's kernel stack */ \ lwz r11, THREAD_INFO-THREAD(r10); \ ALLOC_STACK_FRAME(r11, THREAD_SIZE); \ @@ -131,9 +139,7 @@ END_BTB_FLUSH_SECTION \ stw r9,_CCR(r8); /* save CR on stack */\ mfspr r11,exc_level_srr1; /* check whether user or kernel */\ DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ -START_BTB_FLUSH_SECTION \ - BTB_FLUSH(r10) \ -END_BTB_FLUSH_SECTION \ + BOOKE_CLEAR_BTB(r10) \ andi. r11,r11,MSR_PR; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ From 7b3772c00dc7560fff61e19355981f4d4d76043f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 29 Mar 2019 22:26:19 +1100 Subject: [PATCH 2554/2608] powerpc/fsl: Fix the flush of branch predictor. commit 27da80719ef132cf8c80eb406d5aeb37dddf78cc upstream. The commit identified below adds MC_BTB_FLUSH macro only when CONFIG_PPC_FSL_BOOK3E is defined. This results in the following error on some configs (seen several times with kisskb randconfig_defconfig) arch/powerpc/kernel/exceptions-64e.S:576: Error: Unrecognized opcode: `mc_btb_flush' make[3]: *** [scripts/Makefile.build:367: arch/powerpc/kernel/exceptions-64e.o] Error 1 make[2]: *** [scripts/Makefile.build:492: arch/powerpc/kernel] Error 2 make[1]: *** [Makefile:1043: arch/powerpc] Error 2 make: *** [Makefile:152: sub-make] Error 2 This patch adds a blank definition of MC_BTB_FLUSH for other cases. Fixes: 10c5e83afd4a ("powerpc/fsl: Flush the branch predictor at each kernel entry (64bit)") Cc: Diana Craciun Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens Reviewed-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64e.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2acd18a903e9..2edc1b7b34cc 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -348,6 +348,7 @@ ret_from_mc_except: #define GEN_BTB_FLUSH #define CRIT_BTB_FLUSH #define DBG_BTB_FLUSH +#define MC_BTB_FLUSH #define GDBELL_BTB_FLUSH #endif From f16b7c77ddb6292c3e96457f637a92824963e5bf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Mar 2019 22:26:20 +1100 Subject: [PATCH 2555/2608] powerpc/security: Fix spectre_v2 reporting commit 92edf8df0ff2ae86cc632eeca0e651fd8431d40d upstream. When I updated the spectre_v2 reporting to handle software count cache flush I got the logic wrong when there's no software count cache enabled at all. The result is that on systems with the software count cache flush disabled we print: Mitigation: Indirect branch cache disabled, Software count cache flush Which correctly indicates that the count cache is disabled, but incorrectly says the software count cache flush is enabled. The root of the problem is that we are trying to handle all combinations of options. But we know now that we only expect to see the software count cache flush enabled if the other options are false. So split the two cases, which simplifies the logic and fixes the bug. We were also missing a space before "(hardware accelerated)". The result is we see one of: Mitigation: Indirect branch serialisation (kernel only) Mitigation: Indirect branch cache disabled Mitigation: Software count cache flush Mitigation: Software count cache flush (hardware accelerated) Fixes: ee13cb249fab ("powerpc/64s: Add support for software count cache flush") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Michael Ellerman Reviewed-by: Michael Neuling Reviewed-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 720a7a912d0d..48b50fb8dc4b 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -189,29 +189,22 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); - if (bcs || ccd || count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { - bool comma = false; + if (bcs || ccd) { seq_buf_printf(&s, "Mitigation: "); - if (bcs) { + if (bcs) seq_buf_printf(&s, "Indirect branch serialisation (kernel only)"); - comma = true; - } - if (ccd) { - if (comma) - seq_buf_printf(&s, ", "); - seq_buf_printf(&s, "Indirect branch cache disabled"); - comma = true; - } - - if (comma) + if (bcs && ccd) seq_buf_printf(&s, ", "); - seq_buf_printf(&s, "Software count cache flush"); + if (ccd) + seq_buf_printf(&s, "Indirect branch cache disabled"); + } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { + seq_buf_printf(&s, "Mitigation: Software count cache flush"); if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) - seq_buf_printf(&s, "(hardware accelerated)"); + seq_buf_printf(&s, " (hardware accelerated)"); } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { From dfcb397fe8440182d343b7df677db60e4e2f8845 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Mar 2019 14:06:12 +0000 Subject: [PATCH 2556/2608] Btrfs: fix incorrect file size after shrinking truncate and fsync commit bf504110bc8aa05df48b0e5f0aa84bfb81e0574b upstream. If we do a shrinking truncate against an inode which is already present in the respective log tree and then rename it, as part of logging the new name we end up logging an inode item that reflects the old size of the file (the one which we previously logged) and not the new smaller size. The decision to preserve the size previously logged was added by commit 1a4bcf470c886b ("Btrfs: fix fsync data loss after adding hard link to inode") in order to avoid data loss after replaying the log. However that decision is only needed for the case the logged inode size is smaller then the current size of the inode, as explained in that commit's change log. If the current size of the inode is smaller then the previously logged size, we know a shrinking truncate happened and therefore need to use that smaller size. Example to trigger the problem: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xab 0 8000" /mnt/foo $ xfs_io -c "fsync" /mnt/foo $ xfs_io -c "truncate 3000" /mnt/foo $ mv /mnt/foo /mnt/bar $ xfs_io -c "fsync" /mnt/bar $ mount /dev/sdb /mnt $ od -t x1 -A d /mnt/bar 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 0008000 Once we rename the file, we log its name (and inode item), and because the inode was already logged before in the current transaction, we log it with a size of 8000 bytes because that is the size we previously logged (with the first fsync). As part of the rename, besides logging the inode, we do also sync the log, which is done since commit d4682ba03ef618 ("Btrfs: sync log after logging new name"), so the next fsync against our inode is effectively a no-op, since no new changes happened since the rename operation. Even if did not sync the log during the rename operation, the same problem (fize size of 8000 bytes instead of 3000 bytes) would be visible after replaying the log if the log ended up getting synced to disk through some other means, such as for example by fsyncing some other modified file. In the example above the fsync after the rename operation is there just because not every filesystem may guarantee logging/journalling the inode (and syncing the log/journal) during the rename operation, for example it is needed for f2fs, but not for ext4 and xfs. Fix this scenario by, when logging a new name (which is triggered by rename and link operations), using the current size of the inode instead of the previously logged inode size. A test case for fstests follows soon. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202695 CC: stable@vger.kernel.org # 4.4+ Reported-by: Seulbae Kim Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 179a383a4aaa..7a9bdc0d11bb 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4501,6 +4501,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); *size_ret = btrfs_inode_size(path->nodes[0], item); + /* + * If the in-memory inode's i_size is smaller then the inode + * size stored in the btree, return the inode's i_size, so + * that we get a correct inode size after replaying the log + * when before a power failure we had a shrinking truncate + * followed by addition of a new name (rename / new hard link). + * Otherwise return the inode size from the btree, to avoid + * data loss when replaying a log due to previously doing a + * write that expands the inode's size and logging a new name + * immediately after. + */ + if (*size_ret > inode->vfs_inode.i_size) + *size_ret = inode->vfs_inode.i_size; } btrfs_release_path(path); From da04627d0ab4dc83c9dfc27f94b4ada77287ae09 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 6 Mar 2019 17:13:04 -0500 Subject: [PATCH 2557/2608] btrfs: remove WARN_ON in log_dir_items commit 2cc8334270e281815c3850c3adea363c51f21e0d upstream. When Filipe added the recursive directory logging stuff in 2f2ff0ee5e430 ("Btrfs: fix metadata inconsistencies after directory fsync") he specifically didn't take the directory i_mutex for the children directories that we need to log because of lockdep. This is generally fine, but can lead to this WARN_ON() tripping if we happen to run delayed deletion's in between our first search and our second search of dir_item/dir_indexes for this directory. We expect this to happen, so the WARN_ON() isn't necessary. Drop the WARN_ON() and add a comment so we know why this case can happen. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7a9bdc0d11bb..9d72882b0f72 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3422,9 +3422,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - /* find the first key from this transaction again */ + /* + * Find the first key from this transaction again. See the note for + * log_new_dir_dentries, if we're logging a directory recursively we + * won't be holding its i_mutex, which means we can modify the directory + * while we're logging it. If we remove an entry between our first + * search and this search we'll not find the key again and can just + * bail. + */ ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); - if (WARN_ON(ret != 0)) + if (ret != 0) goto done; /* From 2b95e85fb9b14a8e5616d1342181aba486fecace Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 14 Mar 2019 08:56:28 +0100 Subject: [PATCH 2558/2608] btrfs: raid56: properly unmap parity page in finish_parity_scrub() commit 3897b6f0a859288c22fb793fad11ec2327e60fcd upstream. Parity page is incorrectly unmapped in finish_parity_scrub(), triggering a reference counter bug on i386, i.e.: [ 157.662401] kernel BUG at mm/highmem.c:349! [ 157.666725] invalid opcode: 0000 [#1] SMP PTI The reason is that kunmap(p_page) was completely left out, so we never did an unmap for the p_page and the loop unmapping the rbio page was iterating over the wrong number of stripes: unmapping should be done with nr_data instead of rbio->real_stripes. Test case to reproduce the bug: - create a raid5 btrfs filesystem: # mkfs.btrfs -m raid5 -d raid5 /dev/sdb /dev/sdc /dev/sdd /dev/sde - mount it: # mount /dev/sdb /mnt - run btrfs scrub in a loop: # while :; do btrfs scrub start -BR /mnt; done BugLink: https://bugs.launchpad.net/bugs/1812845 Fixes: 5a6ac9eacb49 ("Btrfs, raid56: support parity scrub on raid56") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Signed-off-by: Andrea Righi Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/raid56.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 2e995e565633..1e35a2327478 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2414,8 +2414,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, bitmap_clear(rbio->dbitmap, pagenr, 1); kunmap(p); - for (stripe = 0; stripe < rbio->real_stripes; stripe++) + for (stripe = 0; stripe < nr_data; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); + kunmap(p_page); } __free_page(p_page); From ace6b822bbe5c6c86904043359b276863505dbda Mon Sep 17 00:00:00 2001 From: Kohji Okuno Date: Tue, 26 Feb 2019 11:34:13 +0900 Subject: [PATCH 2559/2608] ARM: imx6q: cpuidle: fix bug that CPU might not wake up at expected time commit 91740fc8242b4f260cfa4d4536d8551804777fae upstream. In the current cpuidle implementation for i.MX6q, the CPU that sets 'WAIT_UNCLOCKED' and the CPU that returns to 'WAIT_CLOCKED' are always the same. While the CPU that sets 'WAIT_UNCLOCKED' is in IDLE state of "WAIT", if the other CPU wakes up and enters IDLE state of "WFI" istead of "WAIT", this CPU can not wake up at expired time. Because, in the case of "WFI", the CPU must be waked up by the local timer interrupt. But, while 'WAIT_UNCLOCKED' is set, the local timer is stopped, when all CPUs execute "wfi" instruction. As a result, the local timer interrupt is not fired. In this situation, this CPU will wake up by IRQ different from local timer. (e.g. broacast timer) So, this fix changes CPU to return to 'WAIT_CLOCKED'. Signed-off-by: Kohji Okuno Fixes: e5f9dec8ff5f ("ARM: imx6q: support WAIT mode using cpuidle") Cc: Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/cpuidle-imx6q.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index bfeb25aaf9a2..326e870d7123 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -16,30 +16,23 @@ #include "cpuidle.h" #include "hardware.h" -static atomic_t master = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(master_lock); +static int num_idle_cpus = 0; +static DEFINE_SPINLOCK(cpuidle_lock); static int imx6q_enter_wait(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - if (atomic_inc_return(&master) == num_online_cpus()) { - /* - * With this lock, we prevent other cpu to exit and enter - * this function again and become the master. - */ - if (!spin_trylock(&master_lock)) - goto idle; + spin_lock(&cpuidle_lock); + if (++num_idle_cpus == num_online_cpus()) imx6_set_lpm(WAIT_UNCLOCKED); - cpu_do_idle(); - imx6_set_lpm(WAIT_CLOCKED); - spin_unlock(&master_lock); - goto done; - } + spin_unlock(&cpuidle_lock); -idle: cpu_do_idle(); -done: - atomic_dec(&master); + + spin_lock(&cpuidle_lock); + if (num_idle_cpus-- == num_online_cpus()) + imx6_set_lpm(WAIT_CLOCKED); + spin_unlock(&cpuidle_lock); return index; } From fb13de8fe695fccf6a1520c24e2943784f876a4a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 15 Mar 2019 20:21:19 +0530 Subject: [PATCH 2560/2608] powerpc: bpf: Fix generation of load/store DW instructions commit 86be36f6502c52ddb4b85938145324fd07332da1 upstream. Yauheni Kaliuta pointed out that PTR_TO_STACK store/load verifier test was failing on powerpc64 BE, and rightfully indicated that the PPC_LD() macro is not masking away the last two bits of the offset per the ISA, resulting in the generation of 'lwa' instruction instead of the intended 'ld' instruction. Segher also pointed out that we can't simply mask away the last two bits as that will result in loading/storing from/to a memory location that was not intended. This patch addresses this by using ldx/stdx if the offset is not word-aligned. We load the offset into a temporary register (TMP_REG_2) and use that as the index register in a subsequent ldx/stdx. We fix PPC_LD() macro to mask off the last two bits, but enhance PPC_BPF_LL() and PPC_BPF_STL() to factor in the offset value and generate the proper instruction sequence. We also convert all existing users of PPC_LD() and PPC_STD() to use these macros. All existing uses of these macros have been audited to ensure that TMP_REG_2 can be clobbered. Fixes: 156d0e290e96 ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Cc: stable@vger.kernel.org # v4.9+ Reported-by: Yauheni Kaliuta Signed-off-by: Naveen N. Rao Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/net/bpf_jit.h | 17 +++++------------ arch/powerpc/net/bpf_jit32.h | 4 ++++ arch/powerpc/net/bpf_jit64.h | 20 ++++++++++++++++++++ arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++------ 5 files changed, 37 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ce0930d68857..b991bd31b383 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -288,6 +288,7 @@ /* Misc instructions for BPF compiler */ #define PPC_INST_LBZ 0x88000000 #define PPC_INST_LD 0xe8000000 +#define PPC_INST_LDX 0x7c00002a #define PPC_INST_LHZ 0xa0000000 #define PPC_INST_LWZ 0x80000000 #define PPC_INST_LHBRX 0x7c00062c @@ -295,6 +296,7 @@ #define PPC_INST_STB 0x98000000 #define PPC_INST_STH 0xb0000000 #define PPC_INST_STD 0xf8000000 +#define PPC_INST_STDX 0x7c00012a #define PPC_INST_STDU 0xf8000001 #define PPC_INST_STW 0x90000000 #define PPC_INST_STWU 0x94000000 diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 47fc6660845d..68dece206048 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -51,6 +51,8 @@ #define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) #define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \ + ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) #define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \ @@ -65,7 +67,9 @@ #define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) + ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \ + ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \ @@ -85,17 +89,6 @@ ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ ___PPC_RA(a) | ___PPC_RB(b)) - -#ifdef CONFIG_PPC64 -#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0) -#else -#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) -#endif - #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index a8cd7e289ecd..81a9045d8410 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -122,6 +122,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) #endif +#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) +#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) +#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) + #define SEEN_DATAREF 0x10000 /* might call external helpers */ #define SEEN_XREG 0x20000 /* X reg is used */ #define SEEN_MEM 0x40000 /* SEEN_MEM+(1<= SKF_LL_OFF ? func##_negative_offset : func) : \ func##_positive_offset) +/* + * WARNING: These can use TMP_REG_2 if the offset is not at word boundary, + * so ensure that it isn't in use already. + */ +#define PPC_BPF_LL(r, base, i) do { \ + if ((i) % 4) { \ + PPC_LI(b2p[TMP_REG_2], (i)); \ + PPC_LDX(r, base, b2p[TMP_REG_2]); \ + } else \ + PPC_LD(r, base, i); \ + } while(0) +#define PPC_BPF_STL(r, base, i) do { \ + if ((i) % 4) { \ + PPC_LI(b2p[TMP_REG_2], (i)); \ + PPC_STDX(r, base, b2p[TMP_REG_2]); \ + } else \ + PPC_STD(r, base, i); \ + } while(0) +#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0) + #define SEEN_FUNC 0x1000 /* might call external helpers */ #define SEEN_STACK 0x2000 /* uses BPF stack */ #define SEEN_SKB 0x4000 /* uses sk_buff */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index fee1e1f8c9d3..3a21d3956ad4 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -261,7 +261,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; */ - PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); PPC_BCC(COND_GT, out); @@ -274,7 +274,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 /* prog = array->ptrs[index]; */ PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); - PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); /* * if (prog == NULL) @@ -284,7 +284,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); #ifdef PPC64_ELF_ABI_v1 /* skip past the function descriptor */ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], @@ -616,7 +616,7 @@ bpf_alu32_trunc: * the instructions generated will remain the * same across all passes */ - PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx)); + PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); break; @@ -672,7 +672,7 @@ emit_clear: PPC_LI32(b2p[TMP_REG_1], imm); src_reg = b2p[TMP_REG_1]; } - PPC_STD(src_reg, dst_reg, off); + PPC_BPF_STL(src_reg, dst_reg, off); break; /* @@ -719,7 +719,7 @@ emit_clear: break; /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: - PPC_LD(dst_reg, src_reg, off); + PPC_BPF_LL(dst_reg, src_reg, off); break; /* From a0aa001a1fdc8e77f517b976ea3a08fa07326f6a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 19 Mar 2019 12:12:13 -0400 Subject: [PATCH 2561/2608] NFSv4.1 don't free interrupted slot on open commit 0cb98abb5bd13b9a636bde603d952d722688b428 upstream. Allow the async rpc task for finish and update the open state if needed, then free the slot. Otherwise, the async rpc unable to decode the reply. Signed-off-by: Olga Kornievskaia Fixes: ae55e59da0e4 ("pnfs: Don't release the sequence slot...") Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9041a892701f..a225f98c9903 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2746,7 +2746,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, nfs4_schedule_stateid_recovery(server, state); } out: - nfs4_sequence_free_slot(&opendata->o_res.seq_res); + if (!opendata->cancelled) + nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; } From af553a246a930bdd7164fab494aa2ee4ab768a3c Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:02 +0100 Subject: [PATCH 2562/2608] net: dsa: qca8k: remove leftover phy accessors commit 1eec7151ae0e134bd42e3f128066b2ff8da21393 upstream. This belated patch implements Andrew Lunn's request of "remove the phy_read() and phy_write() functions." While seemingly harmless, this causes the switch's user port PHYs to get registered twice. This is because the DSA subsystem will create a slave mdio-bus not knowing that the qca8k_phy_(read|write) accessors operate on the external mdio-bus. So the same "bus" gets effectively duplicated. Cc: stable@vger.kernel.org Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family") Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/qca8k.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 9645c8f05c7f..c3c9d7e33bd6 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -629,22 +629,6 @@ qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy) qca8k_port_set_status(priv, port, 1); } -static int -qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - - return mdiobus_read(priv->bus, phy, regnum); -} - -static int -qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - - return mdiobus_write(priv->bus, phy, regnum, val); -} - static void qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data) { @@ -879,8 +863,6 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .setup = qca8k_setup, .adjust_link = qca8k_adjust_link, .get_strings = qca8k_get_strings, - .phy_read = qca8k_phy_read, - .phy_write = qca8k_phy_write, .get_ethtool_stats = qca8k_get_ethtool_stats, .get_sset_count = qca8k_get_sset_count, .get_mac_eee = qca8k_get_mac_eee, From 910ffe1633eef1a4227d467268ce31cb819ed332 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 20 Mar 2019 16:15:24 -0500 Subject: [PATCH 2563/2608] ALSA: rawmidi: Fix potential Spectre v1 vulnerability commit 2b1d9c8f87235f593826b9cf46ec10247741fff9 upstream. info->stream is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: sound/core/rawmidi.c:604 __snd_rawmidi_info_select() warn: potential spectre issue 'rmidi->streams' [r] (local cap) Fix this by sanitizing info->stream before using it to index rmidi->streams. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/rawmidi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index abacbbc0b0e8..d22472ba211e 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -591,6 +592,7 @@ static int __snd_rawmidi_info_select(struct snd_card *card, return -ENXIO; if (info->stream < 0 || info->stream > 1) return -EINVAL; + info->stream = array_index_nospec(info->stream, 2); pstr = &rmidi->streams[info->stream]; if (pstr->substream_count == 0) return -ENOENT; From 8f368827496b0323d6ca021cd98baf40eaa9739f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 20 Mar 2019 18:42:01 -0500 Subject: [PATCH 2564/2608] ALSA: seq: oss: Fix Spectre v1 vulnerability commit c709f14f0616482b67f9fbcb965e1493a03ff30b upstream. dev is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: sound/core/seq/oss/seq_oss_synth.c:626 snd_seq_oss_synth_make_info() warn: potential spectre issue 'dp->synths' [w] (local cap) Fix this by sanitizing dev before using it to index dp->synths. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/oss/seq_oss_synth.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 278ebb993122..c93945917235 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -617,13 +617,14 @@ int snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_info *inf) { struct seq_oss_synth *rec; + struct seq_oss_synthinfo *info = get_synthinfo_nospec(dp, dev); - if (dev < 0 || dev >= dp->max_synthdev) + if (!info) return -ENXIO; - if (dp->synths[dev].is_midi) { + if (info->is_midi) { struct midi_info minf; - snd_seq_oss_midi_make_info(dp, dp->synths[dev].midi_mapped, &minf); + snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf); inf->synth_type = SYNTH_TYPE_MIDI; inf->synth_subtype = 0; inf->nr_voices = 16; From 557301cf33aeccdb830bdf4664bd7ce74d73093b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Mar 2019 16:00:54 +0100 Subject: [PATCH 2565/2608] ALSA: pcm: Fix possible OOB access in PCM oss plugins commit ca0214ee2802dd47239a4e39fb21c5b00ef61b22 upstream. The PCM OSS emulation converts and transfers the data on the fly via "plugins". The data is converted over the dynamically allocated buffer for each plugin, and recently syzkaller caught OOB in this flow. Although the bisection by syzbot pointed out to the commit 65766ee0bf7f ("ALSA: oss: Use kvzalloc() for local buffer allocations"), this is merely a commit to replace vmalloc() with kvmalloc(), hence it can't be the cause. The further debug action revealed that this happens in the case where a slave PCM doesn't support only the stereo channels while the OSS stream is set up for a mono channel. Below is a brief explanation: At each OSS parameter change, the driver sets up the PCM hw_params again in snd_pcm_oss_change_params_lock(). This is also the place where plugins are created and local buffers are allocated. The problem is that the plugins are created before the final hw_params is determined. Namely, two snd_pcm_hw_param_near() calls for setting the period size and periods may influence on the final result of channels, rates, etc, too, while the current code has already created plugins beforehand with the premature values. So, the plugin believes that channels=1, while the actual I/O is with channels=2, which makes the driver reading/writing over the allocated buffer size. The fix is simply to move the plugin allocation code after the final hw_params call. Reported-by: syzbot+d4503ae45b65c5bc1194@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 43 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index df358e838b5b..bb0ab0f6ce9d 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -940,6 +940,28 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) oss_frame_size = snd_pcm_format_physical_width(params_format(params)) * params_channels(params) / 8; + err = snd_pcm_oss_period_size(substream, params, sparams); + if (err < 0) + goto failure; + + n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); + err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); + if (err < 0) + goto failure; + + err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, + runtime->oss.periods, NULL); + if (err < 0) + goto failure; + + snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); + + err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams); + if (err < 0) { + pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); + goto failure; + } + #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); if (!direct) { @@ -974,27 +996,6 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) } #endif - err = snd_pcm_oss_period_size(substream, params, sparams); - if (err < 0) - goto failure; - - n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); - err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); - if (err < 0) - goto failure; - - err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, - runtime->oss.periods, NULL); - if (err < 0) - goto failure; - - snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); - - if ((err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams)) < 0) { - pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); - goto failure; - } - if (runtime->oss.trigger) { sw_params->start_threshold = 1; } else { From 6182bdf081fd209a3be00477bc5a13168cdb0aca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Mar 2019 10:38:58 +0100 Subject: [PATCH 2566/2608] ALSA: pcm: Don't suspend stream in unrecoverable PCM state commit 113ce08109f8e3b091399e7cc32486df1cff48e7 upstream. Currently PCM core sets each opened stream forcibly to SUSPENDED state via snd_pcm_suspend_all() call, and the user-space is responsible for re-triggering the resume manually either via snd_pcm_resume() or prepare call. The scheme works fine usually, but there are corner cases where the stream can't be resumed by that call: the streams still in OPEN state before finishing hw_params. When they are suspended, user-space cannot perform resume or prepare because they haven't been set up yet. The only possible recovery is to re-open the device, which isn't nice at all. Similarly, when a stream is in DISCONNECTED state, it makes no sense to change it to SUSPENDED state. Ditto for in SETUP state; which you can re-prepare directly. So, this patch addresses these issues by filtering the PCM streams to be suspended by checking the PCM state. When a stream is in either OPEN, SETUP or DISCONNECTED as well as already SUSPENDED, the suspend action is skipped. To be noted, this problem was originally reported for the PCM runtime PM on HD-audio. And, the runtime PM problem itself was already addressed (although not intended) by the code refactoring commits 3d21ef0b49f8 ("ALSA: pcm: Suspend streams globally via device type PM ops") and 17bc4815de58 ("ALSA: pci: Remove superfluous snd_pcm_suspend*() calls"). These commits eliminated the snd_pcm_suspend*() calls from the runtime PM suspend callback code path, hence the racy OPEN state won't appear while runtime PM. (FWIW, the race window is between snd_pcm_open_substream() and the first power up in azx_pcm_open().) Although the runtime PM issue was already "fixed", the same problem is still present for the system PM, hence this patch is still needed. And for stable trees, this patch alone should suffice for fixing the runtime PM problem, too. Reported-and-tested-by: Jon Hunter Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 966ac384c3f4..1a63d456a3dc 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1395,8 +1395,15 @@ static int snd_pcm_pause(struct snd_pcm_substream *substream, int push) static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, int state) { struct snd_pcm_runtime *runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_SUSPENDED) + switch (runtime->status->state) { + case SNDRV_PCM_STATE_SUSPENDED: return -EBUSY; + /* unresumable PCM state; return -EBUSY for skipping suspend */ + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_DISCONNECTED: + return -EBUSY; + } runtime->trigger_master = substream; return 0; } From e1b7eaa9e2db82fe98fe559bc7868efcc26102df Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 14 Mar 2019 16:22:45 +0800 Subject: [PATCH 2567/2608] ALSA: hda/realtek - Add support headset mode for DELL WYSE AIO commit 136824efaab2c095fc911048f7c7ddeda258c965 upstream. This patch will enable WYSE AIO for Headset mode. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 972fd95f08ca..ebd55c408c19 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5436,6 +5436,9 @@ enum { ALC298_FIXUP_TPT470_DOCK, ALC255_FIXUP_DUMMY_LINEOUT_VERB, ALC255_FIXUP_DELL_HEADSET_MIC, + ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE, + ALC225_FIXUP_WYSE_AUTO_MUTE, + ALC225_FIXUP_WYSE_DISABLE_MIC_VREF, }; static const struct hda_fixup alc269_fixups[] = { @@ -6311,6 +6314,28 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MIC }, + [ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x16, 0x01011020 }, /* Rear Line out */ + { 0x19, 0x01a1913c }, /* use as Front headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC225_FIXUP_WYSE_AUTO_MUTE + }, + [ALC225_FIXUP_WYSE_AUTO_MUTE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_auto_mute_via_amp, + .chained = true, + .chain_id = ALC225_FIXUP_WYSE_DISABLE_MIC_VREF + }, + [ALC225_FIXUP_WYSE_DISABLE_MIC_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_mic_vref, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6369,6 +6394,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), + SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), From fe2d6d04e42b7bc4a7a4f2519e219df3ef5dba6e Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 14 Mar 2019 15:50:59 +0800 Subject: [PATCH 2568/2608] ALSA: hda/realtek - Add support headset mode for New DELL WYSE NB commit da484d00f020af3dd7cfcc6c4b69a7f856832883 upstream. Enable headset mode support for new WYSE NB platform. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ebd55c408c19..9637d0bbdeb5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6395,6 +6395,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), From e75c894d8186c5bc86b40a6eebf887f6ce5ebb75 Mon Sep 17 00:00:00 2001 From: Fredrik Noring Date: Wed, 27 Mar 2019 19:12:50 +0100 Subject: [PATCH 2569/2608] kbuild: modversions: Fix relative CRC byte order interpretation commit 54a7151b1496cddbb7a83546b7998103e98edc88 upstream. Fix commit 56067812d5b0 ("kbuild: modversions: add infrastructure for emitting relative CRCs") where CRCs are interpreted in host byte order rather than proper kernel byte order. The bug is conditional on CONFIG_MODULE_REL_CRCS. For example, when loading a BE module into a BE kernel compiled with a LE system, the error "disagrees about version of symbol module_layout" is produced. A message such as "Found checksum D7FA6856 vs module 5668FAD7" will be given with debug enabled, which indicates an obvious endian problem within __kcrctab within the kernel image. The general solution is to use the macro TO_NATIVE, as is done in similar cases throughout modpost.c. With this correction it has been verified that a BE kernel compiled with a LE system accepts BE modules. This change has also been verified with a LE kernel compiled with a LE system, in which case TO_NATIVE returns its value unmodified since the byte orders match. This is by far the common case. Fixes: 56067812d5b0 ("kbuild: modversions: add infrastructure for emitting relative CRCs") Signed-off-by: Fredrik Noring Cc: stable@vger.kernel.org Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e36a673833ae..c22041a4fc36 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -645,7 +645,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info, info->sechdrs[sym->st_shndx].sh_offset - (info->hdr->e_type != ET_REL ? info->sechdrs[sym->st_shndx].sh_addr : 0); - crc = *crcp; + crc = TO_NATIVE(*crcp); } sym_update_crc(symname + strlen(CRC_PFX), mod, crc, export); From 26c3a3b92782880ea7e95b8704164b0b7a75c82e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 28 Mar 2019 20:43:30 -0700 Subject: [PATCH 2570/2608] fs/open.c: allow opening only regular files during execve() commit 73601ea5b7b18eb234219ae2adf77530f389da79 upstream. syzbot is hitting lockdep warning [1] due to trying to open a fifo during an execve() operation. But we don't need to open non regular files during an execve() operation, for all files which we will need are the executable file itself and the interpreter programs like /bin/sh and ld-linux.so.2 . Since the manpage for execve(2) says that execve() returns EACCES when the file or a script interpreter is not a regular file, and the manpage for uselib(2) says that uselib() can return EACCES, and we use FMODE_EXEC when opening for execve()/uselib(), we can bail out if a non regular file is requested with FMODE_EXEC set. Since this deadlock followed by khungtaskd warnings is trivially reproducible by a local unprivileged user, and syzbot's frequent crash due to this deadlock defers finding other bugs, let's workaround this deadlock until we get a chance to find a better solution. [1] https://syzkaller.appspot.com/bug?id=b5095bfec44ec84213bac54742a82483aad578ce Link: http://lkml.kernel.org/r/1552044017-7890-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Reported-by: syzbot Fixes: 8924feff66f35fe2 ("splice: lift pipe_lock out of splice_to_pipe()") Signed-off-by: Tetsuo Handa Acked-by: Kees Cook Cc: Al Viro Cc: Eric Biggers Cc: Dmitry Vyukov Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/open.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/open.c b/fs/open.c index 7ea118471dce..28a3956c4479 100644 --- a/fs/open.c +++ b/fs/open.c @@ -716,6 +716,12 @@ static int do_dentry_open(struct file *f, return 0; } + /* Any file opened for execve()/uselib() has to be a regular file. */ + if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) { + error = -EACCES; + goto cleanup_file; + } + if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); if (unlikely(error)) From e8bf3395ebcd21d7eaae5bd7e33e7dca4414ed80 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 28 Mar 2019 20:43:38 -0700 Subject: [PATCH 2571/2608] ocfs2: fix inode bh swapping mixup in ocfs2_reflink_inodes_lock commit e6a9467ea14bae8691b0f72c500510c42ea8edb8 upstream. ocfs2_reflink_inodes_lock() can swap the inode1/inode2 variables so that we always grab cluster locks in order of increasing inode number. Unfortunately, we forget to swap the inode record buffer head pointers when we've done this, which leads to incorrect bookkeepping when we're trying to make the two inodes have the same refcount tree. This has the effect of causing filesystem shutdowns if you're trying to reflink data from inode 100 into inode 97, where inode 100 already has a refcount tree attached and inode 97 doesn't. The reflink code decides to copy the refcount tree pointer from 100 to 97, but uses inode 97's inode record to open the tree root (which it doesn't have) and blows up. This issue causes filesystem shutdowns and metadata corruption! Link: http://lkml.kernel.org/r/20190312214910.GK20533@magnolia Fixes: 29ac8e856cb369 ("ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features") Signed-off-by: Darrick J. Wong Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/refcounttree.c | 42 +++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 824f407df1db..3a4e1bca5e31 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4716,22 +4716,23 @@ out: /* Lock an inode and grab a bh pointing to the inode. */ static int ocfs2_reflink_inodes_lock(struct inode *s_inode, - struct buffer_head **bh1, + struct buffer_head **bh_s, struct inode *t_inode, - struct buffer_head **bh2) + struct buffer_head **bh_t) { - struct inode *inode1; - struct inode *inode2; + struct inode *inode1 = s_inode; + struct inode *inode2 = t_inode; struct ocfs2_inode_info *oi1; struct ocfs2_inode_info *oi2; + struct buffer_head *bh1 = NULL; + struct buffer_head *bh2 = NULL; bool same_inode = (s_inode == t_inode); + bool need_swap = (inode1->i_ino > inode2->i_ino); int status; /* First grab the VFS and rw locks. */ lock_two_nondirectories(s_inode, t_inode); - inode1 = s_inode; - inode2 = t_inode; - if (inode1->i_ino > inode2->i_ino) + if (need_swap) swap(inode1, inode2); status = ocfs2_rw_lock(inode1, 1); @@ -4754,17 +4755,13 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode, trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, (unsigned long long)oi2->ip_blkno); - if (*bh1) - *bh1 = NULL; - if (*bh2) - *bh2 = NULL; - /* We always want to lock the one with the lower lockid first. */ if (oi1->ip_blkno > oi2->ip_blkno) mlog_errno(-ENOLCK); /* lock id1 */ - status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET); + status = ocfs2_inode_lock_nested(inode1, &bh1, 1, + OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -4773,15 +4770,25 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode, /* lock id2 */ if (!same_inode) { - status = ocfs2_inode_lock_nested(inode2, bh2, 1, + status = ocfs2_inode_lock_nested(inode2, &bh2, 1, OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto out_cl1; } - } else - *bh2 = *bh1; + } else { + bh2 = bh1; + } + + /* + * If we swapped inode order above, we have to swap the buffer heads + * before passing them back to the caller. + */ + if (need_swap) + swap(bh1, bh2); + *bh_s = bh1; + *bh_t = bh2; trace_ocfs2_double_lock_end( (unsigned long long)OCFS2_I(inode1)->ip_blkno, @@ -4791,8 +4798,7 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode, out_cl1: ocfs2_inode_unlock(inode1, 1); - brelse(*bh1); - *bh1 = NULL; + brelse(bh1); out_rw2: ocfs2_rw_unlock(inode2, 1); out_i2: From 683c14fb42f60e6d25dfdc5197e294ecfe14efad Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Mar 2019 10:01:46 -0700 Subject: [PATCH 2572/2608] scsi: sd: Fix a race between closing an sd device and sd I/O commit c14a57264399efd39514a2329c591a4b954246d8 upstream. The scsi_end_request() function calls scsi_cmd_to_driver() indirectly and hence needs the disk->private_data pointer. Avoid that that pointer is cleared before all affected I/O requests have finished. This patch avoids that the following crash occurs: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace: scsi_mq_uninit_cmd+0x1c/0x30 scsi_end_request+0x7c/0x1b8 scsi_io_completion+0x464/0x668 scsi_finish_command+0xbc/0x160 scsi_eh_flush_done_q+0x10c/0x170 sas_scsi_recover_host+0x84c/0xa98 [libsas] scsi_error_handler+0x140/0x5b0 kthread+0x100/0x12c ret_from_fork+0x10/0x18 Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Jason Yan Cc: Signed-off-by: Bart Van Assche Reported-by: Jason Yan Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d0cc8fb40f63..0c1dd88f267b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1420,11 +1420,6 @@ static void sd_release(struct gendisk *disk, fmode_t mode) scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); } - /* - * XXX and what if there are packets in flight and this close() - * XXX is followed by a "rmmod sd_mod"? - */ - scsi_disk_put(sdkp); } @@ -3521,11 +3516,23 @@ static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct gendisk *disk = sdkp->disk; - + struct request_queue *q = disk->queue; + spin_lock(&sd_index_lock); ida_remove(&sd_index_ida, sdkp->index); spin_unlock(&sd_index_lock); + /* + * Wait until all requests that are in progress have completed. + * This is necessary to avoid that e.g. scsi_end_request() crashes + * due to clearing the disk->private_data pointer. Wait from inside + * scsi_disk_release() instead of from sd_release() to avoid that + * freezing and unfreezing the request queue affects user space I/O + * in case multiple processes open a /dev/sd... node concurrently. + */ + blk_mq_freeze_queue(q); + blk_mq_unfreeze_queue(q); + disk->private_data = NULL; put_disk(disk); put_device(&sdkp->device->sdev_gendev); From f3b8975525b32e6e5839af22aa0b4cafb462e8cc Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 27 Mar 2019 12:11:52 -0400 Subject: [PATCH 2573/2608] scsi: sd: Quiesce warning if device does not report optimal I/O size commit 1d5de5bd311be7cd54f02f7cd164f0349a75c876 upstream. Commit a83da8a4509d ("scsi: sd: Optimal I/O size should be a multiple of physical block size") split one conditional into several separate statements in an effort to provide more accurate warning messages when a device reports a nonsensical value. However, this reorganization accidentally dropped the precondition of the reported value being larger than zero. This lead to a warning getting emitted on devices that do not report an optimal I/O size at all. Remain silent if a device does not report an optimal I/O size. Fixes: a83da8a4509d ("scsi: sd: Optimal I/O size should be a multiple of physical block size") Cc: Randy Dunlap Cc: Reported-by: Hussam Al-Tayeb Tested-by: Hussam Al-Tayeb Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0c1dd88f267b..e0c0fea227c1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3084,6 +3084,9 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, unsigned int opt_xfer_bytes = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); + if (sdkp->opt_xfer_blocks == 0) + return false; + if (sdkp->opt_xfer_blocks > dev_max) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ From 69476e1f42ed73cc1347bea07c96fb098ae2214b Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:36:58 +0100 Subject: [PATCH 2574/2608] scsi: zfcp: fix rport unblock if deleted SCSI devices on Scsi_Host commit fe67888fc007a76b81e37da23ce5bd8fb95890b0 upstream. An already deleted SCSI device can exist on the Scsi_Host and remain there because something still holds a reference. A new SCSI device with the same H:C:T:L and FCP device, target port WWPN, and FCP LUN can be created. When we try to unblock an rport, we still find the deleted SCSI device and return early because the zfcp_scsi_dev of that SCSI device is not ZFCP_STATUS_COMMON_UNBLOCKED. Hence we miss to unblock the rport, even if the new proper SCSI device would be in good state. Therefore, skip deleted SCSI devices when iterating the sdevs of the shost. [cf. __scsi_device_lookup{_by_target}() or scsi_device_get()] The following abbreviated trace sequence can indicate such problem: Area : REC Tag : ersfs_3 LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x40000000 not ZFCP_STATUS_COMMON_UNBLOCKED Ready count : n not incremented yet Running count : 0x00000000 ERP want : 0x01 ERP need : 0xc1 ZFCP_ERP_ACTION_NONE Area : REC Tag : ersfs_3 LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x41000000 Ready count : n+1 Running count : 0x00000000 ERP want : 0x01 ERP need : 0x01 ... Area : REC Level : 4 only with increased trace level Tag : ertru_l LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x40000000 Request ID : 0x0000000000000000 ERP status : 0x01800000 ERP step : 0x1000 ERP action : 0x01 ERP count : 0x00 NOT followed by a trace record with tag "scpaddy" for WWPN 0x50050763031bd327. Signed-off-by: Steffen Maier Fixes: 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery") Cc: #2.6.32+ Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_erp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 7aa243a6cdbf..8519be26d8e5 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -1306,6 +1306,9 @@ static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); int lun_status; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL) + continue; if (zsdev->port != port) continue; /* LUN under port of interest */ From 2689a64637db5270e4b1f54808886a721f428406 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:36:59 +0100 Subject: [PATCH 2575/2608] scsi: zfcp: fix scsi_eh host reset with port_forced ERP for non-NPIV FCP devices commit 242ec1455151267fe35a0834aa9038e4c4670884 upstream. Suppose more than one non-NPIV FCP device is active on the same channel. Send I/O to storage and have some of the pending I/O run into a SCSI command timeout, e.g. due to bit errors on the fibre. Now the error situation stops. However, we saw FCP requests continue to timeout in the channel. The abort will be successful, but the subsequent TUR fails. Scsi_eh starts. The LUN reset fails. The target reset fails. The host reset only did an FCP device recovery. However, for non-NPIV FCP devices, this does not close and reopen ports on the SAN-side if other non-NPIV FCP device(s) share the same open ports. In order to resolve the continuing FCP request timeouts, we need to explicitly close and reopen ports on the SAN-side. This was missing since the beginning of zfcp in v2.6.0 history commit ea127f975424 ("[PATCH] s390 (7/7): zfcp host adapter."). Note: The FSF requests for forced port reopen could run into FSF request timeouts due to other reasons. This would trigger an internal FCP device recovery. Pending forced port reopen recoveries would get dismissed. So some ports might not get fully reopened during this host reset handler. However, subsequent I/O would trigger the above described escalation and eventually all ports would be forced reopen to resolve any continuing FCP request timeouts due to earlier bit errors. Signed-off-by: Steffen Maier Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: #3.0+ Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_erp.c | 14 ++++++++++++++ drivers/s390/scsi/zfcp_ext.h | 2 ++ drivers/s390/scsi/zfcp_scsi.c | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 8519be26d8e5..6d5065f679ac 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -652,6 +652,20 @@ static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) add_timer(&erp_action->timer); } +void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag) +{ + unsigned long flags; + struct zfcp_port *port; + + write_lock_irqsave(&adapter->erp_lock, flags); + read_lock(&adapter->port_list_lock); + list_for_each_entry(port, &adapter->port_list, list) + _zfcp_erp_port_forced_reopen(port, clear, dbftag); + read_unlock(&adapter->port_list_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter, int clear, char *id) { diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index c1092a11e728..1b2e2541b1de 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -68,6 +68,8 @@ extern void zfcp_erp_clear_port_status(struct zfcp_port *, u32); extern int zfcp_erp_port_reopen(struct zfcp_port *, int, char *); extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *); extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *); +extern void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag); extern void zfcp_erp_set_lun_status(struct scsi_device *, u32); extern void zfcp_erp_clear_lun_status(struct scsi_device *, u32); extern void zfcp_erp_lun_reopen(struct scsi_device *, int, char *); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 0b6f51424745..6f6bc73a3a10 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -327,6 +327,10 @@ static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) struct zfcp_adapter *adapter = zfcp_sdev->port->adapter; int ret = SUCCESS, fc_ret; + if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE)) { + zfcp_erp_port_forced_reopen_all(adapter, 0, "schrh_p"); + zfcp_erp_wait(adapter); + } zfcp_erp_adapter_reopen(adapter, 0, "schrh_1"); zfcp_erp_wait(adapter); fc_ret = fc_block_scsi_eh(scpnt); From 4f49089456e7537c6c2c3596aaabd89d698dbe15 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 12:16:06 -0500 Subject: [PATCH 2576/2608] tty: atmel_serial: fix a potential NULL pointer dereference commit c85be041065c0be8bc48eda4c45e0319caf1d0e5 upstream. In case dmaengine_prep_dma_cyclic fails, the fix returns a proper error code to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Fixes: 34df42f59a60 ("serial: at91: add rx dma support") Acked-by: Richard Genoud Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 2286e9d73115..9f1cef59fa28 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1163,6 +1163,10 @@ static int atmel_prepare_rx_dma(struct uart_port *port) sg_dma_len(&atmel_port->sg_rx)/2, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT); + if (!desc) { + dev_err(port->dev, "Preparing DMA cyclic failed\n"); + goto chan_err; + } desc->callback = atmel_complete_rx_dma; desc->callback_param = port; atmel_port->desc_rx = desc; From 7787d644703fea7f87a0a30bdca3e477bf9b286c Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 4 Mar 2019 14:33:54 +0000 Subject: [PATCH 2577/2608] staging: comedi: ni_mio_common: Fix divide-by-zero for DIO cmdtest commit bafd9c64056cd034a1174dcadb65cd3b294ff8f6 upstream. `ni_cdio_cmdtest()` validates Comedi asynchronous commands for the DIO subdevice (subdevice 2) of supported National Instruments M-series cards. It is called when handling the `COMEDI_CMD` and `COMEDI_CMDTEST` ioctls for this subdevice. There are two causes for a possible divide-by-zero error when validating that the `stop_arg` member of the passed-in command is not too large. The first cause for the divide-by-zero is that calls to `comedi_bytes_per_scan()` are only valid once the command has been copied to `s->async->cmd`, but that copy is only done for the `COMEDI_CMD` ioctl. For the `COMEDI_CMDTEST` ioctl, it will use whatever was left there by the previous `COMEDI_CMD` ioctl, if any. (This is very likely, as it is usual for the application to use `COMEDI_CMDTEST` before `COMEDI_CMD`.) If there has been no previous, valid `COMEDI_CMD` for this subdevice, then `comedi_bytes_per_scan()` will return 0, so the subsequent division in `ni_cdio_cmdtest()` of `s->async->prealloc_bufsz / comedi_bytes_per_scan(s)` will be a divide-by-zero error. To fix this error, call a new function `comedi_bytes_per_scan_cmd(s, cmd)`, based on the existing `comedi_bytes_per_scan(s)` but using a specified `struct comedi_cmd` for its calculations. (Also refactor `comedi_bytes_per_scan()` to call the new function.) Once the first cause for the divide-by-zero has been fixed, the second cause is that `comedi_bytes_per_scan_cmd()` can legitimately return 0 if the `scan_end_arg` member of the `struct comedi_cmd` being tested is 0. Fix it by only performing the division (and validating that `stop_arg` is no more than the maximum value) if `comedi_bytes_per_scan_cmd()` returns a non-zero value. The problem was reported on the COMEDI mailing list here: https://groups.google.com/forum/#!topic/comedi_list/4t9WlHzMhKM Reported-by: Ivan Vasilyev Tested-by: Ivan Vasilyev Fixes: f164cbf98fa8 ("staging: comedi: ni_mio_common: add finite regeneration to dio output") Cc: # 4.6+ Cc: Spencer E. Olson Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedidev.h | 2 ++ drivers/staging/comedi/drivers.c | 33 ++++++++++++++++--- .../staging/comedi/drivers/ni_mio_common.c | 10 ++++-- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/drivers/staging/comedi/comedidev.h b/drivers/staging/comedi/comedidev.h index 1bb9986f865e..33f249af0063 100644 --- a/drivers/staging/comedi/comedidev.h +++ b/drivers/staging/comedi/comedidev.h @@ -992,6 +992,8 @@ int comedi_dio_insn_config(struct comedi_device *dev, unsigned int mask); unsigned int comedi_dio_update_state(struct comedi_subdevice *s, unsigned int *data); +unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s, + struct comedi_cmd *cmd); unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s); unsigned int comedi_nscans_left(struct comedi_subdevice *s, unsigned int nscans); diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c index c11c22bd6d13..2e532219f08b 100644 --- a/drivers/staging/comedi/drivers.c +++ b/drivers/staging/comedi/drivers.c @@ -390,11 +390,13 @@ unsigned int comedi_dio_update_state(struct comedi_subdevice *s, EXPORT_SYMBOL_GPL(comedi_dio_update_state); /** - * comedi_bytes_per_scan() - Get length of asynchronous command "scan" in bytes + * comedi_bytes_per_scan_cmd() - Get length of asynchronous command "scan" in + * bytes * @s: COMEDI subdevice. + * @cmd: COMEDI command. * * Determines the overall scan length according to the subdevice type and the - * number of channels in the scan. + * number of channels in the scan for the specified command. * * For digital input, output or input/output subdevices, samples for * multiple channels are assumed to be packed into one or more unsigned @@ -404,9 +406,9 @@ EXPORT_SYMBOL_GPL(comedi_dio_update_state); * * Returns the overall scan length in bytes. */ -unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s) +unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s, + struct comedi_cmd *cmd) { - struct comedi_cmd *cmd = &s->async->cmd; unsigned int num_samples; unsigned int bits_per_sample; @@ -423,6 +425,29 @@ unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s) } return comedi_samples_to_bytes(s, num_samples); } +EXPORT_SYMBOL_GPL(comedi_bytes_per_scan_cmd); + +/** + * comedi_bytes_per_scan() - Get length of asynchronous command "scan" in bytes + * @s: COMEDI subdevice. + * + * Determines the overall scan length according to the subdevice type and the + * number of channels in the scan for the current command. + * + * For digital input, output or input/output subdevices, samples for + * multiple channels are assumed to be packed into one or more unsigned + * short or unsigned int values according to the subdevice's %SDF_LSAMPL + * flag. For other types of subdevice, samples are assumed to occupy a + * whole unsigned short or unsigned int according to the %SDF_LSAMPL flag. + * + * Returns the overall scan length in bytes. + */ +unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s) +{ + struct comedi_cmd *cmd = &s->async->cmd; + + return comedi_bytes_per_scan_cmd(s, cmd); +} EXPORT_SYMBOL_GPL(comedi_bytes_per_scan); static unsigned int __comedi_nscans_left(struct comedi_subdevice *s, diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 158f3e83efb6..36361bdf934a 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -3523,6 +3523,7 @@ static int ni_cdio_check_chanlist(struct comedi_device *dev, static int ni_cdio_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { + unsigned int bytes_per_scan; int err = 0; int tmp; @@ -3552,9 +3553,12 @@ static int ni_cdio_cmdtest(struct comedi_device *dev, err |= comedi_check_trigger_arg_is(&cmd->convert_arg, 0); err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); - err |= comedi_check_trigger_arg_max(&cmd->stop_arg, - s->async->prealloc_bufsz / - comedi_bytes_per_scan(s)); + bytes_per_scan = comedi_bytes_per_scan_cmd(s, cmd); + if (bytes_per_scan) { + err |= comedi_check_trigger_arg_max(&cmd->stop_arg, + s->async->prealloc_bufsz / + bytes_per_scan); + } if (err) return 3; From 03f4d949bbd7cd5b979a3c756bdf575eb4cc3b96 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Wed, 27 Mar 2019 18:45:26 +0000 Subject: [PATCH 2578/2608] staging: vt6655: Remove vif check from vnt_interrupt commit cc26358f89c3e493b54766b1ca56cfc6b14db78a upstream. A check for vif is made in vnt_interrupt_work. There is a small chance of leaving interrupt disabled while vif is NULL and the work hasn't been scheduled. Signed-off-by: Malcolm Priestley CC: stable@vger.kernel.org # v4.2+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/device_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 1123b4f1e1d6..38a1ef15e25e 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1079,8 +1079,7 @@ static irqreturn_t vnt_interrupt(int irq, void *arg) { struct vnt_private *priv = arg; - if (priv->vif) - schedule_work(&priv->interrupt_work); + schedule_work(&priv->interrupt_work); return IRQ_HANDLED; } From 6c8f9d93c2c7861c0caffe3c9b378c891c92ef39 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sun, 24 Mar 2019 18:53:49 +0000 Subject: [PATCH 2579/2608] staging: vt6655: Fix interrupt race condition on device start up. commit 3b9c2f2e0e99bb67c96abcb659b3465efe3bee1f upstream. It appears on some slower systems that the driver can find its way out of the workqueue while the interrupt is disabled by continuous polling by it. Move MACvIntEnable to vnt_interrupt_work so that it is always enabled on all routes out of vnt_interrupt_process. Move MACvIntDisable so that the device doesn't keep polling the system while the workqueue is being processed. Signed-off-by: Malcolm Priestley CC: stable@vger.kernel.org # v4.2+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/device_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 38a1ef15e25e..84a915199e64 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -973,8 +973,6 @@ static void vnt_interrupt_process(struct vnt_private *priv) return; } - MACvIntDisable(priv->PortOffset); - spin_lock_irqsave(&priv->lock, flags); /* Read low level stats */ @@ -1062,8 +1060,6 @@ static void vnt_interrupt_process(struct vnt_private *priv) } spin_unlock_irqrestore(&priv->lock, flags); - - MACvIntEnable(priv->PortOffset, IMR_MASK_VALUE); } static void vnt_interrupt_work(struct work_struct *work) @@ -1073,6 +1069,8 @@ static void vnt_interrupt_work(struct work_struct *work) if (priv->vif) vnt_interrupt_process(priv); + + MACvIntEnable(priv->PortOffset, IMR_MASK_VALUE); } static irqreturn_t vnt_interrupt(int irq, void *arg) @@ -1081,6 +1079,8 @@ static irqreturn_t vnt_interrupt(int irq, void *arg) schedule_work(&priv->interrupt_work); + MACvIntDisable(priv->PortOffset); + return IRQ_HANDLED; } From 0d42abfacfcedc695964d655495c0dbb34e22647 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 18 Mar 2019 18:44:14 -0500 Subject: [PATCH 2580/2608] serial: max310x: Fix to avoid potential NULL pointer dereference commit 3a10e3dd52e80b9a97a3346020024d17b2c272d6 upstream. of_match_device can return a NULL pointer when matching device is not found. This patch avoids a scenario causing NULL pointer derefernce. Signed-off-by: Aditya Pakki Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 9dfedbe6c071..54660002271a 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1323,6 +1323,8 @@ static int max310x_spi_probe(struct spi_device *spi) if (spi->dev.of_node) { const struct of_device_id *of_id = of_match_device(max310x_dt_ids, &spi->dev); + if (!of_id) + return -ENODEV; devtype = (struct max310x_devtype *)of_id->data; } else { From be8c93469b723a77168953bced05bc33048f5af5 Mon Sep 17 00:00:00 2001 From: Hoan Nguyen An Date: Mon, 18 Mar 2019 18:26:32 +0900 Subject: [PATCH 2581/2608] serial: sh-sci: Fix setting SCSCR_TIE while transferring data commit 93bcefd4c6bad4c69dbc4edcd3fbf774b24d930d upstream. We disable transmission interrupt (clear SCSCR_TIE) after all data has been transmitted (if uart_circ_empty(xmit)). While transmitting, if the data is still in the tty buffer, re-enable the SCSCR_TIE bit, which was done at sci_start_tx(). This is unnecessary processing, wasting CPU operation if the data transmission length is large. And further, transmit end, FIFO empty bits disabling have also been performed in the step above. Signed-off-by: Hoan Nguyen An Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 37dba940d898..d5f933ec153c 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -806,19 +806,9 @@ static void sci_transmit_chars(struct uart_port *port) if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); - if (uart_circ_empty(xmit)) { + if (uart_circ_empty(xmit)) sci_stop_tx(port); - } else { - ctrl = serial_port_in(port, SCSCR); - if (port->type != PORT_SCI) { - serial_port_in(port, SCxSR); /* Dummy read */ - sci_clear_SCxSR(port, SCxSR_TDxE_CLEAR(port)); - } - - ctrl |= SCSCR_TIE; - serial_port_out(port, SCSCR, ctrl); - } } /* On SH3, SCIF may read end-of-break as a space->mark char */ From e76f00ea116822d34f7c6f4763f74faf1ac5f273 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Mar 2019 10:11:14 +0900 Subject: [PATCH 2582/2608] USB: serial: cp210x: add new device id commit a595ecdd5f60b2d93863cebb07eec7f935839b54 upstream. Lorenz Messtechnik has a device that is controlled by the cp210x driver, so add the device id to the driver. The device id was provided by Silicon-Labs for the devices from this vendor. Reported-by: Uli Signed-off-by: Greg Kroah-Hartman Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index d8e6790ccffe..98e466c3cfca 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -83,6 +83,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x804E) }, /* Software Bisque Paramount ME build-in converter */ { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */ { USB_DEVICE(0x10C4, 0x8054) }, /* Enfora GSM2228 */ + { USB_DEVICE(0x10C4, 0x8056) }, /* Lorenz Messtechnik devices */ { USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */ { USB_DEVICE(0x10C4, 0x806F) }, /* IMS USB to RS422 Converter Cable */ { USB_DEVICE(0x10C4, 0x807A) }, /* Crumb128 board */ From 40c93e6893636fcae2f92c200c587da24d3facca Mon Sep 17 00:00:00 2001 From: George McCollister Date: Tue, 5 Mar 2019 16:05:03 -0600 Subject: [PATCH 2583/2608] USB: serial: ftdi_sio: add additional NovaTech products commit 422c2537ba9d42320f8ab6573940269f87095320 upstream. Add PIDs for the NovaTech OrionLX+ and Orion I/O so they can be automatically detected. Signed-off-by: George McCollister Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index d45a2c352c98..e76395d7f17d 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -604,6 +604,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index b863bedb55a1..5755f0df0025 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -567,7 +567,9 @@ /* * NovaTech product ids (FTDI_VID) */ -#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +#define FTDI_NT_ORIONLX_PLUS_PID 0x7c91 /* OrionLX+ Substation Automation Platform */ +#define FTDI_NT_ORION_IO_PID 0x7c92 /* Orion I/O */ /* * Synapse Wireless product ids (FTDI_VID) From 079d7d8e2a6119d4cc7e02852f0ed6f994a8df8a Mon Sep 17 00:00:00 2001 From: Lin Yi Date: Wed, 20 Mar 2019 19:04:56 +0800 Subject: [PATCH 2584/2608] USB: serial: mos7720: fix mos_parport refcount imbalance on error path commit 2908b076f5198d231de62713cb2b633a3a4b95ac upstream. The write_parport_reg_nonblock() helper takes a reference to the struct mos_parport, but failed to release it in a couple of error paths after allocation failures, leading to a memory leak. Johan said that move the kref_get() and mos_parport assignment to the end of urbtrack initialisation is a better way, so move it. and mos_parport do not used until urbtrack initialisation. Signed-off-by: Lin Yi Fixes: b69578df7e98 ("USB: usbserial: mos7720: add support for parallel port on moschip 7715") Cc: stable # 2.6.35 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index a453965f9e9a..393a91ab56ed 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -368,8 +368,6 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, if (!urbtrack) return -ENOMEM; - kref_get(&mos_parport->ref_count); - urbtrack->mos_parport = mos_parport; urbtrack->urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urbtrack->urb) { kfree(urbtrack); @@ -390,6 +388,8 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, usb_sndctrlpipe(usbdev, 0), (unsigned char *)urbtrack->setup, NULL, 0, async_complete, urbtrack); + kref_get(&mos_parport->ref_count); + urbtrack->mos_parport = mos_parport; kref_init(&urbtrack->ref_count); INIT_LIST_HEAD(&urbtrack->urblist_entry); From d93cb34931281dfc0cbab4563a13b8c6ffc340f6 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 26 Feb 2019 17:07:10 +0000 Subject: [PATCH 2585/2608] USB: serial: option: set driver_info for SIM5218 and compatibles commit f8df5c2c3e2df5ffaf9fb5503da93d477a8c7db4 upstream. The SIMCom SIM5218 and compatible devices have 5 USB interfaces, only 4 of which are serial ports. The fifth is a network interface supported by the qmi-wwan driver. Furthermore, the serial ports do not support modem control signals. Add driver_info flags to reflect this. Signed-off-by: Mans Rullgard Fixes: ec0cd94d881c ("usb: option: add SIMCom SIM5218") Cc: stable # 3.2 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bf72245f1cea..f490ea65a570 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1069,7 +1069,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ - { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000), /* SIMCom SIM5218 */ + .driver_info = NCTRL(0) | NCTRL(1) | NCTRL(2) | NCTRL(3) | RSVD(4) }, /* Quectel products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), From 7502ab66ed08eef112babadeddc5f0250261fb2f Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Sat, 2 Mar 2019 13:35:53 +0100 Subject: [PATCH 2586/2608] USB: serial: option: add support for Quectel EM12 commit d1252f0237238b912c3e7a51bf237acf34c97983 upstream. The Quectel EM12 is a Cat. 12 LTE modem. It behaves in the exactly the same way as the EP06 (including the dynamic configuration behavior), so the same checks on reserved interfaces, etc. are needed. Signed-off-by: Kristian Evensen Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f490ea65a570..157503c0692c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -249,6 +249,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 +#define QUECTEL_PRODUCT_EM12 0x0512 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1091,6 +1092,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), From 29c87d2264ed8df669a54296d45d4df83e9cf6c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 27 Mar 2019 15:25:32 +0100 Subject: [PATCH 2587/2608] USB: serial: option: add Olicard 600 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 84f3b43f7378b98b7e3096d5499de75183d4347c upstream. This is a Qualcomm based device with a QMI function on interface 4. It is mode switched from 2020:2030 using a standard eject message. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2020 ProdID=2031 Rev= 2.32 S: Manufacturer=Mobile Connect S: Product=Mobile Connect S: SerialNumber=0123456789ABCDEF C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork [ johan: use tabs to align comments in adjacent lines ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 157503c0692c..3311f569aa17 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1947,10 +1947,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ .driver_info = RSVD(4) }, - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ - { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ + .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, From 0209f339b4db01a1555f769d75648df761059dfa Mon Sep 17 00:00:00 2001 From: Wentao Wang Date: Wed, 20 Mar 2019 15:30:39 +0000 Subject: [PATCH 2588/2608] Disable kgdboc failed by echo space to /sys/module/kgdboc/parameters/kgdboc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3ec8002951ea173e24b466df1ea98c56b7920e63 upstream. Echo "" to /sys/module/kgdboc/parameters/kgdboc will fail with "No such device” error. This is caused by function "configure_kgdboc" who init err to ENODEV when the config is empty (legal input) the code go out with ENODEV returned. Fixes: 2dd453168643 ("kgdboc: Fix restrict error") Signed-off-by: Wentao Wang Cc: stable Acked-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index f2b0d8cee8ef..0314e78e31ff 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -148,8 +148,10 @@ static int configure_kgdboc(void) char *cptr = config; struct console *cons; - if (!strlen(config) || isspace(config[0])) + if (!strlen(config) || isspace(config[0])) { + err = 0; goto noconfig; + } kgdboc_io_ops.is_console = 0; kgdb_tty_driver = NULL; From 0d9ef3f5b07e4b6fe6105a45603de1e133e4df49 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Mar 2019 20:44:40 -0700 Subject: [PATCH 2589/2608] fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links commit 23da9588037ecdd4901db76a5b79a42b529c4ec3 upstream. Syzkaller reports: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 5373 Comm: syz-executor.0 Not tainted 5.0.0-rc8+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:put_links+0x101/0x440 fs/proc/proc_sysctl.c:1599 Code: 00 0f 85 3a 03 00 00 48 8b 43 38 48 89 44 24 20 48 83 c0 38 48 89 c2 48 89 44 24 28 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 fe 02 00 00 48 8b 74 24 20 48 c7 c7 60 2a 9d 91 RSP: 0018:ffff8881d828f238 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8881e01b1140 RCX: ffffffff8ee98267 RDX: 0000000000000007 RSI: ffffc90001479000 RDI: ffff8881e01b1178 RBP: dffffc0000000000 R08: ffffed103ee27259 R09: ffffed103ee27259 R10: 0000000000000001 R11: ffffed103ee27258 R12: fffffffffffffff4 R13: 0000000000000006 R14: ffff8881f59838c0 R15: dffffc0000000000 FS: 00007f072254f700(0000) GS:ffff8881f7100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff8b286668 CR3: 00000001f0542002 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: drop_sysctl_table+0x152/0x9f0 fs/proc/proc_sysctl.c:1629 get_subdir fs/proc/proc_sysctl.c:1022 [inline] __register_sysctl_table+0xd65/0x1090 fs/proc/proc_sysctl.c:1335 br_netfilter_init+0xbc/0x1000 [br_netfilter] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f072254ec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000003 RBP: 00007f072254ec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f072254f6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: br_netfilter(+) dvb_usb_dibusb_mc_common dib3000mc dibx000_common dvb_usb_dibusb_common dvb_usb_dw2102 dvb_usb classmate_laptop palmas_regulator cn videobuf2_v4l2 v4l2_common snd_soc_bd28623 mptbase snd_usb_usx2y snd_usbmidi_lib snd_rawmidi wmi libnvdimm lockd sunrpc grace rc_kworld_pc150u rc_core rtc_da9063 sha1_ssse3 i2c_cros_ec_tunnel adxl34x_spi adxl34x nfnetlink lib80211 i5500_temp dvb_as102 dvb_core videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops udc_core lnbp22 leds_lp3952 hid_roccat_ryos s1d13xxxfb mtd vport_geneve openvswitch nf_conncount nf_nat_ipv6 nsh geneve udp_tunnel ip6_udp_tunnel snd_soc_mt6351 sis_agp phylink snd_soc_adau1761_spi snd_soc_adau1761 snd_soc_adau17x1 snd_soc_core snd_pcm_dmaengine ac97_bus snd_compress snd_soc_adau_utils snd_soc_sigmadsp_regmap snd_soc_sigmadsp raid_class hid_roccat_konepure hid_roccat_common hid_roccat c2port_duramar2150 core mdio_bcm_unimac iptable_security iptable_raw iptable_mangle iptable_nat nf_nat_ipv4 nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim devlink vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel joydev mousedev ide_pci_generic piix aesni_intel aes_x86_64 ide_core crypto_simd atkbd cryptd glue_helper serio_raw ata_generic pata_acpi i2c_piix4 floppy sch_fq_codel ip_tables x_tables ipv6 [last unloaded: lm73] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 770020de38961fd0 ]--- A new dir entry can be created in get_subdir and its 'header->parent' is set to NULL. Only after insert_header success, it will be set to 'dir', otherwise 'header->parent' is set to NULL and drop_sysctl_table is called. However in err handling path of get_subdir, drop_sysctl_table also be called on 'new->header' regardless its value of parent pointer. Then put_links is called, which triggers NULL-ptr deref when access member of header->parent. In fact we have multiple error paths which call drop_sysctl_table() there, upon failure on insert_links() we also call drop_sysctl_table().And even in the successful case on __register_sysctl_table() we still always call drop_sysctl_table().This patch fix it. Link: http://lkml.kernel.org/r/20190314085527.13244-1-yuehaibing@huawei.com Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between sysctl_table_sets") Signed-off-by: YueHaibing Reported-by: Hulk Robot Acked-by: Luis Chamberlain Cc: Kees Cook Cc: Alexey Dobriyan Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Al Viro Cc: Eric W. Biederman Cc: [3.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f69c545f5868..8d5422bb9c1a 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1620,7 +1620,8 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - put_links(header); + if (parent) + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); From 2ea1c197bfbd166723c788f3e346265b3f8f27c0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 26 Feb 2019 13:44:51 -0800 Subject: [PATCH 2590/2608] drm/vgem: fix use-after-free when drm_gem_handle_create() fails commit 21d2b122732318b48c10b7262e15595ce54511d3 upstream. If drm_gem_handle_create() fails in vgem_gem_create(), then the drm_vgem_gem_object is freed twice: once when the reference is dropped by drm_gem_object_put_unlocked(), and again by __vgem_gem_destroy(). This was hit by syzkaller using fault injection. Fix it by skipping the second free. Reported-by: syzbot+e73f2fb5ed5a5df36d33@syzkaller.appspotmail.com Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces") Reviewed-by: Chris Wilson Cc: Laura Abbott Cc: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Acked-by: Laura Abbott Signed-off-by: Rodrigo Siqueira Link: https://patchwork.freedesktop.org/patch/msgid/20190226214451.195123-1-ebiggers@kernel.org Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vgem/vgem_drv.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 81c7ab10c083..aa592277d510 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -192,13 +192,9 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, ret = drm_gem_handle_create(file, &obj->base, handle); drm_gem_object_put_unlocked(&obj->base); if (ret) - goto err; + return ERR_PTR(ret); return &obj->base; - -err: - __vgem_gem_destroy(obj); - return ERR_PTR(ret); } static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev, From e708e5db25085d42481dfc8da49cac67e3b103b2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 8 Mar 2019 22:07:57 -0600 Subject: [PATCH 2591/2608] gpio: exar: add a check for the return value of ida_simple_get fails commit 7ecced0934e574b528a1ba6c237731e682216a74 upstream. ida_simple_get may fail and return a negative error number. The fix checks its return value; if it fails, go to err_destroy. Cc: Signed-off-by: Kangjie Lu Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-exar.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c index 0ecd2369c2ca..a09d2f9ebacc 100644 --- a/drivers/gpio/gpio-exar.c +++ b/drivers/gpio/gpio-exar.c @@ -148,6 +148,8 @@ static int gpio_exar_probe(struct platform_device *pdev) mutex_init(&exar_gpio->lock); index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL); + if (index < 0) + goto err_destroy; sprintf(exar_gpio->name, "exar_gpio%d", index); exar_gpio->gpio_chip.label = exar_gpio->name; From 5d4cc25d578cf253a76b14c1ea3ebbc825b2f38a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 Mar 2019 21:29:37 +0800 Subject: [PATCH 2592/2608] gpio: adnp: Fix testing wrong value in adnp_gpio_direction_input commit c5bc6e526d3f217ed2cc3681d256dc4a2af4cc2b upstream. Current code test wrong value so it does not verify if the written data is correctly read back. Fix it. Also make it return -EPERM if read value does not match written bit, just like it done for adnp_gpio_direction_output(). Fixes: 5e969a401a01 ("gpio: Add Avionic Design N-bit GPIO expander support") Cc: Signed-off-by: Axel Lin Reviewed-by: Thierry Reding Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-adnp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c index 89863ea25de1..ee923b1b820c 100644 --- a/drivers/gpio/gpio-adnp.c +++ b/drivers/gpio/gpio-adnp.c @@ -132,8 +132,10 @@ static int adnp_gpio_direction_input(struct gpio_chip *chip, unsigned offset) if (err < 0) goto out; - if (err & BIT(pos)) - err = -EACCES; + if (value & BIT(pos)) { + err = -EPERM; + goto out; + } err = 0; From d05885ad4d32582da1716f1122cfa28f976abb7d Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 22 Mar 2019 16:51:07 +0800 Subject: [PATCH 2593/2608] phy: sun4i-usb: Support set_mode to USB_HOST for non-OTG PHYs commit 1396929e8a903db80425343cacca766a18ad6409 upstream. While only the first PHY supports mode switching, the remaining PHYs work in USB host mode. They should support set_mode with mode=USB_HOST instead of failing. This is especially needed now that the USB core does set_mode for all USB ports, which was added in commit b97a31348379 ("usb: core: comply to PHY framework"). Make set_mode with mode=USB_HOST a no-op instead of failing for the non-OTG USB PHYs. Fixes: 6ba43c291961 ("phy-sun4i-usb: Add support for phy_set_mode") Signed-off-by: Chen-Yu Tsai Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/phy/allwinner/phy-sun4i-usb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index d1ccff527756..4d34dfb64998 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -480,8 +480,11 @@ static int sun4i_usb_phy_set_mode(struct phy *_phy, enum phy_mode mode) struct sun4i_usb_phy_data *data = to_sun4i_usb_phy_data(phy); int new_mode; - if (phy->index != 0) + if (phy->index != 0) { + if (mode == PHY_MODE_USB_HOST) + return 0; return -EINVAL; + } switch (mode) { case PHY_MODE_USB_HOST: From 54333dcc9d341f0753d18ccce76f604d135ed60a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 14:54:30 +0100 Subject: [PATCH 2594/2608] usb: mtu3: fix EXTCON dependency commit 3d54d10c6afed34fd45b852bf76f55e8da31d8ef upstream. When EXTCON is a loadable module, mtu3 fails to link as built-in: drivers/usb/mtu3/mtu3_plat.o: In function `mtu3_probe': mtu3_plat.c:(.text+0x690): undefined reference to `extcon_get_edev_by_phandle' Add a Kconfig dependency to force mtu3 also to be a loadable module if extconn is, but still allow it to be built without extcon. Fixes: d0ed062a8b75 ("usb: mtu3: dual-role mode support") Signed-off-by: Arnd Bergmann Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/mtu3/Kconfig b/drivers/usb/mtu3/Kconfig index 25cd61947bee..a213ce94f6eb 100644 --- a/drivers/usb/mtu3/Kconfig +++ b/drivers/usb/mtu3/Kconfig @@ -4,6 +4,7 @@ config USB_MTU3 tristate "MediaTek USB3 Dual Role controller" depends on EXTCON && (USB || USB_GADGET) && HAS_DMA depends on ARCH_MEDIATEK || COMPILE_TEST + depends on EXTCON || !EXTCON select USB_XHCI_MTK if USB_SUPPORT && USB_XHCI_HCD help Say Y or M here if your system runs on MediaTek SoCs with From 3e043e5d7f8cfcfa960ea8c76c556c34c1fb8cef Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Tue, 5 Mar 2019 10:10:34 +0000 Subject: [PATCH 2595/2608] USB: gadget: f_hid: fix deadlock in f_hidg_write() commit 072684e8c58d17e853f8e8b9f6d9ce2e58d2b036 upstream. In f_hidg_write() the write_spinlock is acquired before calling usb_ep_queue() which causes a deadlock when dummy_hcd is being used. This is because dummy_queue() callbacks into f_hidg_req_complete() which tries to acquire the same spinlock. This is (part of) the backtrace when the deadlock occurs: 0xffffffffc06b1410 in f_hidg_req_complete 0xffffffffc06a590a in usb_gadget_giveback_request 0xffffffffc06cfff2 in dummy_queue 0xffffffffc06a4b96 in usb_ep_queue 0xffffffffc06b1eb6 in f_hidg_write 0xffffffff8127730b in __vfs_write 0xffffffff812774d1 in vfs_write 0xffffffff81277725 in SYSC_write Fix this by releasing the write_spinlock before calling usb_ep_queue() Reviewed-by: James Bottomley Tested-by: James Bottomley Cc: stable@vger.kernel.org # 4.11+ Fixes: 749494b6bdbb ("usb: gadget: f_hid: fix: Move IN request allocation to set_alt()") Signed-off-by: Radoslav Gerganov Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index d8e359ef6eb1..63f6e344d5b0 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -395,20 +395,20 @@ try_again: req->complete = f_hidg_req_complete; req->context = hidg; + spin_unlock_irqrestore(&hidg->write_spinlock, flags); + status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); if (status < 0) { ERROR(hidg->func.config->cdev, "usb_ep_queue error on int endpoint %zd\n", status); - goto release_write_pending_unlocked; + goto release_write_pending; } else { status = count; } - spin_unlock_irqrestore(&hidg->write_spinlock, flags); return status; release_write_pending: spin_lock_irqsave(&hidg->write_spinlock, flags); -release_write_pending_unlocked: hidg->write_pending = 0; spin_unlock_irqrestore(&hidg->write_spinlock, flags); From 60397a4e109bbfaa88d1a0b26e17228f7118b6f3 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Fri, 1 Mar 2019 11:05:45 +0000 Subject: [PATCH 2596/2608] usb: common: Consider only available nodes for dr_mode commit 238e0268c82789e4c107a37045d529a6dbce51a9 upstream. There are cases where multiple device tree nodes point to the same phy node by means of the "phys" property, but we should only consider those nodes that are marked as available rather than just any node. Fixes: 98bfb3946695 ("usb: of: add an api to get dr_mode by the phy node") Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Fabrizio Castro Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c index 552ff7ac5a6b..c1ab14145f62 100644 --- a/drivers/usb/common/common.c +++ b/drivers/usb/common/common.c @@ -148,6 +148,8 @@ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0) do { controller = of_find_node_with_property(controller, "phys"); + if (!of_device_is_available(controller)) + continue; index = 0; do { if (arg0 == -1) { From dc4267dceaca44f938e7fbfb94d9445dfdeef996 Mon Sep 17 00:00:00 2001 From: Yasushi Asano Date: Mon, 18 Feb 2019 11:26:34 +0100 Subject: [PATCH 2597/2608] usb: host: xhci-rcar: Add XHCI_TRUST_TX_LENGTH quirk commit 40fc165304f0faaae78b761f8ee30b5d216b1850 upstream. When plugging BUFFALO LUA4-U3-AGT USB3.0 to Gigabit Ethernet LAN Adapter, warning messages filled up dmesg. [ 101.098287] xhci-hcd ee000000.usb: WARN Successful completion on short TX for slot 1 ep 4: needs XHCI_TRUST_TX_LENGTH quirk? [ 101.117463] xhci-hcd ee000000.usb: WARN Successful completion on short TX for slot 1 ep 4: needs XHCI_TRUST_TX_LENGTH quirk? [ 101.136513] xhci-hcd ee000000.usb: WARN Successful completion on short TX for slot 1 ep 4: needs XHCI_TRUST_TX_LENGTH quirk? Adding the XHCI_TRUST_TX_LENGTH quirk resolves the issue. Signed-off-by: Yasushi Asano Signed-off-by: Spyridon Papageorgiou Acked-by: Yoshihiro Shimoda Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-rcar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c index 97f23cc31f4c..425c2edfd6ea 100644 --- a/drivers/usb/host/xhci-rcar.c +++ b/drivers/usb/host/xhci-rcar.c @@ -236,6 +236,7 @@ int xhci_rcar_init_quirk(struct usb_hcd *hcd) xhci_rcar_is_gen3(hcd->self.controller)) xhci->quirks |= XHCI_NO_64BIT_SUPPORT; + xhci->quirks |= XHCI_TRUST_TX_LENGTH; return xhci_rcar_download_firmware(hcd); } From 09fa576a74c2b6872a617b304e62acb1183e5254 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 22 Mar 2019 17:50:15 +0200 Subject: [PATCH 2598/2608] xhci: Fix port resume done detection for SS ports with LPM enabled commit 6cbcf596934c8e16d6288c7cc62dfb7ad8eadf15 upstream. A suspended SS port in U3 link state will go to U0 when resumed, but can almost immediately after that enter U1 or U2 link power save states before host controller driver reads the port status. Host controller driver only checks for U0 state, and might miss the finished resume, leaving flags unclear and skip notifying usb code of the wake. Add U1 and U2 to the possible link states when checking for finished port resume. Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index aa230706b875..9a7e77a09080 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1715,10 +1715,13 @@ static void handle_port_status(struct xhci_hcd *xhci, } } - if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_U0 && - DEV_SUPERSPEED_ANY(portsc)) { + if ((portsc & PORT_PLC) && + DEV_SUPERSPEED_ANY(portsc) && + ((portsc & PORT_PLS_MASK) == XDEV_U0 || + (portsc & PORT_PLS_MASK) == XDEV_U1 || + (portsc & PORT_PLS_MASK) == XDEV_U2)) { xhci_dbg(xhci, "resume SS port %d finished\n", port_id); - /* We've just brought the device into U0 through either the + /* We've just brought the device into U0/1/2 through either the * Resume state after a device remote wakeup, or through the * U3Exit state after a host-initiated resume. If it's a device * initiated remote wake, don't pass up the link state change, From 35b7c12b3a9561db89bbce1476826d7f4ba97fd8 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Fri, 22 Mar 2019 16:53:02 +0100 Subject: [PATCH 2599/2608] usb: cdc-acm: fix race during wakeup blocking TX traffic commit 93e1c8a638308980309e009cc40b5a57ef87caf1 upstream. When the kernel is compiled with preemption enabled, the URB completion handler can run in parallel with the work responsible for waking up the tty layer. If the URB handler sets the EVENT_TTY_WAKEUP bit during the call to tty_port_tty_wakeup() to signal that there is room for additional input, it will be cleared at the end of this call. As a result, TX traffic on the upper layer will be blocked. This can be seen with a kernel configured with CONFIG_PREEMPT, and a fast modem connected with PPP running over a USB CDC-ACM port. Use test_and_clear_bit() instead, which ensures that each wakeup requested by the URB completion code will trigger a call to tty_port_tty_wakeup(). Fixes: 1aba579f3cf5 cdc-acm: handle read pipe errors Signed-off-by: Romain Izard Cc: stable Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 8ab0195f8d32..f736c8895089 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -570,10 +570,8 @@ static void acm_softint(struct work_struct *work) clear_bit(EVENT_RX_STALL, &acm->flags); } - if (test_bit(EVENT_TTY_WAKEUP, &acm->flags)) { + if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) tty_port_tty_wakeup(&acm->port); - clear_bit(EVENT_TTY_WAKEUP, &acm->flags); - } } /* From 4774a369518091f46435e0539de6a45bf0681c74 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Thu, 28 Mar 2019 20:44:28 -0700 Subject: [PATCH 2600/2608] mm/migrate.c: add missing flush_dcache_page for non-mapped page migrate commit d2b2c6dd227ba5b8a802858748ec9a780cb75b47 upstream. Our MIPS 1004Kc SoCs were seeing random userspace crashes with SIGILL and SIGSEGV that could not be traced back to a userspace code bug. They had all the magic signs of an I/D cache coherency issue. Now recently we noticed that the /proc/sys/vm/compact_memory interface was quite efficient at provoking this class of userspace crashes. Studying the code in mm/migrate.c there is a distinction made between migrating a page that is mapped at the instant of migration and one that is not mapped. Our problem turned out to be the non-mapped pages. For the non-mapped page the code performs a copy of the page content and all relevant meta-data of the page without doing the required D-cache maintenance. This leaves dirty data in the D-cache of the CPU and on the 1004K cores this data is not visible to the I-cache. A subsequent page-fault that triggers a mapping of the page will happily serve the process with potentially stale code. What about ARM then, this bug should have seen greater exposure? Well ARM became immune to this flaw back in 2010, see commit c01778001a4f ("ARM: 6379/1: Assume new page cache pages have dirty D-cache"). My proposed fix moves the D-cache maintenance inside move_to_new_page to make it common for both cases. Link: http://lkml.kernel.org/r/20190315083502.11849-1-larper@axis.com Fixes: 97ee0524614 ("flush cache before installing new page at migraton") Signed-off-by: Lars Persson Reviewed-by: Paul Burton Acked-by: Mel Gorman Cc: Ralf Baechle Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 877269339fa7..9a3ce8847308 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -247,10 +247,8 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, pte = swp_entry_to_pte(entry); } else if (is_device_public_page(new)) { pte = pte_mkdevmap(pte); - flush_dcache_page(new); } - } else - flush_dcache_page(new); + } #ifdef CONFIG_HUGETLB_PAGE if (PageHuge(new)) { @@ -971,6 +969,13 @@ static int move_to_new_page(struct page *newpage, struct page *page, */ if (!PageMappingFlags(page)) page->mapping = NULL; + + if (unlikely(is_zone_device_page(newpage))) { + if (is_device_public_page(newpage)) + flush_dcache_page(newpage); + } else + flush_dcache_page(newpage); + } out: return rc; From 25e902d6661a63dd4f8901ec2ea16fee94309958 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2019 15:51:35 +0200 Subject: [PATCH 2601/2608] perf intel-pt: Fix TSC slip commit f3b4e06b3bda759afd042d3d5fa86bea8f1fe278 upstream. A TSC packet can slip past MTC packets so that the timestamp appears to go backwards. One estimate is that can be up to about 40 CPU cycles, which is certainly less than 0x1000 TSC ticks, but accept slippage an order of magnitude more to be on the safe side. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Fixes: 79b58424b821c ("perf tools: Add Intel PT support for decoding MTC packets") Link: http://lkml.kernel.org/r/20190325135135.18348-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- .../util/intel-pt-decoder/intel-pt-decoder.c | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f3db68abbd9a..0bc3e6e93c31 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -251,19 +251,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d)) decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / decoder->tsc_ctc_ratio_d; - - /* - * Allow for timestamps appearing to backwards because a TSC - * packet has slipped past a MTC packet, so allow 2 MTC ticks - * or ... - */ - decoder->tsc_slip = multdiv(2 << decoder->mtc_shift, - decoder->tsc_ctc_ratio_n, - decoder->tsc_ctc_ratio_d); } - /* ... or 0x100 paranoia */ - if (decoder->tsc_slip < 0x100) - decoder->tsc_slip = 0x100; + + /* + * A TSC packet can slip past MTC packets so that the timestamp appears + * to go backwards. One estimate is that can be up to about 40 CPU + * cycles, which is certainly less than 0x1000 TSC ticks, but accept + * slippage an order of magnitude more to be on the safe side. + */ + decoder->tsc_slip = 0x10000; intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); From 5f6b5b8b609bbe3d40b95aa611f66ab967fb2011 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Mar 2019 17:36:05 +0100 Subject: [PATCH 2602/2608] cpu/hotplug: Prevent crash when CPU bringup fails on CONFIG_HOTPLUG_CPU=n commit 206b92353c839c0b27a0b9bec24195f93fd6cf7a upstream. Tianyu reported a crash in a CPU hotplug teardown callback when booting a kernel which has CONFIG_HOTPLUG_CPU disabled with the 'nosmt' boot parameter. It turns out that the SMP=y CONFIG_HOTPLUG_CPU=n case has been broken forever in case that a bringup callback fails. Unfortunately this issue was not recognized when the CPU hotplug code was reworked, so the shortcoming just stayed in place. When a bringup callback fails, the CPU hotplug code rolls back the operation and takes the CPU offline. The 'nosmt' command line argument uses a bringup failure to abort the bringup of SMT sibling CPUs. This partial bringup is required due to the MCE misdesign on Intel CPUs. With CONFIG_HOTPLUG_CPU=y the rollback works perfectly fine, but CONFIG_HOTPLUG_CPU=n lacks essential mechanisms to exercise the low level teardown of a CPU including the synchronizations in various facilities like RCU, NOHZ and others. As a consequence the teardown callbacks which must be executed on the outgoing CPU within stop machine with interrupts disabled are executed on the control CPU in interrupt enabled and preemptible context causing the kernel to crash and burn. The pre state machine code has a different failure mode which is more subtle and resulting in a less obvious use after free crash because the control side frees resources which are still in use by the undead CPU. But this is not a x86 only problem. Any architecture which supports the SMP=y HOTPLUG_CPU=n combination suffers from the same issue. It's just less likely to be triggered because in 99.99999% of the cases all bringup callbacks succeed. The easy solution of making HOTPLUG_CPU mandatory for SMP is not working on all architectures as the following architectures have either no hotplug support at all or not all subarchitectures support it: alpha, arc, hexagon, openrisc, riscv, sparc (32bit), mips (partial). Crashing the kernel in such a situation is not an acceptable state either. Implement a minimal rollback variant by limiting the teardown to the point where all regular teardown callbacks have been invoked and leave the CPU in the 'dead' idle state. This has the following consequences: - the CPU is brought down to the point where the stop_machine takedown would happen. - the CPU stays there forever and is idle - The CPU is cleared in the CPU active mask, but not in the CPU online mask which is a legit state. - Interrupts are not forced away from the CPU - All facilities which only look at online mask would still see it, but that is the case during normal hotplug/unplug operations as well. It's just a (way) longer time frame. This will expose issues, which haven't been exposed before or only seldom, because now the normally transient state of being non active but online is a permanent state. In testing this exposed already an issue vs. work queues where the vmstat code schedules work on the almost dead CPU which ends up in an unbound workqueue and triggers 'preemtible context' warnings. This is not a problem of this change, it merily exposes an already existing issue. Still this is better than crashing fully without a chance to debug it. This is mainly thought as workaround for those architectures which do not support HOTPLUG_CPU. All others should enforce HOTPLUG_CPU for SMP. Fixes: 2e1a3483ce74 ("cpu/hotplug: Split out the state walk into functions") Reported-by: Tianyu Lan Signed-off-by: Thomas Gleixner Tested-by: Tianyu Lan Acked-by: Greg Kroah-Hartman Cc: Konrad Wilk Cc: Josh Poimboeuf Cc: Mukesh Ojha Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Rik van Riel Cc: Andy Lutomirski Cc: Micheal Kelley Cc: "K. Y. Srinivasan" Cc: Linus Torvalds Cc: Borislav Petkov Cc: K. Y. Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190326163811.503390616@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 0171754db32b..32f0432f0c26 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -538,6 +538,20 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) } } +static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) +{ + if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + /* + * When CPU hotplug is disabled, then taking the CPU down is not + * possible because takedown_cpu() and the architecture and + * subsystem specific mechanisms are not available. So the CPU + * which would be completely unplugged again needs to stay around + * in the current state. + */ + return st->state <= CPUHP_BRINGUP_CPU; +} + static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { @@ -548,8 +562,10 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, st->state++; ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); if (ret) { - st->target = prev_state; - undo_cpu_up(cpu, st); + if (can_rollback_cpu(st)) { + st->target = prev_state; + undo_cpu_up(cpu, st); + } break; } } From a6c42842c655ccb00ab71ee771396a0fbf09a71e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Mar 2019 17:36:06 +0100 Subject: [PATCH 2603/2608] x86/smp: Enforce CONFIG_HOTPLUG_CPU when SMP=y commit bebd024e4815b1a170fcd21ead9c2222b23ce9e6 upstream. The SMT disable 'nosmt' command line argument is not working properly when CONFIG_HOTPLUG_CPU is disabled. The teardown of the sibling CPUs which are required to be brought up due to the MCE issues, cannot work. The CPUs are then kept in a half dead state. As the 'nosmt' functionality has become popular due to the speculative hardware vulnerabilities, the half torn down state is not a proper solution to the problem. Enforce CONFIG_HOTPLUG_CPU=y when SMP is enabled so the full operation is possible. Reported-by: Tianyu Lan Signed-off-by: Thomas Gleixner Acked-by: Greg Kroah-Hartman Cc: Konrad Wilk Cc: Josh Poimboeuf Cc: Mukesh Ojha Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Rik van Riel Cc: Andy Lutomirski Cc: Micheal Kelley Cc: "K. Y. Srinivasan" Cc: Linus Torvalds Cc: Borislav Petkov Cc: K. Y. Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190326163811.598166056@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4f393eb9745f..8fec1585ac7a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2139,14 +2139,8 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING If unsure, leave at the default value. config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" + def_bool y depends on SMP - ---help--- - Say Y here to allow turning CPUs off and on. CPUs can be - controlled through /sys/devices/system/cpu. - ( Note: power management support will enable this option - automatically on SMP systems. ) - Say N if you want to disable CPU hotplug. config BOOTPARAM_HOTPLUG_CPU0 bool "Set default setting of cpu0_hotpluggable" From 9badc8549fa75b3f1b5d74eea9d22c60a525b242 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 15 Feb 2019 12:48:39 -0800 Subject: [PATCH 2604/2608] KVM: Reject device ioctls from processes other than the VM's creator commit ddba91801aeb5c160b660caed1800eb3aef403f8 upstream. KVM's API requires thats ioctls must be issued from the same process that created the VM. In other words, userspace can play games with a VM's file descriptors, e.g. fork(), SCM_RIGHTS, etc..., but only the creator can do anything useful. Explicitly reject device ioctls that are issued by a process other than the VM's creator, and update KVM's API documentation to extend its requirements to device ioctls. Fixes: 852b6d57dc7f ("kvm: add device control API") Cc: Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- Documentation/virtual/kvm/api.txt | 16 +++++++++++----- virt/kvm/kvm_main.c | 3 +++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5d12166bd66b..f67ed33d1054 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -13,7 +13,7 @@ of a virtual machine. The ioctls belong to three classes - VM ioctls: These query and set attributes that affect an entire virtual machine, for example memory layout. In addition a VM ioctl is used to - create virtual cpus (vcpus). + create virtual cpus (vcpus) and devices. Only run VM ioctls from the same process (address space) that was used to create the VM. @@ -24,6 +24,11 @@ of a virtual machine. The ioctls belong to three classes Only run vcpu ioctls from the same thread that was used to create the vcpu. + - device ioctls: These query and set attributes that control the operation + of a single device. + + device ioctls must be issued from the same process (address space) that + was used to create the VM. 2. File descriptors ------------------- @@ -32,10 +37,11 @@ The kvm API is centered around file descriptors. An initial open("/dev/kvm") obtains a handle to the kvm subsystem; this handle can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this handle will create a VM file descriptor which can be used to issue VM -ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu -and return a file descriptor pointing to it. Finally, ioctls on a vcpu -fd can be used to control the vcpu, including the important task of -actually running guest code. +ioctls. A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will +create a virtual cpu or device and return a file descriptor pointing to +the new resource. Finally, ioctls on a vcpu or device fd can be used +to control the vcpu or device. For vcpus, this includes the important +task of actually running guest code. In general file descriptors can be migrated among processes by means of fork() and the SCM_RIGHTS facility of unix domain socket. These diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 66cc315efa6d..a373c60ef1c0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2812,6 +2812,9 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, { struct kvm_device *dev = filp->private_data; + if (dev->kvm->mm != current->mm) + return -EIO; + switch (ioctl) { case KVM_SET_DEVICE_ATTR: return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); From 0c60bc18506e1157624107fbab1d7883f894fc72 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 7 Mar 2019 15:43:02 -0800 Subject: [PATCH 2605/2608] KVM: x86: Emulate MSR_IA32_ARCH_CAPABILITIES on AMD hosts commit 0cf9135b773bf32fba9dd8e6699c1b331ee4b749 upstream. The CPUID flag ARCH_CAPABILITIES is unconditioinally exposed to host userspace for all x86 hosts, i.e. KVM advertises ARCH_CAPABILITIES regardless of hardware support under the pretense that KVM fully emulates MSR_IA32_ARCH_CAPABILITIES. Unfortunately, only VMX hosts handle accesses to MSR_IA32_ARCH_CAPABILITIES (despite KVM_GET_MSRS also reporting MSR_IA32_ARCH_CAPABILITIES for all hosts). Move the MSR_IA32_ARCH_CAPABILITIES handling to common x86 code so that it's emulated on AMD hosts. Fixes: 1eaafe91a0df4 ("kvm: x86: IA32_ARCH_CAPABILITIES is always supported") Cc: stable@vger.kernel.org Reported-by: Xiaoyao Li Cc: Jim Mattson Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 14 -------------- arch/x86/kvm/x86.c | 12 ++++++++++++ 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d2ae93faafe8..f9a4b85d7309 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -509,6 +509,7 @@ struct kvm_vcpu_arch { bool tpr_access_reporting; u64 ia32_xss; u64 microcode_version; + u64 arch_capabilities; /* * Paging state of the vcpu diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 229d5e39f5c0..4bd878c9f7d2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -740,7 +740,6 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif - u64 arch_capabilities; u64 spec_ctrl; u32 vm_entry_controls_shadow; @@ -3493,12 +3492,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = to_vmx(vcpu)->spec_ctrl; break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) - return 1; - msr_info->data = to_vmx(vcpu)->arch_capabilities; - break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -3663,11 +3656,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated) - return 1; - vmx->arch_capabilities = data; - break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -5929,8 +5917,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } - vmx->arch_capabilities = kvm_get_arch_capabilities(); - vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); /* 22.2.1, 20.8.1 */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ce5b3dc348ce..5f85f17ffb75 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2234,6 +2234,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (msr_info->host_initiated) vcpu->arch.microcode_version = data; break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vcpu->arch.arch_capabilities = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2523,6 +2528,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UCODE_REV: msr_info->data = vcpu->arch.microcode_version; break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + return 1; + msr_info->data = vcpu->arch.arch_capabilities; + break; case MSR_MTRRcap: case 0x200 ... 0x2ff: return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); @@ -7918,6 +7929,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { int r; + vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); kvm_vcpu_mtrr_init(vcpu); r = vcpu_load(vcpu); if (r) From 1cb3e7f166b5c3efea42d2576eb2d4625a6c81ac Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Dec 2017 11:59:39 +0100 Subject: [PATCH 2606/2608] Revert "USB: core: only clean up what we allocated" commit cf4df407e0d7cde60a45369c2a3414d18e2d4fdd upstream. This reverts commit 32fd87b3bbf5f7a045546401dfe2894dbbf4d8c3. Alan wrote a better fix for this... Cc: Andrey Konovalov Cc: stable Cc: Nathan Chancellor Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 1a6ccdd5a5fc..bd749e78df59 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -768,21 +768,18 @@ void usb_destroy_configuration(struct usb_device *dev) return; if (dev->rawdescriptors) { - for (i = 0; i < dev->descriptor.bNumConfigurations && - i < USB_MAXCONFIG; i++) + for (i = 0; i < dev->descriptor.bNumConfigurations; i++) kfree(dev->rawdescriptors[i]); kfree(dev->rawdescriptors); dev->rawdescriptors = NULL; } - for (c = 0; c < dev->descriptor.bNumConfigurations && - c < USB_MAXCONFIG; c++) { + for (c = 0; c < dev->descriptor.bNumConfigurations; c++) { struct usb_host_config *cf = &dev->config[c]; kfree(cf->string); - for (i = 0; i < cf->desc.bNumInterfaces && - i < USB_MAXINTERFACES; i++) { + for (i = 0; i < cf->desc.bNumInterfaces; i++) { if (cf->intf_cache[i]) kref_put(&cf->intf_cache[i]->ref, usb_release_interface_cache); From 2cca5be88ac06622da6c6aaa8a2dc7d0ed28c25e Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 11 Mar 2019 10:59:53 +0100 Subject: [PATCH 2607/2608] vfio: ccw: only free cp on final interrupt commit 50b7f1b7236bab08ebbbecf90521e84b068d7a17 upstream. When we get an interrupt for a channel program, it is not necessarily the final interrupt; for example, the issuing guest may request an intermediate interrupt by specifying the program-controlled-interrupt flag on a ccw. We must not switch the state to idle if the interrupt is not yet final; even more importantly, we must not free the translated channel program if the interrupt is not yet final, or the host can crash during cp rewind. Fixes: e5f84dbaea59 ("vfio: ccw: return I/O results asynchronously") Cc: stable@vger.kernel.org # v4.12+ Reviewed-by: Eric Farman Signed-off-by: Cornelia Huck Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/vfio_ccw_drv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index ae7a49ade414..d22759eb6640 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -70,20 +70,24 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) { struct vfio_ccw_private *private; struct irb *irb; + bool is_final; private = container_of(work, struct vfio_ccw_private, io_work); irb = &private->irb; + is_final = !(scsw_actl(&irb->scsw) & + (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT)); if (scsw_is_solicited(&irb->scsw)) { cp_update_scsw(&private->cp, &irb->scsw); - cp_free(&private->cp); + if (is_final) + cp_free(&private->cp); } memcpy(private->io_region.irb_area, irb, sizeof(*irb)); if (private->io_trigger) eventfd_signal(private->io_trigger, 1); - if (private->mdev) + if (private->mdev && is_final) private->state = VFIO_CCW_STATE_IDLE; } From 80bf6c64d5cca26b4d2a94e38cbd89c2615a25c3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2019 06:25:21 +0200 Subject: [PATCH 2608/2608] Linux 4.14.110 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e02bced59a57..37bd0b40876d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 109 +SUBLEVEL = 110 EXTRAVERSION = NAME = Petit Gorille